NER and NED with spaCy

Named Entity Recognition

import spacy

# Load the "en_core_web_sm" pipeline and run it over one sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Print each recognized entity: its text, character offsets, and label.
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Accessing entity annotations and labels



import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("San Francisco considers banning sidewalk delivery robots")

# Document level: one tuple per entity span found in doc.ents.
ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
print(ents)

# Token level: each token carries its own IOB tag and entity type.
ent_san = [doc[0].text, doc[0].ent_iob_, doc[0].ent_type_]
ent_francisco = [doc[1].text, doc[1].ent_iob_, doc[1].ent_type_]
print(ent_san)  # ['San', 'B', 'GPE']
print(ent_francisco)  # ['Francisco', 'I', 'GPE']

Setting entity annotations

import spacy
from spacy.tokens import Span

nlp = spacy.load("en_core_web_sm")
doc = nlp("fb is hiring a new vice president of global policy")
ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
print('Before', ents)
# The model didn't recognize "fb" as an entity :(

# Create a span for the new entity (arguments 0, 1 are token indices).
fb_ent = Span(doc, 0, 1, label="ORG")
orig_ents = list(doc.ents)

# Option 1: Modify the provided entity spans, leaving the rest unmodified
doc.set_ents([fb_ent], default="unmodified")

# Option 2: Assign a complete list of ents to doc.ents
doc.ents = orig_ents + [fb_ent]

ents = [(e.text, e.start, e.end, e.label_) for e in doc.ents]
print('After', ents)
# [('fb', 0, 1, 'ORG')]

# Alternative way to create the span: from character offsets (0-2 covers "fb")
# instead of token indices.
fb_ent = doc.char_span(0, 2, label="ORG")

Setting entity annotations from array

# Editable code: spaCy v3.0 · Python 3 · via Binder
import numpy
import spacy
from spacy.attrs import ENT_IOB, ENT_TYPE

nlp = spacy.load("en_core_web_sm")
# make_doc is used here, and the doc starts out with no entities (see below).
doc = nlp.make_doc("London is a big city in the United Kingdom.")
print("Before", doc.ents)  # []

# One row per token, one column per attribute listed in `header`.
header = [ENT_IOB, ENT_TYPE]
attr_array = numpy.zeros((len(doc), len(header)), dtype="uint64")
attr_array[0, 0] = 3  # B
attr_array[0, 1] = doc.vocab.strings["GPE"]
doc.from_array(header, attr_array)
print("After", doc.ents)  # [London]

Visualizing named entities

# `spacy` itself must be imported too: the snippet calls spacy.load below.
import spacy
from spacy import displacy

text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."

nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
# Render the document's entities with displaCy's "ent" style.
displacy.serve(doc, style="ent")

Entity Linking (NED)

Accessing entity identifiers (needs a custom entity-linking model)

import spacy

# "my_custom_el_pipeline" is a placeholder name: this snippet needs a pipeline
# that assigns knowledge-base IDs, which must be supplied separately.
nlp = spacy.load("my_custom_el_pipeline")
doc = nlp("Ada Lovelace was born in London")

# Document level
ents = [(e.text, e.label_, e.kb_id_) for e in doc.ents]
print(ents)  # [('Ada Lovelace', 'PERSON', 'Q7259'), ('London', 'GPE', 'Q84')]

# Token level
ent_ada_0 = [doc[0].text, doc[0].ent_type_, doc[0].ent_kb_id_]
ent_ada_1 = [doc[1].text, doc[1].ent_type_, doc[1].ent_kb_id_]
ent_london_5 = [doc[5].text, doc[5].ent_type_, doc[5].ent_kb_id_]
print(ent_ada_0)  # ['Ada', 'PERSON', 'Q7259']
print(ent_ada_1)  # ['Lovelace', 'PERSON', 'Q7259']
print(ent_london_5)  # ['London', 'GPE', 'Q84']



Data Science student @Flatiron-School

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store