2222# Extract tokens for the given doc
2323print ([token .text for token in introduction_doc ])
2424
25-
2625# %% Reading text from a file instead
2726
28-
2927file_name = "introduction.txt"
3028introduction_file_text = pathlib .Path (file_name ).read_text ()
3129introduction_file_doc = nlp (introduction_file_text )
@@ -78,7 +76,6 @@ def set_custom_boundaries(doc):
7876for sentence in custom_ellipsis_sentences :
7977 print (sentence )
8078
81-
8279ellipsis_doc = nlp (ellipsis_text )
8380ellipsis_sentences = list (ellipsis_doc .sents )
8481for sentence in ellipsis_sentences :
@@ -117,7 +114,6 @@ def set_custom_boundaries(doc):
117114
118115print ([token .text for token in nlp (custom_about_text )[8 :15 ]])
119116
120-
121117custom_nlp = spacy .load ("en_core_web_sm" )
122118prefix_re = spacy .util .compile_prefix_regex (custom_nlp .Defaults .prefixes )
123119suffix_re = spacy .util .compile_suffix_regex (custom_nlp .Defaults .suffixes )
@@ -176,10 +172,8 @@ def set_custom_boundaries(doc):
176172 if str (token ) != str (token .lemma_ ):
177173 print (f"{ str (token ):>20} : { str (token .lemma_ ):20} " )
178174
179-
180175# %% Making use of stop words to count words that aren't stop words
181176
182-
183177complete_text = (
184178 "Gus Proto is a Python developer currently"
185179 " working for a London-based Fintech company. He is"
@@ -224,7 +218,6 @@ def set_custom_boundaries(doc):
224218 ).most_common (5 )
225219)
226220
227-
228221# %% Part of speech tagging
229222
230223for token in about_doc [:5 ]:
@@ -255,7 +248,6 @@ def set_custom_boundaries(doc):
255248
256249# Windows server needs to be manually changed to 127.0.0.1
257250
258-
259251about_interest_text = (
260252 "He is interested in learning" " Natural Language Processing."
261253)
@@ -297,7 +289,6 @@ def preprocess_token(token):
297289
298290# %% Rule based matching
299291
300-
301292matcher = Matcher (nlp .vocab )
302293
303294
@@ -314,7 +305,6 @@ def extract_full_name(nlp_doc):
314305
315306# %% Extracting phone numbers from text with patterns
316307
317-
318308matcher = Matcher (nlp .vocab )
319309conference_org_text = (
320310 "There is a developer conference"
@@ -358,7 +348,6 @@ def extract_phone_number(nlp_doc):
358348{ token .dep_ = } """
359349 )
360350
361-
362351displacy .serve (piano_doc , style = "dep" )
363352
364353# %% Navigating the parsed tree and subtree
@@ -409,7 +398,6 @@ def flatten_tree(tree):
409398
410399# %% Verb phrase detection
411400
412-
413401about_talk_text = (
414402 "In this talk, the speaker will introduce the audience to the use"
415403 " cases of Natural Language Processing in Fintech, making use of"
@@ -420,7 +408,6 @@ def flatten_tree(tree):
420408about_talk_doc = textacy .make_spacy_doc (about_talk_text , lang = "en_core_web_sm" )
421409verb_phrases = textacy .extract .token_matches (about_talk_doc , patterns = patterns )
422410
423-
424411# Print all verb phrases
425412
426413for chunk in verb_phrases :
@@ -450,7 +437,6 @@ def flatten_tree(tree):
450437{ spacy .explain (ent .label_ ) = } """
451438 )
452439
453-
454440displacy .serve (piano_class_doc , style = "ent" )
455441# %% Use NER to redact names in document
456442
0 commit comments