Skip to content

Commit e82f16f

Browse files
committed
reverts change that introduced new, unused fields
1 parent e22685f commit e82f16f

File tree

1 file changed

+0
-14
lines changed

1 file changed

+0
-14
lines changed

stanza/models/coref/dataset.py

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,6 @@ def __init__(self, path, config, tokenizer):
3838
word2subword = []
3939
subwords = []
4040
word_id = []
41-
nonblank_subwords = [] # a list of subwords, skipping _
42-
previous_was_blank = [] # was the word before _?
43-
was_blank = False # a flag to set if we saw "_"
4441
for i, word in enumerate(doc["cased_words"]):
4542
tokenized = self.tokenizer.tokenize(word)
4643
if len(tokenized) == 0:
@@ -53,17 +50,6 @@ def __init__(self, path, config, tokenizer):
5350
word2subword.append((len(subwords), len(subwords) + len(tokenized_word)))
5451
subwords.extend(tokenized_word)
5552
word_id.extend([i] * len(tokenized_word))
56-
if word == "_":
57-
was_blank = True
58-
else:
59-
nonblank_subwords.extend(tokenized_word)
60-
previous_was_blank.extend(
61-
[True if was_blank else False]+[False]*(len(tokenized_word)-1)
62-
)
63-
was_blank = False
64-
65-
doc["nonblank_subwords"] = nonblank_subwords
66-
doc["blank_prefix"] = previous_was_blank
6753

6854
doc["word2subword"] = word2subword
6955
doc["subwords"] = subwords

0 commit comments

Comments
 (0)