Skip to content

Commit 6f3a3f3

Browse files
author
Marcin Kardas
committed
Filter out stop words
Remove stop words from automatically generated evidences.
1 parent a6bfbed commit 6f3a3f3

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

sota_extractor2/models/linking/context_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(self, taxonomy):
 
     @staticmethod
     def evidences_from_name(key):
         x = normalize_dataset_ws(key)
-        y = x.split()
+        y = [w for w in x.split() if w not in manual_dicts.stop_words]
         return [x] + y if len(y) > 1 else [x]
 
     @staticmethod

sota_extractor2/models/linking/manual_dicts.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,11 @@
     'Top-1 Accuracy': 'Top-1 Error Rate',
     'Top-5 Accuracy': 'Top-5 Error',
 }
+
+stop_words = {
+    "a", "an", "and", "are", "as", "at", "be", "but", "by",
+    "for", "if", "in", "into", "is", "it",
+    "no", "not", "of", "on", "or", "such",
+    "that", "the", "their", "then", "there", "these",
+    "they", "this", "to", "was", "will", "with"
+}

0 commit comments

Comments
 (0)