We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 90ce791 · commit 53a320e · Copy full SHA for 53a320e
nemo_curator/stages/text/filters/fasttext_filter.py
@@ -44,9 +44,9 @@ def score_document(self, text: str) -> float:
44
model = self._fasttext_quality_filter_model
45
46
text = text.replace("\n", " ").replace("__label__", " ")
47
- pred = model.predict(text)
48
- document_score = pred[1][0]
49
- if pred[0][0] != self._label:
+ label, score = model.predict([text])
+ document_score = score[0][0].item()
+ if label[0][0] != self._label:
50
document_score = 1 - document_score
51
52
return document_score
@@ -78,9 +78,9 @@ def score_document(self, text: str) -> list[float | str]:
78
model = self._fasttext_langid_model
79
80
pp = text.strip().replace("\n", " ")
81
- label, score = model.predict(pp, k=1)
82
- score = score[0]
83
- lang_code = label[0][-2:].upper()
+ label, score = model.predict([pp], k=1)
+ score = score[0][0].item()
+ lang_code = label[0][0][-2:].upper()
84
85
# Need to convert it to a string to allow backend conversions
86
return str([score, lang_code])
0 commit comments