From f6467ac9ee80a84e60a6a0abc4e0515ddfba430d Mon Sep 17 00:00:00 2001 From: imLogM Date: Tue, 18 Jun 2019 18:28:22 +0800 Subject: [PATCH] correct IDF formula in "compute_idf.py" The computation of IDF in "compute_idf.py" was wrong: it counted every occurrence of the word instead of the number of corpus lines that contain it, and it added 1 to the ratio instead of to the denominator. This patch corrects both. --- Utils/compute_idf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Utils/compute_idf.py b/Utils/compute_idf.py index ec3bb9c..c2cce81 100644 --- a/Utils/compute_idf.py +++ b/Utils/compute_idf.py @@ -29,8 +29,9 @@ def load_corpus(self): def com_idf(self,word): n = 0 for _,line in enumerate(self.corpus_data): - n+=line.count(word) - idf=math.log(1.0*self.N/n+1) + if line.find(word) != -1: + n += 1 + idf=math.log(1.0*self.N/(n+1)) return {word:idf} def parts(self):