Skip to content

Commit a06cb9c

Browse files
authored
vetor normalizado
1 parent 5524405 commit a06cb9c

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

src/util_doc2vec_facil.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import json
2323
from nltk.stem.snowball import SnowballStemmer
2424
from util_tradutor_termos import TradutorTermos
25+
from numpy import linalg
2526

2627
STEMMER = SnowballStemmer('portuguese')
2728

@@ -467,8 +468,15 @@ def carregar_modelo(self):
467468
def tokens_sentenca(self, sentenca):
    """Tokenize a sentence using this object's tokenizer.

    Args:
        sentenca: The sentence to tokenize.

    Returns:
        Whatever ``self.tokenizer.tokenizar`` returns for ``sentenca``.
    """
    tokenizador = self.tokenizer
    return tokenizador.tokenizar(sentenca)
469470

470-
# Deleted side of this diff: the previous version returned the raw result of
# self.model.infer_vector for the tokenized sentence, with no normalization.
def vetor_sentenca(self, sentenca):
471-
return self.model.infer_vector(self.tokens_sentenca(sentenca))
471+
# vetor normalizado
472+
@classmethod
def normalizar(cls, v):
    """Scale a vector to unit Euclidean length and return it as a list of floats.

    Args:
        v: Numeric vector. The visible caller passes the result of
            ``self.model.infer_vector(...)``.

    Returns:
        list[float]: ``v`` divided by ``linalg.norm(v)``. A vector whose
        norm is zero is returned unscaled instead of being divided by
        zero, which would fill the result with NaN.
    """
    norma = linalg.norm(v)
    if norma == 0:
        # A zero vector has no direction to preserve; returning it as-is
        # keeps NaN out of any similarity computed from the result.
        return [float(f) for f in v]
    return [float(f) for f in v / norma]
475+
476+
def vetor_sentenca(self, sentenca, normalizado = True):
    """Infer the vector for a sentence, optionally normalized.

    Args:
        sentenca: The sentence to embed; it is tokenized with
            ``self.tokens_sentenca`` before inference.
        normalizado: When truthy (the default), the inferred vector is
            passed through ``self.normalizar`` before being returned.

    Returns:
        The result of ``self.normalizar(vector)`` when ``normalizado`` is
        truthy, otherwise the raw result of ``self.model.infer_vector``.
    """
    tokens = self.tokens_sentenca(sentenca)
    vetor = self.model.infer_vector(tokens)
    return self.normalizar(vetor) if normalizado else vetor
472480

473481
def similaridade_vetor(self, vetor1,vetor2):
    """Return the cosine similarity between two vectors.

    Computed as one minus ``distance.cosine(vetor1, vetor2)``.
    NOTE(review): ``distance`` is imported outside the visible lines,
    presumably ``scipy.spatial.distance`` -- confirm.

    Args:
        vetor1: First vector.
        vetor2: Second vector.

    Returns:
        ``1 - distance.cosine(vetor1, vetor2)``.
    """
    distancia = distance.cosine(vetor1, vetor2)
    return 1 - distancia

0 commit comments

Comments
 (0)