@@ -108,55 +108,48 @@ export class TfIdfSearchIndex implements ISearchIndex {
108108 documents . push ( uidToDocumentMap [ uid ] ) ;
109109 }
110110
111- // Return documents sorted by TF-IDF
112- return documents . sort ( ( documentA , documentB ) =>
113- this . _calculateTfIdf ( tokens , documentB , corpus ) -
114- this . _calculateTfIdf ( tokens , documentA , corpus )
115- ) ;
116- }
111+ var tokenMap = this . _tokenMap ;
112+ var tokenToIdfCache = this . _tokenToIdfCache ;
113+ var uidFieldName = this . _uidFieldName ;
117114
118- /**
119- * Calculate the inverse document frequency of a search token. This calculation diminishes the weight of tokens that
120- * occur very frequently in the set of searchable documents and increases the weight of terms that occur rarely.
121- */
122- _calculateIdf ( token : string , documents : Array < Object > ) : number {
123- if ( ! this . _tokenToIdfCache [ token ] ) {
124- var numDocumentsWithToken :number = this . _tokenMap [ token ] && this . _tokenMap [ token ] . $numDocumentOccurrences || 0 ;
115+ function calculateIdf ( token : string , documents : Array < Object > ) : number {
116+ if ( ! tokenToIdfCache [ token ] ) {
117+ var numDocumentsWithToken :number = tokenMap [ token ] && tokenMap [ token ] . $numDocumentOccurrences || 0 ;
125118
126- this . _tokenToIdfCache [ token ] = 1 + Math . log ( documents . length / ( 1 + numDocumentsWithToken ) ) ;
119+ tokenToIdfCache [ token ] = 1 + Math . log ( documents . length / ( 1 + numDocumentsWithToken ) ) ;
120+ }
121+
122+ return tokenToIdfCache [ token ] ;
127123 }
128124
129- return this . _tokenToIdfCache [ token ] ;
130- }
125+ function calculateTfIdf ( tokens : Array < string > , document : Object , documents : Array < Object > ) : number {
126+ var score : number = 0 ;
131127
132- /**
133- * Calculate the term frequency–inverse document frequency (TF-IDF) ranking for a set of search tokens and a
134- * document. The TF-IDF is a numeric statistic intended to reflect how important a word (or words) are to a document
135- * in a corpus. The TF-IDF value increases proportionally to the number of times a word appears in the document but
136- * is offset by the frequency of the word in the corpus. This helps to adjust for the fact that some words appear
137- * more frequently in general (e.g. a, and, the).
138- */
139- _calculateTfIdf ( tokens : Array < string > , document : Object , documents : Array < Object > ) : number {
140- var score :number = 0 ;
128+ for ( var i = 0 , numTokens = tokens . length ; i < numTokens ; ++ i ) {
129+ var token :string = tokens [ i ] ;
141130
142- for ( var i = 0 , numTokens = tokens . length ; i < numTokens ; ++ i ) {
143- var token :string = tokens [ i ] ;
131+ var inverseDocumentFrequency :number = calculateIdf ( token , documents ) ;
144132
145- var inverseDocumentFrequency :number = this . _calculateIdf ( token , documents ) ;
133+ if ( inverseDocumentFrequency === Infinity ) {
134+ inverseDocumentFrequency = 0 ;
135+ }
146136
147- if ( inverseDocumentFrequency === Infinity ) {
148- inverseDocumentFrequency = 0 ;
149- }
137+ var uid :any = document && document [ uidFieldName ] ;
138+ var termFrequency :number =
139+ tokenMap [ token ] &&
140+ tokenMap [ token ] . $uidMap [ uid ] &&
141+ tokenMap [ token ] . $uidMap [ uid ] . $numTokenOccurrences || 0 ;
150142
151- var uid :any = document && document [ this . _uidFieldName ] ;
152- var termFrequency :number =
153- this . _tokenMap [ token ] &&
154- this . _tokenMap [ token ] . $uidMap [ uid ] &&
155- this . _tokenMap [ token ] . $uidMap [ uid ] . $numTokenOccurrences || 0 ;
143+ score += termFrequency * inverseDocumentFrequency ;
144+ }
156145
157- score += termFrequency * inverseDocumentFrequency ;
146+ return score ;
158147 }
159148
160- return score ;
149+ // Return documents sorted by TF-IDF
150+ return documents . sort ( ( documentA , documentB ) =>
151+ calculateTfIdf ( tokens , documentB , corpus ) -
152+ calculateTfIdf ( tokens , documentA , corpus )
153+ ) ;
161154 }
162155} ;
0 commit comments