@@ -147,15 +147,11 @@ class TokenIndex {
147147 /// Maps token Strings to a weighted documents (addressed via indexes).
148148 final _inverseIds = < String , Map <int , double >> {};
149149
150- /// {id: size} map to store a value representative to the document length
151- late final List <double > _docWeights;
152-
153- late final _length = _docWeights.length;
150+ late final _length = _ids.length;
154151
155152 TokenIndex (List <String > ids, List <String ?> values) : _ids = ids {
156153 assert (ids.length == values.length);
157154 final length = values.length;
158- _docWeights = List <double >.filled (length, 0.0 );
159155 for (var i = 0 ; i < length; i++ ) {
160156 final text = values[i];
161157
@@ -166,12 +162,12 @@ class TokenIndex {
166162 if (tokens == null || tokens.isEmpty) {
167163 continue ;
168164 }
165+ // Document weight is a highly scaled-down proxy of the length.
166+ final dw = 1 + math.log (1 + tokens.length) / 100 ;
169167 for (final token in tokens.keys) {
170168 final weights = _inverseIds.putIfAbsent (token, () => {});
171- weights[i] = math.max (weights[i] ?? 0.0 , tokens[token]! );
169+ weights[i] = math.max (weights[i] ?? 0.0 , tokens[token]! / dw );
172170 }
173- // Document weight is a highly scaled-down proxy of the length.
174- _docWeights[i] = 1 + math.log (1 + tokens.length) / 100 ;
175171 }
176172 }
177173
@@ -215,7 +211,7 @@ class TokenIndex {
215211 /// When [limitToIds] is specified, the result will contain only the set of
216212 /// identifiers in it.
217213 Map <String , double > _scoreDocs (TokenMatch tokenMatch,
218- {double weight = 1.0 , int wordCount = 1 , Set <String >? limitToIds}) {
214+ {double weight = 1.0 , Set <String >? limitToIds}) {
219215 // Summarize the scores for the documents.
220216 final docScores = List <double >.filled (_length, 0.0 );
221217 for (final token in tokenMatch.tokens) {
@@ -226,11 +222,6 @@ class TokenIndex {
226222 }
227223 }
228224
229- // In multi-word queries we will penalize the score with the document size
230- // for each word separately. As these scores will be multiplied, we need to
231- // compensate the formula in order to prevent multiple exponential penalties.
232- final double wordSizeExponent = 1.0 / wordCount;
233-
234225 final result = < String , double > {};
235226 // post-process match weights
236227 for (var i = 0 ; i < _length; i++ ) {
@@ -242,11 +233,7 @@ class TokenIndex {
242233 if (limitToIds != null && ! limitToIds.contains (id)) {
243234 continue ;
244235 }
245- var dw = _docWeights[i];
246- if (wordCount > 1 ) {
247- dw = math.pow (dw, wordSizeExponent).toDouble ();
248- }
249- result[id] = w * weight / dw;
236+ result[id] = w * weight;
250237 }
251238 return result;
252239 }
@@ -255,7 +242,7 @@ class TokenIndex {
255242 /// scoring.
256243 @visibleForTesting
257244 Map <String , double > search (String text) {
258- return _scoreDocs ( lookupTokens (text));
245+ return searchWords ( splitForQuery (text))._values ;
259246 }
260247
261248 /// Search the index for [words] , with a (term-match / document coverage percent)
@@ -271,7 +258,6 @@ class TokenIndex {
271258 final values = _scoreDocs (
272259 tokens,
273260 weight: weight,
274- wordCount: words.length,
275261 limitToIds: limitToIds,
276262 );
277263 if (values.isEmpty) {
(end of patch — trailing "0 commit comments" was GitHub page residue, not part of the diff)