Skip to content

Commit ec19ac0

Browse files
authored
Reduce memory use in case-based tokenization. (#8375)
1 parent 86ada2f commit ec19ac0

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

app/lib/search/text_utils.dart

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -102,23 +102,23 @@ Map<String, double>? tokenize(String? originalText, {bool isSplit = false}) {
102102

103103
// Scan for CamelCase phrases and extract Camel and Case separately.
104104
final wordCodeUnits = word.codeUnits;
105-
final changeIndex = <int>[0];
106105
bool prevLower = _isLower(wordCodeUnits[0]);
107-
for (int i = 1; i < word.length; i++) {
108-
final lower = _isLower(wordCodeUnits[i]);
109-
if (!lower && prevLower) {
110-
changeIndex.add(i);
106+
int prevIndex = 0;
107+
for (int i = 1; i <= word.length; i++) {
108+
if (i < word.length) {
109+
final lower = _isLower(wordCodeUnits[i]);
110+
final isChanging = !lower && prevLower;
111+
prevLower = lower;
112+
if (!isChanging) continue;
111113
}
112-
prevLower = lower;
113-
}
114-
changeIndex.add(word.length);
115-
for (int i = 1; i < changeIndex.length; i++) {
116-
final token = normalizeBeforeIndexing(
117-
word.substring(changeIndex[i - 1], changeIndex[i]));
114+
115+
final token = normalizeBeforeIndexing(word.substring(prevIndex, i));
118116
final weight = math.pow((token.length / word.length), 0.5).toDouble();
119117
if ((tokens[token] ?? 0.0) < weight) {
120118
tokens[token] = weight;
121119
}
120+
121+
prevIndex = i;
122122
}
123123
}
124124
return tokens;

0 commit comments

Comments
 (0)