Skip to content

Commit 1fbd8a1

Browse files
committed
Reduce memory usage during index construction.
1 parent 7abfe99 commit 1fbd8a1

File tree

2 files changed

+43
-39
lines changed

2 files changed

+43
-39
lines changed

app/lib/search/mem_index.dart

Lines changed: 12 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@ import 'package:collection/collection.dart';
1010
import 'package:logging/logging.dart';
1111
import 'package:meta/meta.dart';
1212
import 'package:pub_dev/search/heap.dart';
13-
import 'package:pub_dev/service/topics/models.dart';
1413
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1514

1615
import 'models.dart';
@@ -23,7 +22,6 @@ final _textSearchTimeout = Duration(milliseconds: 500);
2322

2423
class InMemoryPackageIndex {
2524
final List<PackageDocument> _documents;
26-
final _documentsByName = <String, PackageDocument>{};
2725
final _nameToIndex = <String, int>{};
2826
late final PackageNameIndex _packageNameIndex;
2927
late final TokenIndex<String> _descrIndex;
@@ -47,23 +45,15 @@ class InMemoryPackageIndex {
4745
late final List<IndexedPackageHit> _pointsOrderedHits;
4846
late final List<IndexedPackageHit> _trendingOrderedHits;
4947

50-
// Contains all of the topics the index had seen so far.
51-
// TODO: consider moving this into a separate index
52-
// TODO: get the list of topics from the bucket
53-
final _topics = <String>{
54-
...canonicalTopics.aliasToCanonicalMap.values,
55-
};
56-
5748
late final DateTime _lastUpdated;
5849

5950
InMemoryPackageIndex({
6051
required Iterable<PackageDocument> documents,
6152
}) : _documents = [...documents] {
6253
final apiDocPageKeys = <IndexedApiDocPage>[];
63-
final apiDocPageValues = <String>[];
54+
final apiDocPageValues = <List<String>>[];
6455
for (var i = 0; i < _documents.length; i++) {
6556
final doc = _documents[i];
66-
_documentsByName[doc.package] = doc;
6757
_nameToIndex[doc.package] = i;
6858

6959
// transform tags into numerical IDs
@@ -78,16 +68,10 @@ class InMemoryPackageIndex {
7868
for (final page in apiDocPages) {
7969
if (page.symbols != null && page.symbols!.isNotEmpty) {
8070
apiDocPageKeys.add(IndexedApiDocPage(i, page));
81-
apiDocPageValues.add(page.symbols!.join(' '));
71+
apiDocPageValues.add(page.symbols!);
8272
}
8373
}
8474
}
85-
86-
// Note: we are not removing topics from this set, only adding them, no
87-
// need for tracking the current topic count.
88-
_topics.addAll(doc.tags
89-
.where((t) => t.startsWith('topic:'))
90-
.map((t) => t.split('topic:').last));
9175
}
9276

9377
final packageKeys = _documents.map((d) => d.package).toList();
@@ -101,7 +85,7 @@ class InMemoryPackageIndex {
10185
packageKeys,
10286
_documents.map((d) => d.readme).toList(),
10387
);
104-
_apiSymbolIndex = TokenIndex(apiDocPageKeys, apiDocPageValues);
88+
_apiSymbolIndex = TokenIndex.fromValues(apiDocPageKeys, apiDocPageValues);
10589

10690
// update download scores only if they were not set (should happen on old runtime's snapshot and local tests)
10791
if (_documents.any((e) => e.downloadScore == null)) {
@@ -131,7 +115,7 @@ class InMemoryPackageIndex {
131115
IndexInfo indexInfo() {
132116
return IndexInfo(
133117
isReady: true,
134-
packageCount: _documentsByName.length,
118+
packageCount: _documents.length,
135119
lastUpdated: _lastUpdated,
136120
);
137121
}
@@ -363,7 +347,7 @@ class InMemoryPackageIndex {
363347
}
364348

365349
// exact package name
366-
if (_documentsByName.containsKey(parsedQueryText)) {
350+
if (_nameToIndex.containsKey(parsedQueryText)) {
367351
return parsedQueryText;
368352
}
369353

@@ -377,12 +361,9 @@ class InMemoryPackageIndex {
377361
}
378362
// Note: to keep it simple, we select the most downloaded one from competing matches.
379363
return matches.reduce((a, b) {
380-
if (_documentsByName[a]!.downloadCount >
381-
_documentsByName[b]!.downloadCount) {
382-
return a;
383-
} else {
384-
return b;
385-
}
364+
final aDoc = _documents[_nameToIndex[a]!];
365+
final bDoc = _documents[_nameToIndex[b]!];
366+
return aDoc.downloadCount > bDoc.downloadCount ? a : b;
386367
});
387368
}
388369

@@ -487,7 +468,7 @@ class InMemoryPackageIndex {
487468
packageScores.setValueMaxOf(doc.index, value);
488469

489470
// add the page and re-sort the current results
490-
pages.add(MapEntry(doc.page.relativePath, value));
471+
pages.add(MapEntry(doc.relativePath, value));
491472
if (pages.length > 1) {
492473
pages.sort((a, b) => -a.value.compareTo(b.value));
493474
}
@@ -750,7 +731,8 @@ class IndexedPackageHit {
750731

751732
class IndexedApiDocPage {
752733
final int index;
753-
final ApiDocPage page;
734+
final String relativePath;
754735

755-
IndexedApiDocPage(this.index, this.page);
736+
IndexedApiDocPage(this.index, ApiDocPage page)
737+
: relativePath = page.relativePath;
756738
}

app/lib/search/token_index.dart

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -51,21 +51,43 @@ class TokenIndex<K> {
5151
if (text == null) {
5252
continue;
5353
}
54-
final tokens = tokenize(text);
55-
if (tokens == null || tokens.isEmpty) {
54+
_build(i, text, skipDocumentWeight);
55+
}
56+
}
57+
58+
TokenIndex.fromValues(
59+
List<K> ids,
60+
List<List<String>?> values, {
61+
bool skipDocumentWeight = false,
62+
}) : _ids = ids {
63+
assert(ids.length == values.length);
64+
final length = values.length;
65+
for (var i = 0; i < length; i++) {
66+
final parts = values[i];
67+
68+
if (parts == null || parts.isEmpty) {
5669
continue;
5770
}
58-
// Document weight is a highly scaled-down proxy of the length.
59-
final dw =
60-
skipDocumentWeight ? 1.0 : 1 + math.log(1 + tokens.length) / 100;
61-
for (final e in tokens.entries) {
62-
final token = e.key;
63-
final weights = _inverseIds.putIfAbsent(token, () => {});
64-
weights[i] = math.max(weights[i] ?? 0.0, e.value / dw);
71+
for (final text in parts) {
72+
_build(i, text, skipDocumentWeight);
6573
}
6674
}
6775
}
6876

77+
void _build(int i, String text, bool skipDocumentWeight) {
78+
final tokens = tokenize(text);
79+
if (tokens == null || tokens.isEmpty) {
80+
return;
81+
}
82+
// Document weight is a highly scaled-down proxy of the length.
83+
final dw = skipDocumentWeight ? 1.0 : 1 + math.log(1 + tokens.length) / 100;
84+
for (final e in tokens.entries) {
85+
final token = e.key;
86+
final weights = _inverseIds.putIfAbsent(token, () => {});
87+
weights[i] = math.max(weights[i] ?? 0.0, e.value / dw);
88+
}
89+
}
90+
6991
factory TokenIndex.fromMap(Map<K, String> map) {
7092
final keys = map.keys.toList();
7193
final values = map.values.toList();

0 commit comments

Comments
 (0)