Skip to content

Commit a66ef95

Browse files
scheglov authored and Commit Queue committed
Fine. Deduplicate manifest IDs in binary format
Introduce a manifest-ID table to deduplicate `ManifestItemId`s during serialization and shrink stored artifacts. IDs are now written once per blob and referenced by varint indices, reducing bundle size with a small write-time overhead.

Key changes:
- Add `ManifestIdTableBuilder` and encode IDs via table indices.
- Add `BinaryWriter.writeManifestItemId` / `BinaryReader.readManifestItemId`; redirect `ManifestItemId.write/read` to these helpers.
- Replace `StringIndexer` with `StringTableBuilder` (same behavior, new name).
- Introduce a unified trailer:
  - Layout: `<payload><manifest_id_table><string_table><u32 idOff><u32 strOff>`
  - New APIs: `BinaryWriter.writeTableTrailer()` and `BinaryReader.initFromTableTrailer()`.
- Update all call sites that write/read analyzer bundles (e.g. `LinkedBundleProvider`, `LibraryDiagnosticsBundle`, manifest/requirements serializers) to the new trailer.
- Keep summary2 bundle format unchanged; it still uses its own four-u32 footer. Calls there switch to `initStringTableAt(stringsOffset)` only.
- Simplify `ManifestItemId.hashCode` for faster lookups.
- Propagate tables across `BinaryReader.fork()` to avoid reinitialization.
- Bump data format: `AnalysisDriver.DATA_VERSION` 561 → 562.

Impact (bundleProvider.put):
- Size: 42,694,174 → 33,591,531 bytes (−9,102,643; −21.32%).
- Write time: 179.650 ms → 201.271 ms (+21.621 ms; +12.04%).

Motivation: Consolidating manifest IDs removes repeated 64-bit values from payloads, cutting I/O and storage while keeping deserialization simple and fast.

Change-Id: I886a8d5a745f81a94c7bca126193cb1460d99fae
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/451321
Commit-Queue: Konstantin Shcheglov <[email protected]>
Reviewed-by: Johnni Winther <[email protected]>
1 parent b01bf92 commit a66ef95

File tree

11 files changed

+221
-142
lines changed

11 files changed

+221
-142
lines changed

pkg/analyzer/lib/src/binary/binary_reader.dart

Lines changed: 53 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,53 +8,60 @@ import 'dart:typed_data';
88
import 'package:_fe_analyzer_shared/src/scanner/string_canonicalizer.dart';
99
import 'package:analyzer/src/binary/binary_writer.dart';
1010
import 'package:analyzer/src/binary/string_table.dart';
11+
import 'package:analyzer/src/fine/manifest_id.dart';
1112
import 'package:analyzer/src/utilities/uri_cache.dart';
1213

1314
/// Reader for binary formats.
1415
class BinaryReader {
1516
final Uint8List bytes;
1617
int offset = 0;
1718

18-
late final StringTable _stringTable;
19-
2019
final Int64List _int64Buffer = Int64List(1);
2120
late final Uint8List _int64BufferUint8 = _int64Buffer.buffer.asUint8List();
2221

2322
final Float64List _doubleBuffer = Float64List(1);
2423
late final Uint8List _doubleBufferUint8 = _doubleBuffer.buffer.asUint8List();
2524

25+
List<ManifestItemId>? _manifestIdTable;
26+
StringTable? _stringTable;
27+
2628
BinaryReader(this.bytes);
2729

2830
/// Creates a new reader over the same [bytes], positioned at [offset].
2931
/// It shares this reader's manifest-ID table and string table.
3032
BinaryReader fork(int offset) {
3133
var result = BinaryReader(bytes);
3234
result.offset = offset;
35+
result._manifestIdTable = _manifestIdTable;
3336
result._stringTable = _stringTable;
3437
return result;
3538
}
3639

37-
void initializeStringTableAtOffset(int offset) {
38-
_stringTable = StringTable(bytes: bytes, startOffset: offset);
39-
}
40-
41-
/// Initializes the string table by reading its offset from the end of the
42-
/// buffer.
40+
/// Initializes the manifest-ID and string tables by reading their start
41+
/// offsets from the trailer of two `uint32` values at the end of the buffer.
42+
/// The reader's current offset is preserved.
4343
///
44-
/// This is the counterpart to [BinaryWriter.writeStringTableAtEnd]. That
45-
/// method writes the string table data, and then appends a `uint32` offset
46-
/// pointing to the beginning of that data. This method reads that final
47-
/// offset to initialize the reader's string table, preserving the reader's
48-
/// current position.
49-
void initializeStringTableFromEnd() {
50-
var savedOffset = offset;
51-
try {
52-
offset = bytes.length - 4 * 1;
44+
/// Layout (BOF -> EOF):
45+
/// ```text
46+
/// <payload>
47+
/// <manifest_id_table>
48+
/// <string_table>
49+
/// <manifestIdTableOffset:u32>
50+
/// <stringTableOffset:u32>
51+
/// ```
52+
///
53+
/// This is the counterpart to [BinaryWriter.writeTableTrailer].
54+
void initFromTableTrailer() {
55+
runAtOffset(bytes.length - 4 * 2, () {
56+
var manifestIdTableOffset = readUint32();
5357
var stringTableOffset = readUint32();
54-
initializeStringTableAtOffset(stringTableOffset);
55-
} finally {
56-
offset = savedOffset;
57-
}
58+
_initManifestIdTableAt(manifestIdTableOffset);
59+
initStringTableAt(stringTableOffset);
60+
});
61+
}
62+
63+
/// Initializes the string table as a [StringTable] over [bytes] with
/// [offset] as its start offset.
void initStringTableAt(int offset) {
64+
_stringTable = StringTable(bytes: bytes, startOffset: offset);
5865
}
5966

6067
@pragma("vm:prefer-inline")
@@ -96,6 +103,15 @@ class BinaryReader {
96103
return _int64Buffer[0];
97104
}
98105

106+
/// Reads an index with [readUint30] and returns the entry at that index in
/// the manifest-ID table.
///
/// Throws a [StateError] if the manifest-ID table has not been initialized.
ManifestItemId readManifestItemId() {
107+
var table = _manifestIdTable;
108+
if (table == null) {
109+
throw StateError('Manifest ID table not initialized.');
110+
}
111+
var index = readUint30();
112+
return table[index];
113+
}
114+
99115
Map<K, V> readMap<K, V>({
100116
required K Function() readKey,
101117
required V Function() readValue,
@@ -274,11 +290,24 @@ class BinaryReader {
274290
/// Runs [operation] with the reader temporarily positioned at [offset].
///
/// The previous offset is restored afterwards; with the `try`/`finally`
/// form this also happens when [operation] throws.
void runAtOffset(int offset, void Function() operation) {
275291
var oldOffset = this.offset;
276292
this.offset = offset;
277-
operation();
278-
this.offset = oldOffset;
293+
try {
294+
operation();
295+
} finally {
296+
this.offset = oldOffset;
297+
}
279298
}
280299

281300
/// Returns the string stored at [index] in the string table.
///
/// Throws a [StateError] if the string table has not been initialized.
String stringOfIndex(int index) {
282-
return _stringTable[index];
301+
var table = _stringTable;
302+
if (table == null) {
303+
throw StateError('String table not initialized.');
304+
}
305+
return table[index];
306+
}
307+
308+
/// Reads the manifest-ID table with [ManifestIdTableBuilder.readTable],
/// starting at [offset], and stores it for [readManifestItemId].
///
/// Runs inside [runAtOffset], so the reader's current offset is preserved.
void _initManifestIdTableAt(int offset) {
309+
runAtOffset(offset, () {
310+
_manifestIdTable = ManifestIdTableBuilder.readTable(this);
311+
});
283312
}
284313
}

pkg/analyzer/lib/src/binary/binary_writer.dart

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import 'dart:typed_data';
77

88
import 'package:analyzer/src/binary/binary_reader.dart';
99
import 'package:analyzer/src/binary/string_table.dart';
10+
import 'package:analyzer/src/fine/manifest_id.dart';
1011

1112
/// Buffered writer for binary formats.
1213
class BinaryWriter {
@@ -24,17 +25,23 @@ class BinaryWriter {
2425
final Float64List _doubleBuffer = Float64List(1);
2526
late final Uint8List _doubleBufferUint8 = _doubleBuffer.buffer.asUint8List();
2627

27-
final StringIndexer _stringIndexer;
28+
final ManifestIdTableBuilder _manifestIdTableBuilder;
29+
final StringTableBuilder _stringTableBuilder;
2830

29-
BinaryWriter() : this.withStringIndexer(stringIndexer: StringIndexer());
30-
31-
BinaryWriter.withStringIndexer({required StringIndexer stringIndexer})
32-
: _stringIndexer = stringIndexer;
31+
/// Creates a writer.
///
/// If [stringTableBuilder] or [manifestIdTableBuilder] is provided, it is
/// used by this writer; otherwise a fresh builder is created for it.
BinaryWriter({
32+
StringTableBuilder? stringTableBuilder,
33+
ManifestIdTableBuilder? manifestIdTableBuilder,
34+
}) : _stringTableBuilder = stringTableBuilder ?? StringTableBuilder(),
35+
_manifestIdTableBuilder =
36+
manifestIdTableBuilder ?? ManifestIdTableBuilder();
3337

3438
int get offset => _builder.length + _length;
3539

3640
/// Returns a new [BinaryWriter] that shares this writer's string-table and
/// manifest-ID-table builders.
BinaryWriter clone() {
37-
return BinaryWriter.withStringIndexer(stringIndexer: _stringIndexer);
41+
return BinaryWriter(
42+
stringTableBuilder: _stringTableBuilder,
43+
manifestIdTableBuilder: _manifestIdTableBuilder,
44+
);
3845
}
3946

4047
Uint8List takeBytes() {
@@ -144,6 +151,11 @@ class BinaryWriter {
144151
}
145152
}
146153

154+
/// Writes [id] as an index into the manifest-ID table, using [writeUint30].
///
/// The index is looked up in the manifest-ID table builder.
// NOTE(review): presumably the builder assigns a new index on first use,
// like `StringTableBuilder` does — confirm in `manifest_id.dart`.
void writeManifestItemId(ManifestItemId id) {
155+
var index = _manifestIdTableBuilder[id];
156+
writeUint30(index);
157+
}
158+
147159
void writeMap<K, V>(
148160
Map<K, V> map, {
149161
required void Function(K key) writeKey,
@@ -224,25 +236,10 @@ class BinaryWriter {
224236
}
225237

226238
/// Writes [value] as an index into the string table, using [writeUint30].
///
/// The index is looked up in the string table builder, which assigns a new
/// index the first time a string is seen.
void writeStringReference(String value) {
227-
var index = _stringIndexer[value];
239+
var index = _stringTableBuilder[value];
228240
writeUint30(index);
229241
}
230242

231-
/// Writes the string table and its starting offset.
232-
///
233-
/// This method writes the collected string data first, then writes the
234-
/// `uint32` offset where that data begins.
235-
///
236-
/// It must be called after all other data has been written and immediately
237-
/// before [takeBytes]. This ensures the last 4 bytes reliably point to
238-
/// the string table, which a reader will use to decode string references.
239-
///
240-
/// This is the counterpart to [BinaryReader.initializeStringTableFromEnd].
241-
void writeStringTableAtEnd() {
242-
var offset = _stringIndexer.write(this);
243-
writeUint32(offset);
244-
}
245-
246243
/// Write the [value] as UTF8 encoded byte array.
247244
void writeStringUtf8(String value) {
248245
var bytes = const Utf8Encoder().convert(value);
@@ -256,6 +253,29 @@ class BinaryWriter {
256253
}
257254
}
258255

256+
/// Writes the table trailer, recording the start offsets of the manifest-ID
257+
/// table and the string table as two `uint32` values.
258+
///
259+
/// Layout (BOF -> EOF):
260+
///
261+
/// ```text
262+
/// <payload>
263+
/// <manifest_id_table>
264+
/// <string_table>
265+
/// <manifestIdTableOffset:u32>
266+
/// <stringTableOffset:u32>
267+
/// ```
268+
///
269+
/// Call this after writing all other data, immediately before [takeBytes].
270+
///
271+
/// This is the counterpart to [BinaryReader.initFromTableTrailer].
272+
void writeTableTrailer() {
273+
// Emit both tables first; each `write` returns the offset recorded below.
var manifestIdTableOffset = _manifestIdTableBuilder.write(this);
274+
var stringTableOffset = _stringTableBuilder.write(this);
275+
// The two offsets go last so a reader can find them at a fixed distance
// from the end of the buffer.
writeUint32(manifestIdTableOffset);
276+
writeUint32(stringTableOffset);
277+
}
278+
259279
@pragma("vm:prefer-inline")
260280
void writeUint30(int value) {
261281
assert(value >= 0 && value >> 30 == 0);

pkg/analyzer/lib/src/binary/string_table.dart

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -7,80 +7,6 @@ import 'dart:typed_data';
77
import 'package:_fe_analyzer_shared/src/scanner/string_canonicalizer.dart';
88
import 'package:analyzer/src/binary/binary_writer.dart';
99

10-
class StringIndexer {
11-
final Map<String, int> _index = {};
12-
13-
int operator [](String string) {
14-
var result = _index[string];
15-
16-
if (result == null) {
17-
result = _index.length;
18-
_index[string] = result;
19-
}
20-
21-
return result;
22-
}
23-
24-
int write(BinaryWriter writer) {
25-
var bytesOffset = writer.offset;
26-
27-
var length = _index.length;
28-
var lengths = Uint32List(length);
29-
var lengthsIndex = 0;
30-
for (var key in _index.keys) {
31-
var stringStart = writer.offset;
32-
_writeWtf8(writer, key);
33-
lengths[lengthsIndex++] = writer.offset - stringStart;
34-
}
35-
36-
var resultOffset = writer.offset;
37-
38-
var lengthOfBytes = writer.offset - bytesOffset;
39-
writer.writeUint30(lengthOfBytes);
40-
writer.writeUint30List(lengths);
41-
42-
return resultOffset;
43-
}
44-
45-
/// Write [source] string into [writer].
46-
static void _writeWtf8(BinaryWriter writer, String source) {
47-
var end = source.length;
48-
if (end == 0) {
49-
return;
50-
}
51-
52-
int i = 0;
53-
do {
54-
var codeUnit = source.codeUnitAt(i++);
55-
if (codeUnit < 128) {
56-
// ASCII.
57-
writer.writeByte(codeUnit);
58-
} else if (codeUnit < 0x800) {
59-
// Two-byte sequence (11-bit unicode value).
60-
writer.writeByte(0xC0 | (codeUnit >> 6));
61-
writer.writeByte(0x80 | (codeUnit & 0x3f));
62-
} else if ((codeUnit & 0xFC00) == 0xD800 &&
63-
i < end &&
64-
(source.codeUnitAt(i) & 0xFC00) == 0xDC00) {
65-
// Surrogate pair -> four-byte sequence (non-BMP unicode value).
66-
int codeUnit2 = source.codeUnitAt(i++);
67-
int unicode =
68-
0x10000 + ((codeUnit & 0x3FF) << 10) + (codeUnit2 & 0x3FF);
69-
writer.writeByte(0xF0 | (unicode >> 18));
70-
writer.writeByte(0x80 | ((unicode >> 12) & 0x3F));
71-
writer.writeByte(0x80 | ((unicode >> 6) & 0x3F));
72-
writer.writeByte(0x80 | (unicode & 0x3F));
73-
} else {
74-
// Three-byte sequence (16-bit unicode value), including lone
75-
// surrogates.
76-
writer.writeByte(0xE0 | (codeUnit >> 12));
77-
writer.writeByte(0x80 | ((codeUnit >> 6) & 0x3f));
78-
writer.writeByte(0x80 | (codeUnit & 0x3f));
79-
}
80-
} while (i < end);
81-
}
82-
}
83-
8410
class StringTable {
8511
final Uint8List _bytes;
8612
int _byteOffset;
@@ -199,3 +125,77 @@ class StringTable {
199125
return String.fromCharCodes(charCodes, 0, j);
200126
}
201127
}
128+
129+
/// Collects distinct strings, assigning each one an index, and serializes
/// them with [write].
class StringTableBuilder {
130+
// Maps each collected string to its index; indices are assigned in
// insertion order, starting at zero.
final Map<String, int> _index = {};
131+
132+
/// Returns the index of [string], assigning the next free index if the
/// string has not been seen before.
int operator [](String string) {
133+
var result = _index[string];
134+
135+
if (result == null) {
136+
result = _index.length;
137+
_index[string] = result;
138+
}
139+
140+
return result;
141+
}
142+
143+
/// Writes the collected strings into [writer].
///
/// The encoded bytes of every string are written first, followed by the
/// total number of those bytes and the list of per-string byte lengths.
///
/// Returns the offset at which the total byte count is written, i.e. the
/// position just after the encoded string bytes.
int write(BinaryWriter writer) {
144+
var bytesOffset = writer.offset;
145+
146+
var length = _index.length;
147+
var lengths = Uint32List(length);
148+
var lengthsIndex = 0;
149+
for (var key in _index.keys) {
150+
var stringStart = writer.offset;
151+
_writeWtf8(writer, key);
152+
// Record the encoded byte length, not the number of UTF-16 code units.
lengths[lengthsIndex++] = writer.offset - stringStart;
153+
}
154+
155+
var resultOffset = writer.offset;
156+
157+
var lengthOfBytes = writer.offset - bytesOffset;
158+
writer.writeUint30(lengthOfBytes);
159+
writer.writeUint30List(lengths);
160+
161+
return resultOffset;
162+
}
163+
164+
/// Write [source] string into [writer].
///
/// Surrogate pairs are combined into four-byte sequences; lone surrogates
/// are written as three-byte sequences. An empty string writes nothing.
165+
static void _writeWtf8(BinaryWriter writer, String source) {
166+
var end = source.length;
167+
if (end == 0) {
168+
return;
169+
}
170+
171+
int i = 0;
172+
do {
173+
var codeUnit = source.codeUnitAt(i++);
174+
if (codeUnit < 128) {
175+
// ASCII.
176+
writer.writeByte(codeUnit);
177+
} else if (codeUnit < 0x800) {
178+
// Two-byte sequence (11-bit unicode value).
179+
writer.writeByte(0xC0 | (codeUnit >> 6));
180+
writer.writeByte(0x80 | (codeUnit & 0x3f));
181+
} else if ((codeUnit & 0xFC00) == 0xD800 &&
182+
i < end &&
183+
(source.codeUnitAt(i) & 0xFC00) == 0xDC00) {
184+
// Surrogate pair -> four-byte sequence (non-BMP unicode value).
185+
int codeUnit2 = source.codeUnitAt(i++);
186+
int unicode =
187+
0x10000 + ((codeUnit & 0x3FF) << 10) + (codeUnit2 & 0x3FF);
188+
writer.writeByte(0xF0 | (unicode >> 18));
189+
writer.writeByte(0x80 | ((unicode >> 12) & 0x3F));
190+
writer.writeByte(0x80 | ((unicode >> 6) & 0x3F));
191+
writer.writeByte(0x80 | (unicode & 0x3F));
192+
} else {
193+
// Three-byte sequence (16-bit unicode value), including lone
194+
// surrogates.
195+
writer.writeByte(0xE0 | (codeUnit >> 12));
196+
writer.writeByte(0x80 | ((codeUnit >> 6) & 0x3f));
197+
writer.writeByte(0x80 | (codeUnit & 0x3f));
198+
}
199+
} while (i < end);
200+
}
201+
}

0 commit comments

Comments
 (0)