Skip to content

Commit 92395a1

Browse files
scheglov authored and Commit Queue committed
Elements. Move binary sink / reader out of summaries.
They are used for more than just summaries. I will rename the classes and make other changes in separate CLs.

Change-Id: Ia4dec125743a8cf63df11974f8bd42037b1b81da
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/448800
Commit-Queue: Konstantin Shcheglov <[email protected]>
Reviewed-by: Samuel Rawlins <[email protected]>
1 parent e0494b7 commit 92395a1

22 files changed

+240
-250
lines changed

pkg/analyzer/lib/src/summary2/data_reader.dart renamed to pkg/analyzer/lib/src/binary/binary_reader.dart

Lines changed: 3 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ import 'dart:convert';
66
import 'dart:typed_data';
77

88
import 'package:_fe_analyzer_shared/src/scanner/string_canonicalizer.dart';
9+
import 'package:analyzer/src/binary/string_table.dart';
910

1011
/// Helper for reading primitive types from bytes.
1112
class SummaryDataReader {
1213
final Uint8List bytes;
1314
int offset = 0;
1415

15-
late final _StringTable _stringTable;
16+
late final StringTable _stringTable;
1617

1718
final Int64List _int64Buffer = Int64List(1);
1819
late final Uint8List _int64BufferUint8 = _int64Buffer.buffer.asUint8List();
@@ -23,7 +24,7 @@ class SummaryDataReader {
2324
SummaryDataReader(this.bytes);
2425

2526
void createStringTable(int offset) {
26-
_stringTable = _StringTable(bytes: bytes, startOffset: offset);
27+
_stringTable = StringTable(bytes: bytes, startOffset: offset);
2728
}
2829

2930
/// Create a new instance with the given [offset].
@@ -240,122 +241,3 @@ class SummaryDataReader {
240241
return _stringTable[index];
241242
}
242243
}
243-
244-
/// Lazily decoded table of strings stored in a byte buffer.
///
/// Only the per-string byte ranges are computed up front; a string is decoded
/// the first time it is requested and then cached.
class _StringTable {
  /// The buffer that holds both the encoded strings and the table header.
  final Uint8List _bytes;

  /// Read cursor used while parsing the table header in the constructor.
  int _byteOffset;

  /// Start offsets of the strings, plus one trailing entry for the end of the
  /// last string, so entry `i` spans `_offsets[i]` to `_offsets[i + 1]`.
  late final Uint32List _offsets;

  /// Decoded strings; `null` until the entry is first requested.
  late final List<String?> _strings;

  /// Parses the table header located at [startOffset] in [bytes].
  ///
  /// Layout of the table:
  /// - `<bytes with encoded strings>`
  /// - `<the length of the bytes> <-- [startOffset]`
  /// - `<the number strings>`
  /// - `<the array of lengths of individual strings>`
  _StringTable({required Uint8List bytes, required int startOffset})
    : _bytes = bytes,
      _byteOffset = startOffset {
    // The encoded strings end right where the header begins.
    var encodedLength = _readUInt30();
    var count = _readUInt30();

    var starts = Uint32List(count + 1);
    var cursor = startOffset - encodedLength;
    for (var i = 0; i < count; i++) {
      starts[i] = cursor;
      cursor += _readUInt30();
    }
    starts[count] = cursor;

    _offsets = starts;
    _strings = List.filled(count, null);
  }

  /// Returns the string at [index], decoding and caching it on first access.
  String operator [](int index) {
    var cached = _strings[index];
    if (cached != null) {
      return cached;
    }

    var start = _offsets[index];
    var numBytes = _offsets[index + 1] - start;
    var decoded = considerCanonicalizeString(_readStringEntry(start, numBytes));
    _strings[index] = decoded;
    return decoded;
  }

  /// Returns the byte at the header cursor and advances the cursor.
  int _readByte() => _bytes[_byteOffset++];

  /// Decodes the [numBytes] bytes beginning at [start] into a string.
  ///
  /// Pure ASCII entries are converted directly; the WTF-8 decoder is used as
  /// soon as a byte above 127 is found.
  String _readStringEntry(int start, int numBytes) {
    var end = start + numBytes;
    var i = start;
    while (i < end) {
      if (_bytes[i] > 127) {
        return _decodeWtf8(_bytes, start, end);
      }
      i++;
    }
    return String.fromCharCodes(_bytes, start, end);
  }

  /// Reads a variable-length unsigned integer of one, two, or four bytes.
  ///
  /// The two top bits of the first byte select the width.
  int _readUInt30() {
    var first = _readByte();
    if ((first & 0x80) == 0) {
      // 0xxxxxxx
      return first;
    }

    var payload = first & 0x3F;
    if ((first & 0x40) == 0) {
      // 10xxxxxx
      return (payload << 8) | _readByte();
    }

    // 11xxxxxx
    var second = _readByte();
    var third = _readByte();
    var fourth = _readByte();
    return (payload << 24) | (second << 16) | (third << 8) | fourth;
  }

  /// Decodes the WTF-8 bytes in `[start, end)` of [bytes] into a string.
  ///
  /// The input is trusted to be valid and complete WTF-8. Rather than masking
  /// the control bits of each byte, the shifted bytes are xor'ed together and
  /// all control bits are then removed with a single xor constant.
  static String _decodeWtf8(Uint8List bytes, int start, int end) {
    // One UTF-16 code unit per input byte is always enough.
    var units = Uint16List(end - start);
    var read = start;
    var written = 0;
    while (read < end) {
      var lead = bytes[read++];
      if (lead < 0x80) {
        // ASCII.
        units[written++] = lead;
        continue;
      }

      if (lead < 0xE0) {
        // Two-byte sequence (11-bit unicode value).
        var second = bytes[read++];
        var value = (lead << 6) ^ second ^ 0x3080;
        assert(value >= 0x80 && value < 0x800);
        units[written++] = value;
        continue;
      }

      if (lead < 0xF0) {
        // Three-byte sequence (16-bit unicode value).
        var second = bytes[read++];
        var third = bytes[read++];
        var value = (lead << 12) ^ (second << 6) ^ third ^ 0xE2080;
        assert(value >= 0x800 && value < 0x10000);
        units[written++] = value;
        continue;
      }

      // Four-byte sequence (non-BMP unicode value).
      var second = bytes[read++];
      var third = bytes[read++];
      var fourth = bytes[read++];
      var value =
          (lead << 18) ^ (second << 12) ^ (third << 6) ^ fourth ^ 0x3C82080;
      assert(value >= 0x10000 && value < 0x110000);
      // 0xD7C0 is 0xD800 - (0x10000 >> 10), so the 0x10000 bias is removed
      // while forming the lead surrogate.
      units[written++] = 0xD7C0 + (value >> 10);
      units[written++] = 0xDC00 + (value & 0x3FF);
    }
    assert(read == end);
    return String.fromCharCodes(units, 0, written);
  }
}

pkg/analyzer/lib/src/summary2/data_writer.dart renamed to pkg/analyzer/lib/src/binary/binary_writer.dart

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,18 +104,6 @@ class BufferedSink {
104104
}
105105
}
106106

107-
/// Writes a boolean flag telling whether [object] is a [T], and when it is,
/// invokes [ifTrue] with it so that the caller can write its content.
void writeIfType<T extends Object>(
  Object? object,
  void Function(T t) ifTrue,
) {
  if (object is! T) {
    writeBool(false);
    return;
  }

  // The flag precedes the payload.
  writeBool(true);
  ifTrue(object);
}
118-
119107
void writeInt64(int value) {
120108
_int64Buffer[0] = value;
121109
_addByte4(
@@ -144,15 +132,6 @@ class BufferedSink {
144132
}
145133
}
146134

147-
/// Writes the elements of [items] that are instances of [T].
///
/// The number of matching elements is written first, then [writeItem] is
/// invoked for each of them in order.
void writeList2<T>(List<Object> items, void Function(T x) writeItem) {
  var matching = items.whereType<T>().toList();
  writeUInt30(matching.length);
  for (var item in matching) {
    writeItem(item);
  }
}
155-
156135
void writeMap<K, V>(
157136
Map<K, V> map, {
158137
required void Function(K key) writeKey,
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:typed_data';
6+
7+
import 'package:_fe_analyzer_shared/src/scanner/string_canonicalizer.dart';
8+
import 'package:analyzer/src/binary/binary_writer.dart';
9+
10+
/// Assigns consecutive integer indices to distinct strings, and writes the
/// collected strings into a [BufferedSink] as a table.
class StringIndexer {
  /// The index assigned to each string seen so far.
  final Map<String, int> _index = {};

  /// Returns the index of [string], assigning the next unused index the
  /// first time the string is seen.
  int operator [](String string) {
    var existing = _index[string];
    if (existing != null) {
      return existing;
    }

    var assigned = _index.length;
    _index[string] = assigned;
    return assigned;
  }

  /// Writes every indexed string into [sink] as WTF-8, followed by the total
  /// number of bytes written for the strings and the byte length of each
  /// individual string (via `writeUint30List`).
  ///
  /// Returns the offset of [sink] at which the total byte length was written.
  int write(BufferedSink sink) {
    var firstByteOffset = sink.offset;

    var byteLengths = Uint32List(_index.length);
    var slot = 0;
    for (var string in _index.keys) {
      var before = sink.offset;
      _writeWtf8(sink, string);
      byteLengths[slot] = sink.offset - before;
      slot++;
    }

    var tableOffset = sink.offset;

    sink.writeUInt30(sink.offset - firstByteOffset);
    sink.writeUint30List(byteLengths);

    return tableOffset;
  }

  /// Writes the UTF-16 code units of [source] into [sink] as WTF-8.
  ///
  /// Nothing is written for an empty string.
  static void _writeWtf8(BufferedSink sink, String source) {
    var length = source.length;
    var position = 0;
    while (position < length) {
      var unit = source.codeUnitAt(position);
      position++;

      if (unit < 128) {
        // ASCII.
        sink.writeByte(unit);
        continue;
      }

      if (unit < 0x800) {
        // Two-byte sequence (11-bit unicode value).
        sink.writeByte(0xC0 | (unit >> 6));
        sink.writeByte(0x80 | (unit & 0x3f));
        continue;
      }

      if ((unit & 0xFC00) == 0xD800 && position < length) {
        var next = source.codeUnitAt(position);
        if ((next & 0xFC00) == 0xDC00) {
          // Surrogate pair -> four-byte sequence (non-BMP unicode value).
          position++;
          var codePoint = 0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF);
          sink.writeByte(0xF0 | (codePoint >> 18));
          sink.writeByte(0x80 | ((codePoint >> 12) & 0x3F));
          sink.writeByte(0x80 | ((codePoint >> 6) & 0x3F));
          sink.writeByte(0x80 | (codePoint & 0x3F));
          continue;
        }
      }

      // Three-byte sequence (16-bit unicode value), including lone
      // surrogates.
      sink.writeByte(0xE0 | (unit >> 12));
      sink.writeByte(0x80 | ((unit >> 6) & 0x3f));
      sink.writeByte(0x80 | (unit & 0x3f));
    }
  }
}
83+
84+
class StringTable {
85+
final Uint8List _bytes;
86+
int _byteOffset;
87+
88+
late final Uint32List _offsets;
89+
late final List<String?> _strings;
90+
91+
/// The structure of the table:
92+
/// - `<bytes with encoded strings>`
93+
/// - `<the length of the bytes> <-- [startOffset]`
94+
/// - `<the number strings>`
95+
/// - `<the array of lengths of individual strings>`
96+
StringTable({required Uint8List bytes, required int startOffset})
97+
: _bytes = bytes,
98+
_byteOffset = startOffset {
99+
var offset = startOffset - _readUInt30();
100+
var length = _readUInt30();
101+
102+
_offsets = Uint32List(length + 1);
103+
for (var i = 0; i < length; i++) {
104+
var stringLength = _readUInt30();
105+
_offsets[i] = offset;
106+
offset += stringLength;
107+
}
108+
_offsets[length] = offset;
109+
110+
_strings = List.filled(length, null);
111+
}
112+
113+
String operator [](int index) {
114+
var result = _strings[index];
115+
116+
if (result == null) {
117+
int start = _offsets[index];
118+
int end = _offsets[index + 1];
119+
int length = end - start;
120+
result = _readStringEntry(_offsets[index], length);
121+
result = considerCanonicalizeString(result);
122+
_strings[index] = result;
123+
}
124+
125+
return result;
126+
}
127+
128+
int _readByte() {
129+
return _bytes[_byteOffset++];
130+
}
131+
132+
String _readStringEntry(int start, int numBytes) {
133+
var end = start + numBytes;
134+
for (var i = start; i < end; i++) {
135+
if (_bytes[i] > 127) {
136+
return _decodeWtf8(_bytes, start, end);
137+
}
138+
}
139+
return String.fromCharCodes(_bytes, start, end);
140+
}
141+
142+
int _readUInt30() {
143+
var byte = _readByte();
144+
if (byte & 0x80 == 0) {
145+
// 0xxxxxxx
146+
return byte;
147+
} else if (byte & 0x40 == 0) {
148+
// 10xxxxxx
149+
return ((byte & 0x3F) << 8) | _readByte();
150+
} else {
151+
// 11xxxxxx
152+
return ((byte & 0x3F) << 24) |
153+
(_readByte() << 16) |
154+
(_readByte() << 8) |
155+
_readByte();
156+
}
157+
}
158+
159+
static String _decodeWtf8(Uint8List bytes, int start, int end) {
160+
// WTF-8 decoder that trusts its input, meaning that the correctness of
161+
// the code depends on the bytes from start to end being valid and
162+
// complete WTF-8. Instead of masking off the control bits from every
163+
// byte, it simply xor's the byte values together at their appropriate
164+
// bit shifts, and then xor's out all of the control bits at once.
165+
Uint16List charCodes = Uint16List(end - start);
166+
int i = start;
167+
int j = 0;
168+
while (i < end) {
169+
int byte = bytes[i++];
170+
if (byte < 0x80) {
171+
// ASCII.
172+
charCodes[j++] = byte;
173+
} else if (byte < 0xE0) {
174+
// Two-byte sequence (11-bit unicode value).
175+
int byte2 = bytes[i++];
176+
int value = (byte << 6) ^ byte2 ^ 0x3080;
177+
assert(value >= 0x80 && value < 0x800);
178+
charCodes[j++] = value;
179+
} else if (byte < 0xF0) {
180+
// Three-byte sequence (16-bit unicode value).
181+
int byte2 = bytes[i++];
182+
int byte3 = bytes[i++];
183+
int value = (byte << 12) ^ (byte2 << 6) ^ byte3 ^ 0xE2080;
184+
assert(value >= 0x800 && value < 0x10000);
185+
charCodes[j++] = value;
186+
} else {
187+
// Four-byte sequence (non-BMP unicode value).
188+
int byte2 = bytes[i++];
189+
int byte3 = bytes[i++];
190+
int byte4 = bytes[i++];
191+
int value =
192+
(byte << 18) ^ (byte2 << 12) ^ (byte3 << 6) ^ byte4 ^ 0x3C82080;
193+
assert(value >= 0x10000 && value < 0x110000);
194+
charCodes[j++] = 0xD7C0 + (value >> 10);
195+
charCodes[j++] = 0xDC00 + (value & 0x3FF);
196+
}
197+
}
198+
assert(i == end);
199+
return String.fromCharCodes(charCodes, 0, j);
200+
}
201+
}

0 commit comments

Comments
 (0)