diff --git a/packages/protobuf/src/to-binary.ts b/packages/protobuf/src/to-binary.ts index 786e74944..69e82d614 100644 --- a/packages/protobuf/src/to-binary.ts +++ b/packages/protobuf/src/to-binary.ts @@ -45,7 +45,7 @@ const writeDefaults: Readonly = { }; function makeWriteOptions( - options?: Partial, + options?: Partial ): Readonly { return options ? { ...writeDefaults, ...options } : writeDefaults; } @@ -53,25 +53,25 @@ function makeWriteOptions( export function toBinary( schema: Desc, message: MessageShape, - options?: Partial, + options?: Partial ): Uint8Array { return writeFields( new BinaryWriter(), makeWriteOptions(options), - reflect(schema, message), + reflect(schema, message) ).finish(); } function writeFields( writer: BinaryWriter, opts: BinaryWriteOptions, - msg: ReflectMessage, + msg: ReflectMessage ): BinaryWriter { for (const f of msg.sortedFields) { if (!msg.isSet(f)) { if (f.presence == LEGACY_REQUIRED) { throw new Error( - `cannot encode field ${msg.desc.typeName}.${f.name} to binary: required field not set`, + `cannot encode field ${msg.desc.typeName}.${f.name} to binary: required field not set` ); } continue; @@ -80,7 +80,7 @@ function writeFields( } if (opts.writeUnknownFields) { for (const { no, wireType, data } of msg.getUnknown() ?? []) { - writer.tag(no, wireType).raw(data); + writer.tag(no, wireType).bytes(data); } } return writer; @@ -93,7 +93,7 @@ export function writeField( writer: BinaryWriter, opts: BinaryWriteOptions, msg: ReflectMessage, - field: DescField, + field: DescField ) { switch (field.fieldKind) { case "scalar": @@ -102,7 +102,7 @@ export function writeField( writer, field.scalar ?? ScalarType.INT32, field.number, - msg.get(field), + msg.get(field) ); break; case "list": @@ -123,13 +123,10 @@ function writeScalar( writer: BinaryWriter, scalarType: ScalarType, fieldNo: number, - value: unknown, + value: unknown ) { - writeScalarValue( - writer.tag(fieldNo, writeTypeOfScalar(scalarType)), - scalarType, - value as ScalarValue, - ); + writer.tag(fieldNo, writeTypeOfScalar(scalarType)); + writeScalarValue(writer, scalarType, value as ScalarValue); } function writeMessageField( @@ -137,20 +134,24 @@ function writeMessageField( opts: BinaryWriteOptions, field: DescField & ({ fieldKind: "message" } | { fieldKind: "list"; listKind: "message" }), - message: ReflectMessage, + message: ReflectMessage ) { if (field.delimitedEncoding) { - writeFields( - writer.tag(field.number, WireType.StartGroup), - opts, - message, - ).tag(field.number, WireType.EndGroup); + writer.tag(field.number, WireType.StartGroup); + writeFields(writer, opts, message).tag(field.number, WireType.EndGroup); + writer.tag(field.number, WireType.EndGroup); } else { - writeFields( - writer.tag(field.number, WireType.LengthDelimited).fork(), - opts, - message, - ).join(); + // TODO(ekrekr): this is really slow, because it has to allocate a whole new array buffer. + // Instead we should be writing the message directly to the original arraybuffer, then inserting + // the length beforehand. + const subMessage = writeFields(new BinaryWriter(), opts, message).finish(); + + // Add the prefix for the number of bytes in the submessage. + writer.tag(field.number, WireType.LengthDelimited); + writer.uint32(subMessage.byteLength); + + // Insert the sub message to the message. + writer.bytes(subMessage); } } @@ -158,7 +159,7 @@ function writeListField( writer: BinaryWriter, opts: BinaryWriteOptions, field: DescField & { fieldKind: "list" }, - list: ReflectList, + list: ReflectList ) { if (field.listKind == "message") { for (const item of list) { @@ -171,11 +172,10 @@ function writeListField( if (!list.size) { return; } - writer.tag(field.number, WireType.LengthDelimited).fork(); for (const item of list) { writeScalarValue(writer, scalarType, item as ScalarValue); } - writer.join(); + writer.tag(field.number, WireType.LengthDelimited); return; } for (const item of list) { @@ -188,10 +188,8 @@ function writeMapEntry( opts: BinaryWriteOptions, field: DescField & { fieldKind: "map" }, key: unknown, - value: unknown, + value: unknown ) { - writer.tag(field.number, WireType.LengthDelimited).fork(); - // write key, expecting key field number = 1 writeScalar(writer, field.mapKey, 1, key); @@ -202,20 +200,17 @@ function writeMapEntry( writeScalar(writer, field.scalar ?? ScalarType.INT32, 2, value); break; case "message": - writeFields( - writer.tag(2, WireType.LengthDelimited).fork(), - opts, - value as ReflectMessage, - ).join(); + writeFields(writer, opts, value as ReflectMessage); + writer.tag(2, WireType.LengthDelimited); break; } - writer.join(); + writer.tag(field.number, WireType.LengthDelimited); } function writeScalarValue( writer: BinaryWriter, type: ScalarType, - value: ScalarValue, + value: ScalarValue ) { switch (type) { case ScalarType.STRING: diff --git a/packages/protobuf/src/wire/binary-encoding.ts b/packages/protobuf/src/wire/binary-encoding.ts index 29d57d11d..b3ca68299 100644 --- a/packages/protobuf/src/wire/binary-encoding.ts +++ b/packages/protobuf/src/wire/binary-encoding.ts @@ -97,123 +97,60 @@ export const INT32_MIN = -0x80000000; export class BinaryWriter { /** - * We cannot allocate a buffer for the entire output - * because we don't know it's size. + * This is the storage backing for the bytes buffer. * - * So we collect smaller chunks of known size and - * concat them later. + * Max byte length is 2GiB - 1, which is the maximum for array buffers. + * This value is generally accepted the maximum protobuf size, so this is fine. * - * Use `raw()` to push data to this array. It will flush - * `buf` first. + * TODO(ekrekr): remove the `any` cast once types are fixed: + * https://github.com/microsoft/TypeScript/pull/58573. */ - private chunks: Uint8Array[]; + private bytesBufStorage = new (ArrayBuffer as any)(0, { + maxByteLength: -(((2 << 10) << 10) << 10) - 1, + }); /** - * A growing buffer for byte values. If you don't know - * the size of the data you are writing, push to this - * array. + * This contains the editable view onto the storage backing. */ - protected buf: number[]; - - /** - * Previous fork states. - */ - private stack: Array<{ chunks: Uint8Array[]; buf: number[] }> = []; + private bytesBuf = new Uint8Array(this.bytesBufStorage); constructor( - private readonly encodeUtf8: ( + private readonly encodeInto: ( text: string, - ) => Uint8Array = getTextEncoding().encodeUtf8, - ) { - this.chunks = []; - this.buf = []; - } + buf: Uint8Array + ) => { read: number; written: number } = getTextEncoding().encodeInto + ) {} /** * Return all bytes written and reset this writer. */ finish(): Uint8Array { - this.chunks.push(new Uint8Array(this.buf)); // flush the buffer - let len = 0; - for (let i = 0; i < this.chunks.length; i++) len += this.chunks[i].length; - let bytes = new Uint8Array(len); - let offset = 0; - for (let i = 0; i < this.chunks.length; i++) { - bytes.set(this.chunks[i], offset); - offset += this.chunks[i].length; - } - this.chunks = []; - return bytes; - } - - /** - * Start a new fork for length-delimited data like a message - * or a packed repeated field. - * - * Must be joined later with `join()`. - */ - fork(): this { - this.stack.push({ chunks: this.chunks, buf: this.buf }); - this.chunks = []; - this.buf = []; - return this; - } - - /** - * Join the last fork. Write its length and bytes, then - * return to the previous state. - */ - join(): this { - // get chunk of fork - let chunk = this.finish(); - - // restore previous state - let prev = this.stack.pop(); - if (!prev) throw new Error("invalid state, fork stack empty"); - this.chunks = prev.chunks; - this.buf = prev.buf; - - // write length of chunk as varint - this.uint32(chunk.byteLength); - return this.raw(chunk); + return this.bytesBuf; } /** * Writes a tag (field number and wire type). - * - * Equivalent to `uint32( (fieldNo << 3 | type) >>> 0 )`. - * - * Generated code should compute the tag ahead of time and call `uint32()`. */ tag(fieldNo: number, type: WireType): this { return this.uint32(((fieldNo << 3) | type) >>> 0); } - /** - * Write a chunk of raw bytes. - */ - raw(chunk: Uint8Array): this { - if (this.buf.length) { - this.chunks.push(new Uint8Array(this.buf)); - this.buf = []; - } - this.chunks.push(chunk); - return this; - } - /** * Write a `uint32` value, an unsigned 32 bit varint. */ uint32(value: number): this { assertUInt32(value); - // write value as varint 32, inlined for speed while (value > 0x7f) { - this.buf.push((value & 0x7f) | 0x80); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 1); + this.bytesBuf.set([(value & 0x7f) | 0x80], currentBufHead); value = value >>> 7; } - this.buf.push(value); - + const currentBufHead = this.bytesBufStorage.byteLength; + // Expand the bytes buffer, then allocate the byte at the head. + this.bytesBufStorage.resize(currentBufHead + 1); + this.bytesBuf.set([value], currentBufHead); return this; } @@ -222,7 +159,11 @@ export class BinaryWriter { */ int32(value: number): this { assertInt32(value); - varint32write(value, this.buf); + const currentBufHead = this.bytesBufStorage.byteLength; + const numberBuf: number[] = []; + varint32write(value, numberBuf); + this.bytesBufStorage.resize(currentBufHead + numberBuf.length); + this.bytesBuf.set(numberBuf, currentBufHead); return this; } @@ -230,7 +171,9 @@ export class BinaryWriter { * Write a `bool` value, a variant. */ bool(value: boolean): this { - this.buf.push(value ? 1 : 0); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 1); + this.bytesBuf.set([value ? 1 : 0], currentBufHead); return this; } @@ -238,36 +181,56 @@ export class BinaryWriter { * Write a `bytes` value, length-delimited arbitrary data. */ bytes(value: Uint8Array): this { + const currentBufHead = this.bytesBufStorage.byteLength; this.uint32(value.byteLength); // write length of chunk as varint - return this.raw(value); + this.bytesBufStorage.resize(currentBufHead + value.byteLength); + this.bytesBuf.set(value, currentBufHead); + return this; } /** * Write a `string` value, length-delimited data converted to UTF-8 text. */ string(value: string): this { - let chunk = this.encodeUtf8(value); - this.uint32(chunk.byteLength); // write length of chunk as varint - return this.raw(chunk); + // NodeJS strings are by default UTF-8, so we can assume the byte length as the length of + // the string. + const valueBytesLength = value.length; + + this.uint32(valueBytesLength); + + const currentBufHead = this.bytesBufStorage.byteLength; + + // Allocate new bytes for the string. + this.bytesBufStorage.resize(currentBufHead + valueBytesLength); + + // Write the decoded string directly into the encoded message array buffer. + this.encodeInto( + value, + this.bytesBuf.subarray(currentBufHead, currentBufHead + valueBytesLength) + ); + + return this; } /** * Write a `float` value, 32-bit floating point number. */ float(value: number): this { + const currentBufHead = this.bytesBufStorage.byteLength; assertFloat32(value); - let chunk = new Uint8Array(4); - new DataView(chunk.buffer).setFloat32(0, value, true); - return this.raw(chunk); + this.bytesBufStorage.resize(currentBufHead + 4); + new DataView(this.bytesBufStorage).setFloat32(currentBufHead, value, true); + return this; } /** * Write a `double` value, a 64-bit floating point number. */ double(value: number): this { - let chunk = new Uint8Array(8); - new DataView(chunk.buffer).setFloat64(0, value, true); - return this.raw(chunk); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 8); + new DataView(this.bytesBufStorage).setFloat64(currentBufHead, value, true); + return this; } /** @@ -275,9 +238,10 @@ export class BinaryWriter { */ fixed32(value: number): this { assertUInt32(value); - let chunk = new Uint8Array(4); - new DataView(chunk.buffer).setUint32(0, value, true); - return this.raw(chunk); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 4); + new DataView(this.bytesBufStorage).setUint32(currentBufHead, value, true); + return this; } /** @@ -285,9 +249,10 @@ export class BinaryWriter { */ sfixed32(value: number): this { assertInt32(value); - let chunk = new Uint8Array(4); - new DataView(chunk.buffer).setInt32(0, value, true); - return this.raw(chunk); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 4); + new DataView(this.bytesBufStorage).setInt32(currentBufHead, value, true); + return this; } /** @@ -297,7 +262,11 @@ export class BinaryWriter { assertInt32(value); // zigzag encode value = ((value << 1) ^ (value >> 31)) >>> 0; - varint32write(value, this.buf); + const numberBuf: number[] = []; + varint32write(value, numberBuf); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + numberBuf.length); + this.bytesBuf.set(numberBuf, currentBufHead); return this; } @@ -305,24 +274,26 @@ export class BinaryWriter { * Write a `fixed64` value, a signed, fixed-length 64-bit integer. */ sfixed64(value: string | number | bigint): this { - let chunk = new Uint8Array(8), - view = new DataView(chunk.buffer), - tc = protoInt64.enc(value); - view.setInt32(0, tc.lo, true); - view.setInt32(4, tc.hi, true); - return this.raw(chunk); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 8); + let view = new DataView(this.bytesBufStorage); + let tc = protoInt64.enc(value); + view.setInt32(currentBufHead, tc.lo, true); + view.setInt32(currentBufHead + 4, tc.hi, true); + return this; } /** * Write a `fixed64` value, an unsigned, fixed-length 64 bit integer. */ fixed64(value: string | number | bigint): this { - let chunk = new Uint8Array(8), - view = new DataView(chunk.buffer), - tc = protoInt64.uEnc(value); - view.setInt32(0, tc.lo, true); - view.setInt32(4, tc.hi, true); - return this.raw(chunk); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + 8); + let view = new DataView(this.bytesBufStorage); + let tc = protoInt64.uEnc(value); + view.setInt32(currentBufHead, tc.lo, true); + view.setInt32(currentBufHead + 4, tc.hi, true); + return this; } /** @@ -330,7 +301,11 @@ export class BinaryWriter { */ int64(value: string | number | bigint): this { let tc = protoInt64.enc(value); - varint64write(tc.lo, tc.hi, this.buf); + const numberBuf: number[] = []; + varint64write(tc.lo, tc.hi, numberBuf); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + numberBuf.length); + this.bytesBuf.set(numberBuf, currentBufHead); return this; } @@ -343,7 +318,11 @@ export class BinaryWriter { sign = tc.hi >> 31, lo = (tc.lo << 1) ^ sign, hi = ((tc.hi << 1) | (tc.lo >>> 31)) ^ sign; - varint64write(lo, hi, this.buf); + const numberBuf: number[] = []; + varint64write(lo, hi, numberBuf); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + numberBuf.length); + this.bytesBuf.set(numberBuf, currentBufHead); return this; } @@ -352,7 +331,11 @@ export class BinaryWriter { */ uint64(value: string | number | bigint): this { let tc = protoInt64.uEnc(value); - varint64write(tc.lo, tc.hi, this.buf); + const numberBuf: number[] = []; + varint64write(tc.lo, tc.hi, numberBuf); + const currentBufHead = this.bytesBufStorage.byteLength; + this.bytesBufStorage.resize(currentBufHead + numberBuf.length); + this.bytesBuf.set(numberBuf, currentBufHead); return this; } } @@ -374,8 +357,8 @@ export class BinaryReader { constructor( buf: Uint8Array, private readonly decodeUtf8: ( - bytes: Uint8Array, - ) => string = getTextEncoding().decodeUtf8, + bytes: Uint8Array + ) => string = getTextEncoding().decodeUtf8 ) { this.buf = buf; this.len = buf.length; @@ -392,7 +375,7 @@ export class BinaryReader { wireType = tag & 7; if (fieldNo <= 0 || wireType < 0 || wireType > 5) throw new Error( - "illegal tag: field no " + fieldNo + " wire type " + wireType, + "illegal tag: field no " + fieldNo + " wire type " + wireType ); return [fieldNo, wireType]; } diff --git a/packages/protobuf/src/wire/text-encoding.ts b/packages/protobuf/src/wire/text-encoding.ts index 04fb0d400..7e8de3809 100644 --- a/packages/protobuf/src/wire/text-encoding.ts +++ b/packages/protobuf/src/wire/text-encoding.ts @@ -23,6 +23,13 @@ interface TextEncoding { * Encode UTF-8 text to binary. */ encodeUtf8: (text: string) => Uint8Array; + /** + * Encode UTF-8 text to an existing binary. + */ + encodeInto: ( + text: string, + buf: Uint8Array + ) => { read: number; written: number }; /** * Decode UTF-8 text from binary. */ @@ -54,6 +61,12 @@ export function getTextEncoding() { encodeUtf8(text: string): Uint8Array { return te.encode(text); }, + encodeInto( + text: string, + buf: Uint8Array + ): { read: number; written: number } { + return te.encodeInto(text, buf); + }, decodeUtf8(bytes: Uint8Array): string { return td.decode(bytes); }, @@ -78,6 +91,10 @@ type GlobalWithTextEncoderDecoder = { TextEncoder: { new (): { encode(text: string): Uint8Array; + encodeInto( + text: string, + buf: Uint8Array + ): { read: number; written: number }; }; }; TextDecoder: {