Skip to content

Commit ae46953

Browse files
authored
Add BinaryEncodingOptions with option for deterministic ordering (#1480)
* Add `BinaryEncodingOptions` & option for deterministic ordering Implements the same setting added for JSON in #1478 for binary serialization. Related to #1477.
1 parent 5c44631 commit ae46953

File tree

5 files changed

+221
-39
lines changed

5 files changed

+221
-39
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Sources/SwiftProtobuf/BinaryEncodingOptions.swift - Binary encoding options
2+
//
3+
// Copyright (c) 2014 - 2023 Apple Inc. and the project authors
4+
// Licensed under Apache License v2.0 with Runtime Library Exception
5+
//
6+
// See LICENSE.txt for license information:
7+
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8+
//
9+
// -----------------------------------------------------------------------------
10+
///
11+
/// Binary encoding options
12+
///
13+
// -----------------------------------------------------------------------------
14+
15+
/// Options for binary encoding.
16+
public struct BinaryEncodingOptions: Sendable {
17+
/// Whether to use deterministic ordering when serializing.
18+
///
19+
/// Note that the deterministic serialization is NOT canonical across languages.
20+
/// It is NOT guaranteed to remain stable over time. It is unstable across
21+
/// different builds with schema changes due to unknown fields. Users who need
22+
/// canonical serialization (e.g., persistent storage in a canonical form,
23+
/// fingerprinting, etc.) should define their own canonicalization specification
24+
/// and implement their own serializer rather than relying on this API.
25+
///
26+
/// If deterministic serialization is requested, map entries will be sorted
27+
/// by keys in lexicographical order. This is an implementation detail
28+
/// and subject to change.
29+
public var useDeterministicOrdering: Bool = false
30+
31+
public init() {}
32+
}

Sources/SwiftProtobuf/BinaryEncodingVisitor.swift

Lines changed: 71 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import Foundation
1717

1818
/// Visitor that encodes a message graph in the protobuf binary wire format.
1919
internal struct BinaryEncodingVisitor: Visitor {
20+
private let options: BinaryEncodingOptions
2021

2122
var encoder: BinaryEncoder
2223

@@ -26,8 +27,9 @@ internal struct BinaryEncodingVisitor: Visitor {
2627
/// - Precondition: `pointer` must point to an allocated block of memory that
2728
/// is large enough to hold the entire encoded message. For performance
2829
/// reasons, the encoder does not make any attempts to verify this.
29-
init(forWritingInto pointer: UnsafeMutableRawPointer) {
30-
encoder = BinaryEncoder(forWritingInto: pointer)
30+
init(forWritingInto pointer: UnsafeMutableRawPointer, options: BinaryEncodingOptions) {
31+
self.encoder = BinaryEncoder(forWritingInto: pointer)
32+
self.options = options
3133
}
3234

3335
mutating func visitUnknown(bytes: Data) throws {
@@ -258,49 +260,79 @@ internal struct BinaryEncodingVisitor: Visitor {
258260
value: _ProtobufMap<KeyType, ValueType>.BaseType,
259261
fieldNumber: Int
260262
) throws {
261-
for (k,v) in value {
262-
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
263-
var sizer = BinaryEncodingSizeVisitor()
264-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
265-
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &sizer)
266-
let entrySize = sizer.serializedSize
267-
encoder.putVarInt(value: entrySize)
268-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
269-
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &self)
270-
}
263+
try iterateAndEncode(
264+
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
265+
encodeWithSizer: { sizer, key, value in
266+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
267+
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &sizer)
268+
}, encodeWithVisitor: { visitor, key, value in
269+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
270+
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
271+
}
272+
)
271273
}
272274

273275
mutating func visitMapField<KeyType, ValueType>(
274276
fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type,
275277
value: _ProtobufEnumMap<KeyType, ValueType>.BaseType,
276278
fieldNumber: Int
277279
) throws where ValueType.RawValue == Int {
278-
for (k,v) in value {
279-
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
280-
var sizer = BinaryEncodingSizeVisitor()
281-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
282-
try sizer.visitSingularEnumField(value: v, fieldNumber: 2)
283-
let entrySize = sizer.serializedSize
284-
encoder.putVarInt(value: entrySize)
285-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
286-
try visitSingularEnumField(value: v, fieldNumber: 2)
287-
}
280+
try iterateAndEncode(
281+
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
282+
encodeWithSizer: { sizer, key, value in
283+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
284+
try sizer.visitSingularEnumField(value: value, fieldNumber: 2)
285+
}, encodeWithVisitor: { visitor, key, value in
286+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
287+
try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
288+
}
289+
)
288290
}
289291

290292
mutating func visitMapField<KeyType, ValueType>(
291293
fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type,
292294
value: _ProtobufMessageMap<KeyType, ValueType>.BaseType,
293295
fieldNumber: Int
294296
) throws {
295-
for (k,v) in value {
296-
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
297-
var sizer = BinaryEncodingSizeVisitor()
298-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
299-
try sizer.visitSingularMessageField(value: v, fieldNumber: 2)
300-
let entrySize = sizer.serializedSize
301-
encoder.putVarInt(value: entrySize)
302-
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
303-
try visitSingularMessageField(value: v, fieldNumber: 2)
297+
try iterateAndEncode(
298+
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
299+
encodeWithSizer: { sizer, key, value in
300+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
301+
try sizer.visitSingularMessageField(value: value, fieldNumber: 2)
302+
}, encodeWithVisitor: { visitor, key, value in
303+
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
304+
try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
305+
}
306+
)
307+
}
308+
309+
/// Helper to encapsulate the common structure of iterating over a map
310+
/// and encoding the keys and values.
311+
private mutating func iterateAndEncode<K, V>(
312+
map: Dictionary<K, V>,
313+
fieldNumber: Int,
314+
isOrderedBefore: (K, K) -> Bool,
315+
encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
316+
encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
317+
) throws {
318+
if options.useDeterministicOrdering {
319+
for (k,v) in map.sorted(by: { isOrderedBefore( $0.0, $1.0) }) {
320+
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
321+
var sizer = BinaryEncodingSizeVisitor()
322+
try encodeWithSizer(&sizer, k, v)
323+
let entrySize = sizer.serializedSize
324+
encoder.putVarInt(value: entrySize)
325+
try encodeWithVisitor(&self, k, v)
326+
}
327+
} else {
328+
for (k,v) in map {
329+
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
330+
var sizer = BinaryEncodingSizeVisitor()
331+
try encodeWithSizer(&sizer, k, v)
332+
let entrySize = sizer.serializedSize
333+
encoder.putVarInt(value: entrySize)
334+
try encodeWithVisitor(&self, k, v)
335+
}
304336
}
305337
}
306338

@@ -309,7 +341,7 @@ internal struct BinaryEncodingVisitor: Visitor {
309341
start: Int,
310342
end: Int
311343
) throws {
312-
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder)
344+
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder, options: options)
313345
try fields.traverse(visitor: &subVisitor, start: start, end: end)
314346
encoder = subVisitor.encoder
315347
}
@@ -319,9 +351,12 @@ extension BinaryEncodingVisitor {
319351

320352
// Helper Visitor used when writing out the extensions as MessageSets.
321353
internal struct BinaryEncodingMessageSetVisitor: SelectiveVisitor {
354+
private let options: BinaryEncodingOptions
355+
322356
var encoder: BinaryEncoder
323357

324-
init(encoder: BinaryEncoder) {
358+
init(encoder: BinaryEncoder, options: BinaryEncodingOptions) {
359+
self.options = options
325360
self.encoder = encoder
326361
}
327362

@@ -338,7 +373,9 @@ extension BinaryEncodingVisitor {
338373
let length = try value.serializedDataSize()
339374
encoder.putVarInt(value: length)
340375
// Create the sub encoder after writing the length.
341-
var subVisitor = BinaryEncodingVisitor(forWritingInto: encoder.pointer)
376+
var subVisitor = BinaryEncodingVisitor(
377+
forWritingInto: encoder.pointer, options: options
378+
)
342379
try value.traverse(visitor: &subVisitor)
343380
encoder.pointer = subVisitor.encoder.pointer
344381

@@ -347,5 +384,4 @@ extension BinaryEncodingVisitor {
347384

348385
// SelectiveVisitor handles the rest.
349386
}
350-
351387
}

Sources/SwiftProtobuf/Message+BinaryAdditions.swift

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,38 @@ extension Message {
2222
/// - Parameters:
2323
/// - partial: If `false` (the default), this method will check
2424
/// `Message.isInitialized` before encoding to verify that all required
25-
/// fields are present. If any are missing, this method throws
25+
/// fields are present. If any are missing, this method throws
2626
/// `BinaryEncodingError.missingRequiredFields`.
2727
/// - Returns: A `SwiftProtobufContiguousBytes` instance containing the binary serialization
2828
/// of the message.
2929
///
3030
/// - Throws: `BinaryEncodingError` if encoding fails.
3131
public func serializedBytes<Bytes: SwiftProtobufContiguousBytes>(partial: Bool = false) throws -> Bytes {
32+
try serializedBytes(partial: partial, options: BinaryEncodingOptions())
33+
}
34+
35+
/// Returns a `SwiftProtobufContiguousBytes` instance containing the Protocol Buffer binary
36+
/// format serialization of the message.
37+
///
38+
/// - Parameters:
39+
/// - partial: If `false` (the default), this method will check
40+
/// `Message.isInitialized` before encoding to verify that all required
41+
/// fields are present. If any are missing, this method throws
42+
/// `BinaryEncodingError.missingRequiredFields`.
43+
/// - options: The `BinaryEncodingOptions` to use.
44+
/// - Returns: A `SwiftProtobufContiguousBytes` instance containing the binary serialization
45+
/// of the message.
46+
///
47+
/// - Throws: `BinaryEncodingError` if encoding fails.
48+
public func serializedBytes<Bytes: SwiftProtobufContiguousBytes>(
49+
partial: Bool = false,
50+
options: BinaryEncodingOptions
51+
) throws -> Bytes {
3252
if !partial && !isInitialized {
3353
throw BinaryEncodingError.missingRequiredFields
3454
}
55+
56+
// Note that this assumes `options` will not change the required size.
3557
let requiredSize = try serializedDataSize()
3658

3759
// Messages have a 2GB limit in encoded size, the upstream C++ code
@@ -48,7 +70,7 @@ extension Message {
4870
var data = Bytes(repeating: 0, count: requiredSize)
4971
try data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
5072
if let baseAddress = body.baseAddress, body.count > 0 {
51-
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress)
73+
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress, options: options)
5274
try traverse(visitor: &visitor)
5375
// Currently not exposing this from the api because it really would be
5476
// an internal error in the library and should never happen.

Sources/SwiftProtobuf/Message+BinaryAdditions_Data.swift

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ extension Message {
5858
/// `Message.isInitialized` after decoding to verify that all required
5959
/// fields are present. If any are missing, this method throws
6060
/// `BinaryDecodingError.missingRequiredFields`.
61-
/// - options: The BinaryDecodingOptions to use.
61+
/// - options: The `BinaryDecodingOptions` to use.
6262
/// - Throws: `BinaryDecodingError` if decoding fails.
6363
@inlinable
6464
public mutating func merge(
@@ -81,6 +81,24 @@ extension Message {
8181
/// - Returns: A `Data` instance containing the binary serialization of the message.
8282
/// - Throws: `BinaryEncodingError` if encoding fails.
8383
public func serializedData(partial: Bool = false) throws -> Data {
84-
try serializedBytes(partial: partial)
84+
try serializedBytes(partial: partial, options: BinaryEncodingOptions())
85+
}
86+
87+
/// Returns a `Data` instance containing the Protocol Buffer binary
88+
/// format serialization of the message.
89+
///
90+
/// - Parameters:
91+
/// - partial: If `false` (the default), this method will check
92+
/// `Message.isInitialized` before encoding to verify that all required
93+
/// fields are present. If any are missing, this method throws
94+
/// `BinaryEncodingError.missingRequiredFields`.
95+
/// - options: The `BinaryEncodingOptions` to use.
96+
/// - Returns: A `Data` instance containing the binary serialization of the message.
97+
/// - Throws: `BinaryEncodingError` if encoding fails.
98+
public func serializedData(
99+
partial: Bool = false,
100+
options: BinaryEncodingOptions
101+
) throws -> Data {
102+
try serializedBytes(partial: partial, options: options)
85103
}
86104
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Tests/SwiftProtobufTests/Test_BinaryEncodingOptions.swift - Tests for binary encoding options
2+
//
3+
// Copyright (c) 2014 - 2023 Apple Inc. and the project authors
4+
// Licensed under Apache License v2.0 with Runtime Library Exception
5+
//
6+
// See LICENSE.txt for license information:
7+
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8+
//
9+
// -----------------------------------------------------------------------------
10+
///
11+
/// Test for the use of BinaryEncodingOptions
12+
///
13+
// -----------------------------------------------------------------------------
14+
15+
import Foundation
16+
import XCTest
17+
import SwiftProtobuf
18+
19+
final class Test_BinaryEncodingOptions: XCTestCase {
20+
21+
func testUseDeterministicOrdering() throws {
22+
var options = BinaryEncodingOptions()
23+
options.useDeterministicOrdering = true
24+
25+
let message1 = SwiftProtoTesting_Message3.with {
26+
$0.mapStringString = [
27+
"b": "B",
28+
"a": "A",
29+
"0": "0",
30+
"UPPER": "v",
31+
"x": "X",
32+
]
33+
$0.mapInt32Message = [
34+
5: .with { $0.optionalSint32 = 5 },
35+
1: .with { $0.optionalSint32 = 1 },
36+
3: .with { $0.optionalSint32 = 3 },
37+
]
38+
$0.mapInt32Enum = [
39+
5: .foo,
40+
3: .bar,
41+
0: .baz,
42+
1: .extra3,
43+
]
44+
}
45+
46+
let message2 = SwiftProtoTesting_Message3.with {
47+
$0.mapStringString = [
48+
"UPPER": "v",
49+
"a": "A",
50+
"b": "B",
51+
"x": "X",
52+
"0": "0",
53+
]
54+
$0.mapInt32Message = [
55+
1: .with { $0.optionalSint32 = 1 },
56+
3: .with { $0.optionalSint32 = 3 },
57+
5: .with { $0.optionalSint32 = 5 },
58+
]
59+
$0.mapInt32Enum = [
60+
3: .bar,
61+
5: .foo,
62+
1: .extra3,
63+
0: .baz,
64+
]
65+
}
66+
67+
// Approximation that serializing models with the same data (but initialized with keys in
68+
// different orders) consistently produces the same outputs.
69+
let expectedOutput = try message1.serializedData(options: options)
70+
for _ in 0..<10 {
71+
XCTAssertEqual(try message2.serializedData(options: options), expectedOutput)
72+
}
73+
}
74+
}

0 commit comments

Comments
 (0)