Skip to content

Commit fdae759

Browse files
authored
SWIFT-1044 Improve BSON to JSON performance (#53)
1 parent 70c0ac0 commit fdae759

8 files changed

+209
-166
lines changed

Sources/SwiftBSON/BSONBinary.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,16 +265,16 @@ extension BSONBinary: BSONValue {
265265
guard oldSize == (byteLength - 4) else {
266266
throw BSONError.InternalError(message: "Invalid size for BSONBinary subtype: \(subtype)")
267267
}
268-
guard let bytes = buffer.readBytes(length: Int(oldSize)) else {
268+
guard let bytes = buffer.readSlice(length: Int(oldSize)) else {
269269
throw BSONError.InternalError(message: "Cannot read \(oldSize) from buffer for BSONBinary")
270270
}
271-
return .binary(try BSONBinary(bytes: bytes, subtype: subtype))
271+
return .binary(try BSONBinary(buffer: bytes, subtype: subtype))
272272
}
273273

274-
guard let bytes = buffer.readBytes(length: Int(byteLength)) else {
274+
guard let bytes = buffer.readSlice(length: Int(byteLength)) else {
275275
throw BSONError.InternalError(message: "Cannot read \(byteLength) from buffer for BSONBinary")
276276
}
277-
return .binary(try BSONBinary(bytes: bytes, subtype: subtype))
277+
return .binary(try BSONBinary(buffer: bytes, subtype: subtype))
278278
}
279279

280280
internal func write(to buffer: inout ByteBuffer) {

Sources/SwiftBSON/BSONDocument+Sequence.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,16 @@ extension BSONDocument: Sequence {
2222
BSONDocumentIterator(over: self.buffer)
2323
}
2424

25+
// `BSONDocument` conforms to both `Sequence` and `Collection`. The `map` supplied by default through `Collection`
// relies on numeric indexes for iteration and is therefore very slow, so this override routes the call through a
// type-erased `AnySequence` in order to use the default `Sequence` implementation instead.
@inlinable
public func map<T>(_ transform: (Element) throws -> T) rethrows -> [T] {
    let sequenceView = AnySequence(self)
    return try sequenceView.map(transform)
}
34+
2535
/**
2636
* Returns a new document containing the keys of this document with the values transformed by the given closure.
2737
*

Sources/SwiftBSON/BSONDocument.swift

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ public struct BSONDocument {
125125
/// On error, an empty string will be returned.
126126
public func toExtendedJSONString() -> String {
127127
let encoder = ExtendedJSONEncoder()
128-
encoder.outputFormatting = [.prettyPrinted]
129128
guard let encoded = try? encoder.encode(self) else {
130129
return ""
131130
}
@@ -137,15 +136,20 @@ public struct BSONDocument {
137136
public func toCanonicalExtendedJSONString() -> String {
138137
let encoder = ExtendedJSONEncoder()
139138
encoder.mode = .canonical
140-
encoder.outputFormatting = [.prettyPrinted]
141139
guard let encoded = try? encoder.encode(self) else {
142140
return ""
143141
}
144142
return String(data: encoded, encoding: .utf8) ?? ""
145143
}
146144

147145
/// The keys in this `BSONDocument`.
148-
public var keys: [String] { self.map { key, _ in key } }
146+
public var keys: [String] {
    // Only the keys are pulled out of the underlying buffer; `getKeys` skips over each value instead of decoding it.
    do {
        return try BSONDocumentIterator.getKeys(from: self.storage.buffer)
    } catch {
        // Report the underlying error as well, matching the other lookup paths in this type, so that a crash
        // caused by unreadable storage can actually be diagnosed.
        fatalError("Failed to retrieve keys for document: \(error)")
    }
}
149153

150154
/// The values in this `BSONDocument`.
151155
public var values: [BSON] { self.map { _, val in val } }
@@ -177,10 +181,11 @@ public struct BSONDocument {
177181
*/
178182
public subscript(key: String) -> BSON? {
179183
get {
180-
for (docKey, value) in self where docKey == key {
181-
return value
184+
do {
185+
return try BSONDocumentIterator.find(key: key, in: self)?.value
186+
} catch {
187+
fatalError("Error looking up key \(key) in document: \(error)")
182188
}
183-
return nil
184189
}
185190
set {
186191
// The only time this would crash is document too big error
@@ -281,9 +286,7 @@ public struct BSONDocument {
281286
return
282287
}
283288

284-
let iter = BSONDocumentIterator(over: self.storage.buffer)
285-
286-
guard let range = iter.findByteRange(for: key) else {
289+
guard let range = try BSONDocumentIterator.findByteRange(for: key, in: self) else {
287290
throw BSONError.InternalError(message: "Cannot find \(key) to delete")
288291
}
289292

@@ -541,10 +544,12 @@ extension BSONDocument: BSONValue {
541544
throw BSONError.InternalError(message: "Cannot read document byte length")
542545
}
543546
buffer.moveReaderIndex(to: reader)
544-
guard let bytes = buffer.readBytes(length: Int(encodedLength)) else {
547+
guard let bytes = buffer.readSlice(length: Int(encodedLength)) else {
545548
throw BSONError.InternalError(message: "Cannot read document contents")
546549
}
547-
return .document(try BSONDocument(fromBSON: Data(bytes)))
550+
551+
let keys = try BSONDocumentIterator.getKeySet(from: bytes)
552+
return .document(BSONDocument(fromUnsafeBSON: BSONDocument.BSONDocumentStorage(bytes), keys: keys))
548553
}
549554

550555
internal func write(to buffer: inout ByteBuffer) {

Sources/SwiftBSON/BSONDocumentIterator.swift

Lines changed: 159 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -36,74 +36,182 @@ public class BSONDocumentIterator: IteratorProtocol {
3636
* - `InternalError` if the underlying buffer contains invalid BSON
3737
*/
3838
internal func nextThrowing() throws -> BSONDocument.KeyValuePair? {
39-
guard self.buffer.readableBytes != 0 else {
40-
// Iteration has been exhausted
41-
guard self.exhausted else {
42-
throw BSONIterationError(
43-
buffer: self.buffer,
44-
message: "There are no readable bytes remaining but a null terminator was not encountered"
45-
)
46-
}
39+
guard let type = try self.readNextType() else {
40+
return nil
41+
}
42+
let key = try self.buffer.readCString()
43+
guard let bson = try BSON.allBSONTypes[type]?.read(from: &self.buffer) else {
44+
throw BSONIterationError(message: "Encountered invalid BSON type: \(type)")
45+
}
46+
return (key: key, value: bson)
47+
}
48+
49+
/// Reads the key of the next element and moves past that element's value without decoding it.
/// Intended solely for key-only iteration: the reader ends up at the beginning of the following element, so the
/// value associated with the returned key cannot be accessed through this iterator afterwards.
/// Returns nil once the end of the document has been reached.
private func nextKey() throws -> String? {
    if let elementType = try self.readNextType() {
        let elementKey = try self.buffer.readCString()
        try self.skipNextValue(type: elementType)
        return elementKey
    }
    return nil
}
60+
61+
/// Assuming the buffer is currently positioned at the start of an element, returns the BSON type for the element.
62+
/// Returns nil if the end of the document has been reached.
63+
/// Throws an error if the byte does not correspond to a BSON type.
64+
internal func readNextType() throws -> BSONType? {
65+
guard !self.exhausted else {
4766
return nil
4867
}
4968

50-
guard let typeByte = self.buffer.readInteger(as: UInt8.self) else {
69+
guard let nextByte = self.buffer.readInteger(endianness: .little, as: UInt8.self) else {
5170
throw BSONIterationError(
5271
buffer: self.buffer,
53-
message: "Cannot read type indicator from bson"
72+
message: "There are no readable bytes remaining, but a null terminator was not encountered"
5473
)
5574
}
5675

57-
guard typeByte != 0 else {
58-
// Iteration exhausted after we've read the null terminator (special case)
76+
guard nextByte != 0 else {
77+
// if we are out of readable bytes, this is the null terminator
5978
guard self.buffer.readableBytes == 0 else {
6079
throw BSONIterationError(
6180
buffer: self.buffer,
62-
message: "Bytes remain after document iteration exhausted"
81+
message: "Encountered invalid type indicator"
6382
)
6483
}
6584
self.exhausted = true
6685
return nil
6786
}
6887

69-
guard let type = BSONType(rawValue: typeByte), type != .invalid else {
88+
guard let bsonType = BSONType(rawValue: nextByte) else {
7089
throw BSONIterationError(
7190
buffer: self.buffer,
72-
typeByte: typeByte,
73-
message: "Invalid type indicator"
91+
message: "Encountered invalid BSON type indicator \(nextByte)"
7492
)
7593
}
7694

77-
let key = try self.buffer.readCString()
78-
guard let bson = try BSON.allBSONTypes[type]?.read(from: &buffer) else {
79-
throw BSONIterationError(
80-
buffer: self.buffer,
81-
key: key,
82-
type: type,
83-
typeByte: typeByte,
84-
message: "Cannot decode type"
85-
)
95+
return bsonType
96+
}
97+
98+
/// Finds an element with the specified key in the document. Returns nil if the key is not found.
///
/// Only the value of the matching element is decoded; the values of all other elements are skipped over.
///
/// - Parameters:
///   - key: the key to search for.
///   - document: the document whose elements are scanned, in encoded order.
/// - Throws: an error if the document's buffer cannot be iterated, or if no reader is registered for the BSON type
///   of the matching element.
internal static func find(key: String, in document: BSONDocument) throws -> BSONDocument.KeyValuePair? {
    let iter = document.makeIterator()
    while let type = try iter.readNextType() {
        let foundKey = try iter.buffer.readCString()
        guard foundKey == key else {
            // Not the element we are looking for: jump over its value without decoding it.
            try iter.skipNextValue(type: type)
            continue
        }

        // Mirror `nextThrowing()`: report a missing reader as an error rather than force-unwrapping the lookup,
        // so that unexpected input can never trap here.
        guard let bson = try BSON.allBSONTypes[type]?.read(from: &iter.buffer) else {
            throw BSONIterationError(message: "Encountered invalid BSON type: \(type)")
        }
        return (key: key, value: bson)
    }
    return nil
}
114+
115+
/// Given the type of the encoded value starting at self.buffer.readerIndex, advances the reader index to the index
/// after the end of the element.
///
/// Variable-length values carry a length prefix that is read from the buffer itself. Every skip is validated against
/// the number of readable bytes that remain, so a truncated or corrupt length results in a thrown error instead of
/// moving the reader index out of range (or backwards).
///
/// - Parameter type: the BSON type of the value the reader is currently positioned at.
/// - Throws: an error if `type` is `.invalid`, if a length prefix or C string cannot be read, or if the value would
///   extend past the readable bytes of the buffer.
internal func skipNextValue(type: BSONType) throws {
    switch type {
    case .invalid:
        throw BSONIterationError(message: "encountered invalid BSON type")

    case .undefined, .null, .minKey, .maxKey:
        // no data stored, nothing to skip.
        return

    case .bool:
        try self.advanceReaderChecked(by: 1)

    case .double, .int64, .timestamp, .datetime:
        try self.advanceReaderChecked(by: 8)

    case .objectID:
        try self.advanceReaderChecked(by: 12)

    case .int32:
        try self.advanceReaderChecked(by: 4)

    case .string, .code, .symbol:
        guard let strLength = self.buffer.readInteger(endianness: .little, as: Int32.self) else {
            throw BSONError.InternalError(message: "Failed to read encoded string length")
        }
        try self.advanceReaderChecked(by: Int(strLength))

    case .regex:
        // two consecutive C strings; reading them is the only way to find where they end.
        _ = try self.buffer.readCString()
        _ = try self.buffer.readCString()

    case .binary:
        guard let dataLength = self.buffer.readInteger(endianness: .little, as: Int32.self) else {
            throw BSONError.InternalError(message: "Failed to read encoded binary data length")
        }
        // 1 byte for the binary subtype, then the data itself. Skipping them separately means a negative
        // length is rejected rather than being offset by the subtype byte.
        try self.advanceReaderChecked(by: 1)
        try self.advanceReaderChecked(by: Int(dataLength))

    case .document, .array, .codeWithScope:
        guard let embeddedDocLength = self.buffer.readInteger(endianness: .little, as: Int32.self) else {
            throw BSONError.InternalError(message: "Failed to read encoded document length")
        }
        // -4 because the encoded length includes the bytes necessary to store the length itself.
        try self.advanceReaderChecked(by: Int(embeddedDocLength) - 4)

    case .dbPointer:
        // initial string
        guard let strLength = self.buffer.readInteger(endianness: .little, as: Int32.self) else {
            throw BSONError.InternalError(message: "Failed to read encoded string length")
        }
        try self.advanceReaderChecked(by: Int(strLength))
        // 12 bytes of data
        try self.advanceReaderChecked(by: 12)

    case .decimal128:
        try self.advanceReaderChecked(by: 16)
    }
}

/// Moves the reader index forward by `count` bytes after checking that `count` is non-negative and does not exceed
/// the buffer's readable bytes. Throws instead of advancing when the check fails.
private func advanceReaderChecked(by count: Int) throws {
    guard count >= 0, count <= self.buffer.readableBytes else {
        throw BSONIterationError(
            buffer: self.buffer,
            message: "Cannot skip \(count) bytes with \(self.buffer.readableBytes) readable bytes remaining"
        )
    }
    self.buffer.moveReaderIndex(forwardBy: count)
}
89174

90175
/// Finds the key in the underlying buffer, and returns the [startIndex, endIndex) containing the corresponding
91176
/// element.
92-
internal func findByteRange(for searchKey: String) -> Range<Int>? {
177+
internal static func findByteRange(for searchKey: String, in document: BSONDocument) throws -> Range<Int>? {
178+
let iter = document.makeIterator()
179+
93180
while true {
94-
let startIndex = self.buffer.readerIndex
95-
guard let (key, _) = self.next() else {
96-
// Iteration ended without finding a match
181+
let startIndex = iter.buffer.readerIndex
182+
guard let type = try iter.readNextType() else {
97183
return nil
98184
}
99-
let endIndex = self.buffer.readerIndex
185+
let foundKey = try iter.buffer.readCString()
186+
try iter.skipNextValue(type: type)
100187

101-
if key == searchKey {
188+
if foundKey == searchKey {
189+
let endIndex = iter.buffer.readerIndex
102190
return startIndex..<endIndex
103191
}
104192
}
105193
}
106194

195+
/// Collects every key in the provided document buffer, in the order in which the keys are encoded.
internal static func getKeys(from buffer: ByteBuffer) throws -> [String] {
    let keyIterator = BSONDocumentIterator(over: buffer)
    var orderedKeys: [String] = []
    while true {
        guard let currentKey = try keyIterator.nextKey() else {
            // End of the document: everything collected so far is the result.
            return orderedKeys
        }
        orderedKeys.append(currentKey)
    }
}
204+
205+
/// Collects the keys in the provided document buffer into an unordered set.
internal static func getKeySet(from buffer: ByteBuffer) throws -> Set<String> {
    let keyIterator = BSONDocumentIterator(over: buffer)
    var uniqueKeys = Set<String>()
    while true {
        guard let currentKey = try keyIterator.nextKey() else {
            // End of the document: everything collected so far is the result.
            return uniqueKeys
        }
        uniqueKeys.insert(currentKey)
    }
}
214+
107215
// uses an iterator to copy (key, value) pairs of the provided document from range [startIndex, endIndex) into a new
108216
// document. starts at the startIndex-th pair and ends at the end of the document or the (endIndex-1)th index,
109217
// whichever comes first.
@@ -119,35 +227,30 @@ public class BSONDocumentIterator: IteratorProtocol {
119227

120228
let iter = BSONDocumentIterator(over: doc)
121229

122-
var excludedKeys: [String] = []
123-
124-
for _ in 0..<startIndex {
125-
guard let next = iter.next() else {
126-
// we ran out of values
127-
break
230+
do {
231+
for _ in 0..<startIndex {
232+
guard let type = try iter.readNextType() else {
233+
// we ran out of values
234+
break
235+
}
236+
_ = try iter.buffer.readCString()
237+
try iter.skipNextValue(type: type)
128238
}
129-
excludedKeys.append(next.key)
130-
}
131239

132-
// skip the values between startIndex and endIndex. this has better performance than calling next, because
133-
// it doesn't pull the unneeded key/values out of the iterator
134-
for _ in startIndex..<endIndex {
135-
guard (try? iter.nextThrowing()) != nil else {
136-
// we ran out of values
137-
break
138-
}
139-
}
240+
var newDoc = BSONDocument()
140241

141-
while let next = iter.next() {
142-
excludedKeys.append(next.key)
143-
}
242+
for _ in startIndex..<endIndex {
243+
guard let next = try iter.nextThrowing() else {
244+
// we ran out of values
245+
break
246+
}
247+
newDoc[next.key] = next.value
248+
}
144249

145-
guard !excludedKeys.isEmpty else {
146-
return doc
250+
return newDoc
251+
} catch {
252+
fatalError("Failed to retrieve document subsequence: \(error)")
147253
}
148-
149-
let newDoc = doc.filter { key, _ in !excludedKeys.contains(key) }
150-
return newDoc
151254
}
152255
}
153256

0 commit comments

Comments
 (0)