Skip to content

Commit 949f874

Browse files
Merge pull request #13 from willtemperley/main
Add test to ensure record batch schema nullability and data match.
2 parents 31dcb7c + 2799399 commit 949f874

File tree

6 files changed

+58
-10
lines changed

6 files changed

+58
-10
lines changed

Sources/Arrow/ArrowArrayBuilder.swift

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import Foundation
1919

2020
/// A type which builds a type-erased `ArrowArray`.
2121
public protocol AnyArrowArrayBuilder {
22+
/// Returns an unparameterised `ArrowArray`.
23+
/// - Returns: The type-erased Arrow array.
2224
func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray
2325
func appendAny(_ val: Any?)
2426
}
@@ -73,8 +75,6 @@ extension ArrowArrayBuilderInternal {
7375
self.arrowType.getStride()
7476
}
7577

76-
/// Returns an unparameterised `ArrowArray`.
77-
/// - Returns: The type-erased Arrow array.
7878
public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray {
7979
try self.finish()
8080
}
@@ -109,7 +109,7 @@ public class ArrowArrayBuilderBase<
109109
}
110110
}
111111

112-
/// A type which builds an `ArrowArray` with a numeric `ItemType`.
112+
/// An array builder for numeric types.
113113
public class NumberArrayBuilder<ItemType>: ArrowArrayBuilderBase<
114114
FixedBufferBuilder<ItemType>,
115115
FixedArray<ItemType>
@@ -120,6 +120,7 @@ where ItemType: Numeric, ItemType: BitwiseCopyable {
120120
}
121121
}
122122

123+
/// A `String` array builder.
123124
public class StringArrayBuilder: ArrowArrayBuilderBase<
124125
VariableBufferBuilder<String>,
125126
StringArray
@@ -130,6 +131,7 @@ public class StringArrayBuilder: ArrowArrayBuilderBase<
130131
}
131132
}
132133

134+
/// A `Data` array builder.
133135
public class BinaryArrayBuilder: ArrowArrayBuilderBase<
134136
VariableBufferBuilder<Data>,
135137
BinaryArray
@@ -140,6 +142,7 @@ public class BinaryArrayBuilder: ArrowArrayBuilderBase<
140142
}
141143
}
142144

145+
/// A `Bool` array builder.
143146
public class BoolArrayBuilder: ArrowArrayBuilderBase<
144147
BoolBufferBuilder, BoolArray
145148
>
@@ -149,6 +152,7 @@ public class BoolArrayBuilder: ArrowArrayBuilderBase<
149152
}
150153
}
151154

155+
/// A 32-bit date array builder.
152156
public class Date32ArrayBuilder: ArrowArrayBuilderBase<
153157
Date32BufferBuilder,
154158
Date32Array
@@ -159,6 +163,7 @@ public class Date32ArrayBuilder: ArrowArrayBuilderBase<
159163
}
160164
}
161165

166+
/// A 64-bit date array builder.
162167
public class Date64ArrayBuilder: ArrowArrayBuilderBase<
163168
Date64BufferBuilder,
164169
Date64Array
@@ -169,6 +174,7 @@ public class Date64ArrayBuilder: ArrowArrayBuilderBase<
169174
}
170175
}
171176

177+
/// A 32-bit elapsed time builder.
172178
public class Time32ArrayBuilder: ArrowArrayBuilderBase<
173179
FixedBufferBuilder<Time32>,
174180
Time32Array
@@ -179,6 +185,7 @@ public class Time32ArrayBuilder: ArrowArrayBuilderBase<
179185
}
180186
}
181187

188+
/// A 64-bit elapsed time builder.
182189
public class Time64ArrayBuilder: ArrowArrayBuilderBase<
183190
FixedBufferBuilder<Time64>,
184191
Time64Array
@@ -189,6 +196,7 @@ public class Time64ArrayBuilder: ArrowArrayBuilderBase<
189196
}
190197
}
191198

199+
/// A Timestamp array builder.
192200
public class TimestampArrayBuilder: ArrowArrayBuilderBase<
193201
FixedBufferBuilder<Int64>,
194202
TimestampArray
@@ -203,6 +211,7 @@ public class TimestampArrayBuilder: ArrowArrayBuilderBase<
203211

204212
// MARK: Struct array builder.
205213

214+
/// Builds an array of structs.
206215
public class StructArrayBuilder: ArrowArrayBuilderBase<
207216
StructBufferBuilder,
208217
NestedArray
@@ -262,7 +271,9 @@ public class StructArrayBuilder: ArrowArrayBuilderBase<
262271

263272
// MARK: List array builder.
264273

265-
/// A type which can build an `NestedArray`containing exactly `ItemType`.
274+
/// Builds a `NestedArray` containing lists of `ItemType`.
275+
///
276+
/// Both lists and items in lists are nullable.
266277
public class ListArrayBuilder: ArrowArrayBuilderBase<
267278
ListBufferBuilder,
268279
NestedArray
@@ -271,18 +282,17 @@ public class ListArrayBuilder: ArrowArrayBuilderBase<
271282
let valueBuilder: any AnyArrowArrayBuilder
272283

273284
public override init(_ elementType: ArrowType) throws(ArrowError) {
274-
275285
guard case .list(let field) = elementType else {
276286
throw .invalid("Expected a field with type .list")
277287
}
278-
279288
self.valueBuilder = try ArrowArrayBuilders.loadBuilder(
280289
arrowType: field.type
281290
)
282291
try super.init(elementType)
283292
}
284293

285-
// Overrides the default
294+
// Overrides the protocol extension.
295+
// Swift currently provides no marker for this.
286296
public func append(_ values: [Any?]?) {
287297
self.bufferBuilder.append(values)
288298
if let vals = values {

Sources/Arrow/ArrowBuffer.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Copyright 2025 The Apache Software Foundation
2-
// Copyright 2025 The Columnar-Swift Contributors
2+
// Copyright 2025 The Columnar Swift Contributors
33
//
44
// Licensed under the Apache License, Version 2.0 (the "License");
55
// you may not use this file except in compliance with the License.
@@ -24,7 +24,8 @@ public class ArrowBuffer {
2424
let isMemoryOwner: Bool
2525

2626
init(
27-
length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer,
27+
length: UInt, capacity: UInt,
28+
rawPointer: UnsafeMutableRawPointer,
2829
isMemoryOwner: Bool = true
2930
) {
3031
self.length = length

Sources/Arrow/ArrowSchema.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
import Foundation
1616

17-
public struct ArrowSchema: Sendable {
17+
// Note this is a reference type to reduce copying.
18+
public final class ArrowSchema: Sendable {
1819
public let fields: [ArrowField]
1920
public let fieldLookup: [String: Int]
2021
init(_ fields: [ArrowField]) {

Sources/Arrow/ArrowTable.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright 2025 The Apache Software Foundation
2+
// Copyright 2025 The Columnar Swift Contributors
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.
@@ -244,6 +245,12 @@ public class RecordBatch {
244245
return self
245246
}
246247

248+
/// Add a column to the `RecordBatch` builder.
249+
/// - Parameters:
250+
/// - field: The field describing the array.
251+
/// - arrowArray: The array to add to the record batch.
252+
/// - Returns: The `RecordBatch.Builder` with the array appended and the field added to
253+
/// the schema.
247254
@discardableResult
248255
public func addColumn(
249256
_ field: ArrowField,
@@ -263,6 +270,17 @@ public class RecordBatch {
263270
}
264271
}
265272
}
273+
// Check nullability matches actual data
274+
let schema = self.schemaBuilder.finish()
275+
for (index, field) in schema.fields.enumerated() {
276+
let column = columns[index]
277+
if !field.isNullable && column.nullCount > 0 {
278+
return .failure(
279+
.invalid(
280+
"non-nullable column '\(field.name)' contains \(column.nullCount) null values."
281+
))
282+
}
283+
}
266284
return .success(
267285
RecordBatch(self.schemaBuilder.finish(), columns: self.columns)
268286
)

Sources/Arrow/ArrowType.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ public indirect enum ArrowType: Codable, Sendable, Equatable {
210210
/// of binary data in total.
211211
case binary
212212
/// Opaque binary data of fixed size.
213+
///
213214
/// Enum parameter specifies the number of bytes per value.
214215
case fixedSizeBinary(Int32)
215216
/// Opaque binary data of variable length and 64-bit offsets.

Tests/ArrowTests/RecordBatchTests.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,21 @@ struct RecordBatchTests {
5454
throw error
5555
}
5656
}
57+
58+
// Ensure that invalid record batches can't be built.
59+
@Test func schemaNullabilityChecked() throws {
60+
let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
61+
stringBuilder.append("test10")
62+
stringBuilder.append(nil)
63+
stringBuilder.append("test33")
64+
let array = try stringBuilder.finish()
65+
66+
let field = ArrowField(name: "col1", dataType: .utf8, isNullable: false)
67+
let result = RecordBatch.Builder()
68+
.addColumn(field, arrowArray: array)
69+
.finish()
70+
if case .success(_) = result {
71+
Issue.record("Record batch should have rejected null data.")
72+
}
73+
}
5774
}

0 commit comments

Comments
 (0)