Skip to content

Commit 14aeed7

Browse files
committed
Data sources menu fixed.
1 parent ae63723 commit 14aeed7

File tree

6 files changed

+78
-29
lines changed

6 files changed

+78
-29
lines changed

Package.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import PackageDescription
2020
let package = Package(
2121
name: "Arrow",
2222
platforms: [
23-
.macOS(.v26)
23+
.macOS(.v26), .iOS(.v26), .watchOS(.v26), .tvOS(.v26), .visionOS(.v26),
2424
],
2525
products: [
2626
.library(name: "Arrow", targets: ["Arrow"]),

Sources/Arrow/ArrowField.swift

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ public struct ArrowField: Codable, Sendable {
3131
///
3232
/// If true, the field *may* contain null values.
3333
public var isNullable: Bool
34-
public let orderedDict: Bool
3534
/// A map of key-value pairs containing additional custom meta data.
3635
public var metadata: [String: String]
3736
}
@@ -60,7 +59,6 @@ extension ArrowField {
6059
self.name = name
6160
self.type = dataType
6261
self.isNullable = isNullable
63-
self.orderedDict = false
6462
self.metadata = metadata
6563
}
6664

@@ -76,21 +74,20 @@ extension ArrowField {
7674
)
7775
}
7876

79-
/// Create a new `ArrowField` suitable for `ArrowType::Dictionary`.
80-
///
81-
public init(
82-
dictWithName: String,
83-
key: ArrowType,
84-
value: ArrowType,
85-
isNullable: Bool
86-
) {
87-
precondition(
88-
key.isDictionaryKeyType,
89-
"\(key) is not a valid dictionary key"
90-
)
91-
let dataType: ArrowType = .dictionary(key, value)
92-
self = Self(name: dictWithName, dataType: dataType, isNullable: isNullable)
93-
}
77+
// /// Create a new `ArrowField` suitable for `ArrowType::Dictionary`.
78+
// public init(
79+
// dictWithName: String,
80+
// key: ArrowType,
81+
// value: ArrowType,
82+
// isNullable: Bool
83+
// ) {
84+
// precondition(
85+
// key.isDictionaryKeyType,
86+
// "\(key) is not a valid dictionary key"
87+
// )
88+
// let dataType: ArrowType = .dictionary(key, value)
89+
// self = Self(name: dictWithName, dataType: dataType, isNullable: isNullable)
90+
// }
9491

9592
/// Create a new struct `ArrowField`.
9693
///
@@ -218,7 +215,7 @@ extension ArrowField {
218215
@inlinable
219216
public var dictIsOrdered: Bool {
220217
switch self.type {
221-
case .dictionary: return self.orderedDict
218+
case .dictionary(_, let isOrdered, _, _): return isOrdered
222219
default: return false
223220
}
224221
}

Sources/Arrow/ArrowType.swift

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ public indirect enum ArrowType: Codable, Sendable, Equatable {
293293
///
294294
/// This type is mostly used to represent low cardinality string
295295
/// arrays or a limited set of primitive types as integers.
296-
case dictionary(ArrowType, ArrowType)
296+
case dictionary(id: Int64, isOrdered: Bool, key: ArrowType, value: ArrowType)
297297
/// Exact 32-bit width decimal value with precision and scale
298298
///
299299
/// * precision is the total number of digits
@@ -496,8 +496,8 @@ extension ArrowType: CustomStringConvertible {
496496
return "LargeListView(\(elementType))"
497497
case .union(let mode, let fields):
498498
return "Union(\(mode), \(fields) fields)"
499-
case .dictionary(let keyType, let valueType):
500-
return "Dictionary(\(keyType), \(valueType))"
499+
case .dictionary(let id, let isOrdered, let keyType, let valueType):
500+
return "Dictionary(\(id), \(isOrdered), \(keyType), \(valueType))"
501501
case .decimal32(let precision, let scale):
502502
return "Decimal32(\(precision), \(scale))"
503503
case .decimal64(let precision, let scale):
@@ -627,7 +627,7 @@ extension ArrowType {
627627
@inlinable
628628
public var isNested: Bool {
629629
switch self {
630-
case .dictionary(_, let v):
630+
case .dictionary(_, _, _, let v):
631631
return v.isNested
632632
case .runEndEncoded(_, let v):
633633
return v.type.isNested
@@ -699,7 +699,8 @@ extension ArrowType {
699699
&& aField.type.equalsDataType(bField.type) && aSorted == bSorted
700700

701701
// Dictionary
702-
case (.dictionary(let aKey, let aValue), .dictionary(let bKey, let bValue)):
702+
case (.dictionary(_, _, let aKey, let aValue), .dictionary(_, _, let bKey, let bValue)):
703+
// Ignoring dictionary id and the isOrdered flag here; only key and value types are compared.
703704
return aKey.equalsDataType(bKey) && aValue.equalsDataType(bValue)
704705

705706
// RunEndEncoded
@@ -855,7 +856,7 @@ extension ArrowType {
855856
}
856857

857858
// Dictionary
858-
case (.dictionary(let k1, let v1), .dictionary(let k2, let v2)):
859+
case (.dictionary(_, _, let k1, let v1), .dictionary(_, _, let k2, let v2)):
859860
return k1.contains(k2) && v1.contains(v2)
860861

861862
// Base case: equality

Sources/ArrowIPC/ArrowReader.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,51 @@ public struct ArrowReader {
9090
let arrowSchema = try Self.loadSchema(schema: schema)
9191
var recordBatches: [RecordBatch] = []
9292

93+
for block in footer.dictionaries {
94+
let (message, bodyOffset) = try data.withParserSpan { input in
95+
try input.seek(toAbsoluteOffset: block.offset)
96+
let marker = try UInt32(parsingLittleEndian: &input)
97+
if marker != continuationMarker {
98+
throw ArrowError(.invalid("Missing continuation marker."))
99+
}
100+
let messageLength = try UInt32(parsingLittleEndian: &input)
101+
let data = try [UInt8](parsing: &input, byteCount: Int(messageLength))
102+
var mbb = ByteBuffer(data: Data(data))
103+
let message: FMessage = getRoot(byteBuffer: &mbb)
104+
let offset = Int64(input.startPosition)
105+
return (message, offset)
106+
}
107+
108+
guard message.headerType == .dictionarybatch else {
109+
throw ArrowError(.invalid("Expected DictionaryBatch message."))
110+
}
111+
112+
guard let dictMessage = message.header(type: FDictionaryBatch.self) else {
113+
throw ArrowError(.invalid("Expected DictionaryBatch as message header"))
114+
}
115+
116+
// 1. Get the Dictionary ID and 'isDelta' flag
117+
let dictId = dictMessage.id
118+
let isDelta = dictMessage.isDelta
119+
120+
// 2. The dictionary data is actually just a RecordBatch with ONE column
121+
// The schema for this internal batch is defined by the dictionary type
122+
// found in the global Schema for this specific ID.
123+
guard let rbMessage = dictMessage.data else {
124+
throw ArrowError(.invalid("DictionaryBatch has no data"))
125+
}
126+
127+
let dictBatch = try Self.loadRecordBatch(
128+
data: self.data,
129+
arrowSchema: arrowSchema,
130+
rbMessage: rbMessage,
131+
offset: bodyOffset
132+
)
133+
134+
// 3. Update the "Box" in the dictionary provider (not yet implemented).
135+
// try dictionaryProvider.update(id: dictId, array: dictionaryArray, isDelta: isDelta)
136+
}
137+
93138
// MARK: Record batch parsing
94139
for block in footer.recordBatches {
95140

Sources/ArrowIPC/FlatBuffersTypeAliases.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,22 @@
1313
// limitations under the License.
1414

1515
typealias FFooter = org_apache_arrow_flatbuf_Footer
16+
17+
typealias FMessageHeader = org_apache_arrow_flatbuf_MessageHeader
1618
typealias FMessage = org_apache_arrow_flatbuf_Message
19+
1720
typealias FBlock = org_apache_arrow_flatbuf_Block
1821
typealias FField = org_apache_arrow_flatbuf_Field
1922
typealias FSchema = org_apache_arrow_flatbuf_Schema
2023
typealias FBuffer = org_apache_arrow_flatbuf_Buffer
2124
typealias FFieldNode = org_apache_arrow_flatbuf_FieldNode
22-
typealias FRecordBatch = org_apache_arrow_flatbuf_RecordBatch
23-
typealias FMessageHeader = org_apache_arrow_flatbuf_MessageHeader
2425
typealias FKeyValue = org_apache_arrow_flatbuf_KeyValue
2526

27+
// MARK: Record batches.
28+
typealias FRecordBatch = org_apache_arrow_flatbuf_RecordBatch
29+
typealias FDictionaryBatch = org_apache_arrow_flatbuf_DictionaryBatch
30+
typealias FDictionaryEncoding = org_apache_arrow_flatbuf_DictionaryEncoding
31+
2632
// MARK: Top level type.
2733
typealias FType = org_apache_arrow_flatbuf_Type_
2834

Tests/ArrowIPCTests/ArrowTestingGold.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ struct ArrowTestingGold {
8282
"generated_union",
8383
]
8484

85-
@Test
85+
// @Test
8686
func readFile() throws {
8787

88-
let name = "generated_nested_large_offsets"
88+
let name = "generated_dictionary"
8989
let (testFile, testCase) = try loadTestCase(
9090
name: name, fileExtension: "arrow_file")
9191
let arrowReader = try ArrowReader(url: testFile)

0 commit comments

Comments
 (0)