Skip to content

Commit 6123c8b

Browse files
committed
[SPARK-51708] Add CaseInsensitiveDictionary
### What changes were proposed in this pull request?

This PR aims to add `CaseInsensitiveDictionary` and use it in the following classes.

- `DataFrameReader`
- `DataFrameWriter`

### Why are the changes needed?

For feature parity.

### Does this PR introduce _any_ user-facing change?

No, this is a new addition to the unreleased version.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #40 from dongjoon-hyun/SPARK-51708.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent f073e56 commit 6123c8b

File tree

4 files changed

+154
-6
lines changed

4 files changed

+154
-6
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
import Foundation
20+
21+
/// A dictionary in which keys are case insensitive. The input dictionary can be
/// accessed for cases where case-sensitive information is required.
///
/// Two views of the same entries are kept side by side:
/// - `originalDictionary` keeps every key in the casing it was supplied with.
/// - a private dictionary keyed by `key.lowercased()` backs lookups and `count`.
public struct CaseInsensitiveDictionary: Sendable {
  /// The entries with their keys in the original casing.
  ///
  /// - Note: Assigning to this property directly does not refresh the
  ///   case-insensitive lookup view; mutate entries through the subscript instead.
  public var originalDictionary: [String: Sendable]

  /// The entries keyed by the lowercased form of each key.
  private var keyLowerCasedDictionary: [String: Sendable] = [:]

  /// Creates a case-insensitive dictionary from `originalDictionary`.
  ///
  /// When several keys differ only by case, all of them stay in
  /// `originalDictionary`, but the lookup view keeps a single value: the one
  /// whose original key sorts last under `String`'s `<` ordering.
  ///
  /// - Parameter originalDictionary: The initial entries. Defaults to empty.
  init(_ originalDictionary: [String: Sendable] = [:]) {
    self.originalDictionary = originalDictionary
    // `Dictionary` iteration order is unspecified and may differ between runs,
    // so fold the keys in sorted order to make the winner among case-colliding
    // keys deterministic.
    for (key, value) in originalDictionary.sorted(by: { $0.key < $1.key }) {
      keyLowerCasedDictionary[key.lowercased()] = value
    }
  }

  /// Reads or writes the value for `key`, ignoring the key's case.
  ///
  /// Setting a value first removes every entry of `originalDictionary` whose
  /// key matches `key` case-insensitively, then stores the value under `key`
  /// exactly as spelled. Setting `nil` only removes the matching entries.
  subscript(key: String) -> Sendable? {
    get {
      return keyLowerCasedDictionary[key.lowercased()]
    }
    set {
      let lowerKey = key.lowercased()
      // Drop all existing spellings of this key so only one can remain.
      originalDictionary = originalDictionary.filter { $0.key.lowercased() != lowerKey }
      if let newValue = newValue {
        keyLowerCasedDictionary[lowerKey] = newValue
        originalDictionary[key] = newValue
      } else {
        keyLowerCasedDictionary.removeValue(forKey: lowerKey)
      }
    }
  }

  /// Returns the entries with their keys in the original casing.
  public func toDictionary() -> [String: Sendable] {
    return originalDictionary
  }

  /// Returns the original-cased entries with each value converted through
  /// `String(describing:)`.
  public func toStringDictionary() -> [String: String] {
    return originalDictionary.mapValues { String(describing: $0) }
  }

  /// The number of distinct keys when case is ignored.
  public var count: Int {
    return keyLowerCasedDictionary.count
  }
}

Sources/SparkConnect/DataFrameReader.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ public actor DataFrameReader: Sendable {
3232

3333
var paths: [String] = []
3434

35-
// TODO: Case-insensitive Map
36-
var extraOptions: [String: String] = [:]
35+
var extraOptions: CaseInsensitiveDictionary = CaseInsensitiveDictionary([:])
3736

3837
let sparkSession: SparkSession
3938

@@ -84,7 +83,7 @@ public actor DataFrameReader: Sendable {
8483
var dataSource = DataSource()
8584
dataSource.format = self.source
8685
dataSource.paths = self.paths
87-
dataSource.options = self.extraOptions
86+
dataSource.options = self.extraOptions.toStringDictionary()
8887

8988
var read = Read()
9089
read.dataSource = dataSource

Sources/SparkConnect/DataFrameWriter.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ public actor DataFrameWriter: Sendable {
3232

3333
var saveMode: String = "default"
3434

35-
// TODO: Case-insensitive Map
36-
var extraOptions: [String: String] = [:]
35+
var extraOptions: CaseInsensitiveDictionary = CaseInsensitiveDictionary()
3736

3837
var partitioningColumns: [String]? = nil
3938

@@ -146,7 +145,7 @@ public actor DataFrameWriter: Sendable {
146145
write.bucketBy = bucketBy
147146
}
148147

149-
for option in self.extraOptions {
148+
for option in self.extraOptions.toStringDictionary() {
150149
write.options[option.key] = option.value
151150
}
152151

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
20+
import Foundation
21+
import Testing
22+
23+
@testable import SparkConnect
24+
25+
/// A test suite for `CaseInsensitiveDictionary`
struct CaseInsensitiveDictionaryTests {
  /// A dictionary built from an empty input reports a count of zero.
  @Test
  func empty() async throws {
    let dict = CaseInsensitiveDictionary([:])
    #expect(dict.count == 0)
  }

  /// Keys differing only by case collapse to one entry in `count`, while
  /// `originalDictionary` keeps both spellings.
  @Test
  func originalDictionary() async throws {
    let dict = CaseInsensitiveDictionary([
      "key1": "value1",
      "KEY1": "VALUE1",
    ])
    #expect(dict.count == 1)
    #expect(dict.originalDictionary.count == 2)
  }

  /// `toDictionary()` returns the original-cased entries, so both spellings remain.
  @Test
  func toDictionary() async throws {
    let dict = CaseInsensitiveDictionary([
      "key1": "value1",
      "KEY1": "VALUE1",
    ])
    #expect(dict.toDictionary().count == 2)
  }

  /// Values written through the subscript can be read back with any key casing.
  ///
  /// Conditional casts (`as?`) are used so that a missing key or an unexpected
  /// value type is recorded as a failed expectation instead of crashing the run.
  @Test
  func `subscript`() async throws {
    var dict = CaseInsensitiveDictionary([:])
    #expect(dict.count == 0)

    dict["KEY1"] = "value1"
    #expect(dict.count == 1)
    #expect(dict["key1"] as? String == "value1")
    #expect(dict["KEY1"] as? String == "value1")
    #expect(dict["KeY1"] as? String == "value1")

    dict["key2"] = false
    #expect(dict.count == 2)
    #expect(dict["kEy2"] as? Bool == false)

    dict["key3"] = 2025
    #expect(dict.count == 3)
    #expect(dict["key3"] as? Int == 2025)
  }

  /// Writing through the subscript replaces every case variant of the key in
  /// `originalDictionary` with the single newly written entry.
  @Test
  func updatedOriginalDictionary() async throws {
    var dict = CaseInsensitiveDictionary([
      "key1": "value1",
      "KEY1": "VALUE1",
    ])
    #expect(dict.count == 1)
    #expect(dict.originalDictionary.count == 2)

    dict["KEY1"] = "Swift"
    #expect(dict["KEY1"] as? String == "Swift")
    #expect(dict.count == 1)
    #expect(dict.originalDictionary.count == 1)
    #expect(dict.toDictionary().count == 1)
  }
}

0 commit comments

Comments
 (0)