Skip to content

Commit b90002f

Browse files
committed
[SPARK-52373] Add CRC32 struct
### What changes were proposed in this pull request? This PR aims to add `CRC32` struct. ### Why are the changes needed? Apache Spark Connect requires `CRC32` checksum to implement APIs like `addArtifacts`. ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? Generated-by: `Gemini 2.5 Flash`. It was asked to write ASF-licensed CRC32 Swift code (`CRC32.swift`). I verified it manually and wrote a test suite, `CRC32Tests.swift`. Closes #191 from dongjoon-hyun/SPARK-52373. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent f32fa79 commit b90002f

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

Sources/SparkConnect/CRC32.swift

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
19+
import Foundation
20+
21+
/// Table-driven CRC-32 checksum using the reflected IEEE 802.3 polynomial `0xEDB88320`.
///
/// Every public entry point funnels into one shared byte loop, so the `Data`,
/// `String` and `[UInt8]` variants return the same value for the same bytes.
public struct CRC32 {

  /// Pre-computed CRC32 table.
  ///
  /// Entry `i` is the result of running the byte value `i` through eight
  /// shift/conditional-XOR steps with the polynomial, which lets the checksum
  /// loop consume one whole byte per table lookup.
  private static let crcTable: [UInt32] = {
    let polynomial: UInt32 = 0xEDB8_8320  // IEEE 802.3 polynomial (reflected form)

    return (0..<256).map { (index: Int) -> UInt32 in
      var remainder = UInt32(index)
      for _ in 0..<8 {
        // Shift out the low bit; fold the polynomial in whenever that bit was set.
        remainder = (remainder & 1) == 1 ? polynomial ^ (remainder >> 1) : remainder >> 1
      }
      return remainder
    }
  }()

  /// Runs the CRC-32 update loop over any sequence of bytes.
  ///
  /// This is the single implementation behind all public `checksum` overloads.
  ///
  /// - Parameter bytes: The bytes to digest, in order.
  /// - Returns: The finalized (bit-inverted) CRC32 value; `0` for an empty sequence.
  private static func digest<Bytes: Sequence>(of bytes: Bytes) -> UInt32
  where Bytes.Element == UInt8 {
    var crc: UInt32 = 0xFFFF_FFFF  // initial register value: all ones

    for byte in bytes {
      // The low byte of (crc XOR input) selects the table entry to fold in.
      crc = (crc >> 8) ^ crcTable[Int((crc ^ UInt32(byte)) & 0xFF)]
    }
    return ~crc  // final inversion
  }

  /// Calculates the CRC32 checksum for the given Data.
  ///
  /// - Parameter data: The Data object for which to calculate the checksum.
  /// - Returns: The calculated CRC32 checksum as a UInt32.
  public static func checksum(data: Data) -> UInt32 {
    // A raw buffer is already a sequence of `UInt8`, so no memory rebinding is needed.
    data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> UInt32 in
      digest(of: buffer)
    }
  }

  /// Calculates the CRC32 checksum for the given String.
  ///
  /// - Parameter string: The String object for which to calculate the checksum.
  /// - Parameter encoding: The encoding to use when converting the string to Data (defaults to .utf8).
  /// - Returns: The calculated CRC32 checksum as a UInt32. Returns nil if the string cannot be converted to Data.
  public static func checksum(string: String, encoding: String.Encoding = .utf8) -> UInt32? {
    guard let data = string.data(using: encoding) else {
      return nil
    }
    return checksum(data: data)
  }

  /// Calculates the CRC32 checksum for the given array of bytes.
  ///
  /// - Parameter bytes: The [UInt8] array for which to calculate the checksum.
  /// - Returns: The calculated CRC32 checksum as a UInt32.
  public static func checksum(bytes: [UInt8]) -> UInt32 {
    digest(of: bytes)
  }
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
20+
import Foundation
21+
import SparkConnect
22+
import Testing
23+
24+
/// Tests for `CRC32`, checking that the `Data`, `String` and `[UInt8]` entry
/// points agree with each other and with known checksum values.
struct CRC32Tests {
  /// Empty input must yield a checksum of zero through every entry point.
  @Test
  func testChecksumWithEmptyData() async throws {
    #expect(CRC32.checksum(data: Data()) == 0)
    #expect(CRC32.checksum(string: "") == 0)
    #expect(CRC32.checksum(bytes: []) == 0)
  }

  /// The standard CRC-32 check vector: ASCII "123456789" hashes to 0xCBF43926.
  @Test
  func testStandardCheckValue() async throws {
    let str = "123456789"
    // `#require` records a test failure instead of crashing if the conversion fails.
    let data = try #require(str.data(using: .ascii))
    #expect(CRC32.checksum(string: str, encoding: .ascii) == 0xCBF4_3926)
    #expect(CRC32.checksum(data: data) == 0xCBF4_3926)
    #expect(CRC32.checksum(bytes: [UInt8](data)) == 0xCBF4_3926)
  }

  /// A short ASCII string produces the same known checksum through every entry point.
  @Test
  func testChecksum() async throws {
    let str = "Apache Spark Connect Client for Swift"
    let data = try #require(str.data(using: .ascii))
    #expect(CRC32.checksum(string: str, encoding: .ascii) == 2_736_908_745)
    #expect(CRC32.checksum(data: data) == 2_736_908_745)
    #expect(CRC32.checksum(bytes: [UInt8](data)) == 2_736_908_745)
  }

  /// A multi-kilobyte input (the same string repeated 1000 times) still matches its known checksum.
  @Test
  func testLongChecksum() async throws {
    let str = String(repeating: "Apache Spark Connect Client for Swift", count: 1000)
    let data = try #require(str.data(using: .ascii))
    #expect(CRC32.checksum(string: str, encoding: .ascii) == 1_985_943_888)
    #expect(CRC32.checksum(data: data) == 1_985_943_888)
    #expect(CRC32.checksum(bytes: [UInt8](data)) == 1_985_943_888)
  }
}

0 commit comments

Comments
 (0)