Skip to content

Commit 69a4ac4

Browse files
committed
[SPARK-51825] Add SparkFileUtils
### What changes were proposed in this pull request?

This PR aims to add `SparkFileUtils`, modeled on Spark's Scala implementation:
- https://github.com/apache/spark/blob/master/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala

### Why are the changes needed?

This is required to add more features like `addArtifact`.

### Does this PR introduce _any_ user-facing change?

No, this is a new addition.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #64 from dongjoon-hyun/SPARK-51825.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 119eeea commit 69a4ac4

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
import Foundation
20+
21+
/// Utility functions like `org.apache.spark.util.SparkFileUtils`.
public enum SparkFileUtils {

  /// Return a well-formed URL for the file described by a user input string.
  ///
  /// If the supplied path does not contain a scheme, or is a relative path, it will be
  /// converted into an absolute path with a `file://` scheme. A leading `~` is expanded
  /// to the user's home directory. If the input carries a `#fragment`, the fragment is
  /// kept as the fragment of the resulting URL instead of becoming part of the path.
  ///
  /// - Parameter path: A path string.
  /// - Returns: A URL, or `nil` if a URL cannot be composed from the path and its fragment.
  static func resolveURL(_ path: String) -> URL? {
    if let url = URL(string: path) {
      // Inputs that already carry a scheme (file:, hdfs:, s3a:, ...) are returned untouched.
      if url.scheme != nil {
        return url.absoluteURL
      }

      // make sure to handle if the path has a fragment (applies to yarn
      // distributed cache)
      if let fragment = url.fragment {
        // Use the parsed path here, not the raw input: the raw input still contains
        // "#fragment", which would be percent-encoded into the path (`%23...`).
        let localPath = (url.path as NSString).expandingTildeInPath
        // `URL(fileURLWithPath:)` resolves relative paths against the current directory,
        // so the result is absolute just like in the fragment-less case below.
        let fileURL = URL(fileURLWithPath: localPath).absoluteURL
        var components = URLComponents(url: fileURL, resolvingAgainstBaseURL: true)
        components?.fragment = fragment
        return components?.url?.absoluteURL
      }
    }
    return URL(fileURLWithPath: (path as NSString).expandingTildeInPath).absoluteURL
  }

  /// Lists the contents of a directory recursively.
  /// - Parameter directory: The URL of the directory to enumerate.
  /// - Returns: The URLs produced by the `FileManager` enumerator for `directory`,
  ///   or an empty array if an enumerator cannot be created for it.
  static func recursiveList(directory: URL) -> [URL] {
    guard
      let enumerator = FileManager.default.enumerator(
        at: directory, includingPropertiesForKeys: nil)
    else {
      return []
    }
    // The enumerator yields `Any`; keep only the URL elements.
    return enumerator.compactMap { $0 as? URL }
  }

  /// Create a directory given the abstract pathname
  ///
  /// Missing intermediate directories are created as well.
  /// - Parameter url: An URL location.
  /// - Returns: Return true if the directory is successfully created; otherwise, return false.
  @discardableResult
  static func createDirectory(at url: URL) -> Bool {
    let fileManager = FileManager.default
    do {
      try fileManager.createDirectory(at: url, withIntermediateDirectories: true)
      // Double-check that the path now exists and really is a directory.
      var isDir: ObjCBool = false
      let exists = fileManager.fileExists(atPath: url.path, isDirectory: &isDir)
      return exists && isDir.boolValue
    } catch {
      print("Failed to create directory: \(url.path), error: \(error)")
      return false
    }
  }

  /// Create a temporary directory inside the given parent directory.
  ///
  /// The directory name is `<namePrefix>-<UUID>`. The URL is returned even when the
  /// directory could not be created; the failure is only printed by ``createDirectory(at:)``.
  /// - Parameters:
  ///   - root: A parent directory.
  ///   - namePrefix: A prefix for a new directory name.
  /// - Returns: An URL for the created directory
  static func createDirectory(root: String, namePrefix: String = "spark") -> URL {
    let tempDir = URL(fileURLWithPath: root).appendingPathComponent(
      "\(namePrefix)-\(UUID().uuidString)")
    createDirectory(at: tempDir)
    return tempDir
  }

  /// Create a new temporary directory prefixed with `spark` inside ``NSTemporaryDirectory``.
  /// - Returns: An URL for the created directory
  static func createTempDir() -> URL {
    return createDirectory(root: NSTemporaryDirectory(), namePrefix: "spark")
  }

  /// Delete a file or directory and its contents recursively.
  /// Throws an exception if deletion is unsuccessful.
  /// - Parameter url: An URL location.
  /// - Throws: `SparkConnectError.InvalidArgumentException` if nothing exists at `url`,
  ///   or the error raised by `FileManager.removeItem(at:)` if the removal fails.
  static func deleteRecursively(_ url: URL) throws {
    let fileManager = FileManager.default
    guard fileManager.fileExists(atPath: url.path) else {
      throw SparkConnectError.InvalidArgumentException
    }
    try fileManager.removeItem(at: url)
  }
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//
2+
// Licensed to the Apache Software Foundation (ASF) under one
3+
// or more contributor license agreements. See the NOTICE file
4+
// distributed with this work for additional information
5+
// regarding copyright ownership. The ASF licenses this file
6+
// to you under the Apache License, Version 2.0 (the
7+
// "License"); you may not use this file except in compliance
8+
// with the License. You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing,
13+
// software distributed under the License is distributed on an
14+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
// KIND, either express or implied. See the License for the
16+
// specific language governing permissions and limitations
17+
// under the License.
18+
//
19+
20+
import Foundation
21+
import Testing
22+
23+
@testable import SparkConnect
24+
25+
/// A test suite for `SparkFileUtils`
struct SparkFileUtilsTests {
  let fm = FileManager.default

  /// Checks `resolveURL` for a bare file name, a `~`-prefixed path and
  /// inputs that already carry a scheme.
  @Test
  func resolveURI() async throws {
    // `#require` fails the test with a message on `nil` instead of crashing on a force-unwrap.
    let fileNameURL = try #require(SparkFileUtils.resolveURL("jar1"))
    #expect(fileNameURL.absoluteString == "file://\(fm.currentDirectoryPath)/jar1")

    let homeUrl = try #require(SparkFileUtils.resolveURL("~/jar1"))
    #expect(homeUrl.absoluteString == "\(fm.homeDirectoryForCurrentUser.absoluteString)jar1")

    let absolutePath = try #require(SparkFileUtils.resolveURL("file:/jar1"))
    #expect(absolutePath.absoluteString == "file:/jar1")

    let hdfsPath = try #require(SparkFileUtils.resolveURL("hdfs:/root/spark.jar"))
    #expect(hdfsPath.absoluteString == "hdfs:/root/spark.jar")

    let s3aPath = try #require(SparkFileUtils.resolveURL("s3a:/bucket/spark.jar"))
    #expect(s3aPath.absoluteString == "s3a:/bucket/spark.jar")
  }

  /// Exercises directory creation, recursive listing and recursive deletion.
  @Test
  func directory() async throws {
    // This tests three functions.
    // createTempDir -> createDirectory(root: String, namePrefix: String = "spark")
    //               -> createDirectory(at: URL)
    let dir = SparkFileUtils.createTempDir()
    // Best-effort cleanup so the directory is not leaked if a step below throws.
    // After a successful `deleteRecursively` this is a silent no-op.
    defer { try? fm.removeItem(at: dir) }

    var isDir: ObjCBool = false
    let exists = fm.fileExists(atPath: dir.path(), isDirectory: &isDir)
    #expect(exists && isDir.boolValue)

    #expect(SparkFileUtils.recursiveList(directory: dir).isEmpty)

    // Build the child URL with URL API instead of string concatenation.
    try Data().write(to: dir.appendingPathComponent("1"))

    #expect(SparkFileUtils.recursiveList(directory: dir).count == 1)

    try SparkFileUtils.deleteRecursively(dir)

    #expect(!fm.fileExists(atPath: dir.path(), isDirectory: &isDir))
  }
}

0 commit comments

Comments
 (0)