Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Sources/SparkConnect/Extension.swift
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,18 @@ extension String {
default: .UNRECOGNIZED(-1)
}
}

/// Converts this string to a Declarative Pipelines `DatasetType`.
///
/// Recognized values are `"unspecified"`, `"materializedView"`, `"table"`,
/// and `"temporaryView"`; any other string maps to `.UNRECOGNIZED(-1)`
/// rather than throwing, matching the sibling conversion properties.
var toDatasetType: DatasetType {
  // Return the switch expression directly; no intermediate binding needed.
  switch self {
  case "unspecified": .unspecified
  case "materializedView": .materializedView
  case "table": .table
  case "temporaryView": .temporaryView
  default: .UNRECOGNIZED(-1)
  }
}
}

extension [String: String] {
Expand Down
35 changes: 35 additions & 0 deletions Sources/SparkConnect/SparkConnectClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ public actor SparkConnectClient {
throw SparkConnectError.InvalidViewName
case let m where m.contains("DATA_SOURCE_NOT_FOUND"):
throw SparkConnectError.DataSourceNotFound
case let m where m.contains("DATASET_TYPE_UNSPECIFIED"):
throw SparkConnectError.DatasetTypeUnspecified
default:
throw error
}
Expand Down Expand Up @@ -1237,6 +1239,39 @@ public actor SparkConnectClient {
}
}

/// Defines a dataset (table, materialized view, or temporary view) in the
/// dataflow graph identified by `dataflowGraphID`.
/// - Parameters:
///   - dataflowGraphID: The target dataflow graph ID; must be a valid UUID string.
///   - datasetName: The name of the dataset to define.
///   - datasetType: A dataset type string (`"table"`, `"materializedView"`,
///     `"temporaryView"`, or `"unspecified"`); converted via `toDatasetType`.
///   - comment: An optional comment attached to the dataset.
/// - Returns: `true` if any server response carries a pipeline command result.
/// - Throws: `SparkConnectError.InvalidArgument` when `dataflowGraphID` is not a UUID.
@discardableResult
func defineDataset(
  _ dataflowGraphID: String,
  _ datasetName: String,
  _ datasetType: String,
  _ comment: String? = nil
) async throws -> Bool {
  try await withGPRC { client in
    // Reject malformed graph IDs before building any protobuf messages.
    guard UUID(uuidString: dataflowGraphID) != nil else {
      throw SparkConnectError.InvalidArgument
    }

    // Assemble the DefineDataset payload.
    var dataset = Spark_Connect_PipelineCommand.DefineDataset()
    dataset.dataflowGraphID = dataflowGraphID
    dataset.datasetName = datasetName
    dataset.datasetType = datasetType.toDatasetType
    if let comment {
      dataset.comment = comment
    }

    // Wrap it in a pipeline command, then in a top-level command.
    var pipelineCommand = Spark_Connect_PipelineCommand()
    pipelineCommand.commandType = .defineDataset(dataset)

    var command = Spark_Connect_Command()
    command.commandType = .pipelineCommand(pipelineCommand)

    let responses = try await execute(self.sessionID!, command)
    // Success is signaled by a pipelineCommandResult response type.
    return responses.contains {
      $0.responseType == .pipelineCommandResult(Spark_Connect_PipelineCommandResult())
    }
  }
}

private enum URIParams {
static let PARAM_GRPC_MAX_MESSAGE_SIZE = "grpc_max_message_size"
static let PARAM_SESSION_ID = "session_id"
Expand Down
1 change: 1 addition & 0 deletions Sources/SparkConnect/SparkConnectError.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public enum SparkConnectError: Error {
case CatalogNotFound
case ColumnNotFound
case DataSourceNotFound
case DatasetTypeUnspecified
case InvalidArgument
case InvalidSessionID
case InvalidType
Expand Down
1 change: 1 addition & 0 deletions Sources/SparkConnect/TypeAliases.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ typealias AnalyzePlanResponse = Spark_Connect_AnalyzePlanResponse
typealias Command = Spark_Connect_Command
typealias ConfigRequest = Spark_Connect_ConfigRequest
typealias DataSource = Spark_Connect_Read.DataSource
typealias DatasetType = Spark_Connect_DatasetType
typealias DataType = Spark_Connect_DataType
typealias DayTimeInterval = Spark_Connect_DataType.DayTimeInterval
typealias Drop = Spark_Connect_Drop
Expand Down
22 changes: 22 additions & 0 deletions Tests/SparkConnectTests/SparkConnectClientTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,26 @@ struct SparkConnectClientTests {
}
await client.stop()
}

/// Integration test for `SparkConnectClient.defineDataset`:
/// verifies client-side argument validation and, on Spark 4.1+, server-side
/// handling of each dataset type string.
@Test
func defineDataset() async throws {
  let client = SparkConnectClient(remote: TEST_REMOTE)
  let response = try await client.connect(UUID().uuidString)

  // A non-UUID dataflow graph ID must be rejected client-side.
  try await #require(throws: SparkConnectError.InvalidArgument) {
    try await client.defineDataset("not-a-uuid-format", "ds1", "table")
  }

  // The pipeline API is only exercised against Spark 4.1 servers
  // (presumably the version that supports Declarative Pipelines — confirm).
  if response.sparkVersion.version.starts(with: "4.1") {
    let dataflowGraphID = try await client.createDataflowGraph()
    #expect(UUID(uuidString: dataflowGraphID) != nil)
    // An "unspecified" dataset type surfaces as a dedicated server error.
    try await #require(throws: SparkConnectError.DatasetTypeUnspecified) {
      try await client.defineDataset(dataflowGraphID, "ds1", "unspecified")
    }
    // Each concrete dataset type should be accepted and acknowledged.
    #expect(try await client.defineDataset(dataflowGraphID, "ds2", "materializedView"))
    #expect(try await client.defineDataset(dataflowGraphID, "ds3", "table"))
    #expect(try await client.defineDataset(dataflowGraphID, "ds4", "temporaryView"))
  }
  await client.stop()
}
}
Loading