diff --git a/CHANGELOG.md b/CHANGELOG.md index 493e0c60db..debf51634e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti ### New Features - Support credential vending for federated catalogs. `ALLOW_FEDERATED_CATALOGS_CREDENTIAL_VENDING` (default: true) was added to toggle this feature. +- Create a Spark 4.0 client. ### Changes diff --git a/plugins/pluginlibs.versions.toml b/plugins/pluginlibs.versions.toml index 37b7696b41..c766903ecd 100644 --- a/plugins/pluginlibs.versions.toml +++ b/plugins/pluginlibs.versions.toml @@ -20,5 +20,6 @@ [versions] iceberg = "1.10.0" spark35 = "3.5.6" +spark40 = "4.0.1" scala212 = "2.12.19" scala213 = "2.13.15" diff --git a/plugins/spark/README.md b/plugins/spark/README.md index 1bdfe3dd70..6e15dc8005 100644 --- a/plugins/spark/README.md +++ b/plugins/spark/README.md @@ -17,110 +17,8 @@ under the License. --> -# Polaris Spark Plugin +# Polaris Spark Plugins -The Polaris Spark plugin provides a SparkCatalog class, which communicates with the Polaris -REST endpoints, and provides implementations for Apache Spark's -[TableCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java), -[ViewCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java) classes. -[SupportsNamespaces](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java), - -Right now, the plugin only provides support for Spark 3.5, Scala version 2.12 and 2.13, -and depends on iceberg-spark-runtime 1.9.1. - -# Start Spark with local Polaris service using the Polaris Spark plugin -The following command starts a Polaris server for local testing, it runs on localhost:8181 with default -realm `POLARIS` and root credentials `root:s3cr3t`: -```shell -./gradlew run -``` - -Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages` -option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR. - -The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR. - -# Build and run with Polaris spark package locally -The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package -with Spark, you first need to publish the source JAR to your local Maven repository. 
- -Run the following command to build the Polaris Spark project and publish the source JAR to your local Maven repository: -- `./gradlew assemble` -- build the whole Polaris project without running tests -- `./gradlew publishToMavenLocal` -- publish Polaris project source JAR to local Maven repository - -```shell -bin/spark-shell \ ---packages org.apache.polaris:polaris-spark-_:,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ ---conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ ---conf spark.sql.catalog..warehouse= \ ---conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ ---conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ ---conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ ---conf spark.sql.catalog..credential="root:secret" \ ---conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ ---conf spark.sql.catalog..token-refresh-enabled=true \ ---conf spark.sql.sources.useV1SourceList='' -``` - -The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project. -Assume the following values: -- `spark_version`: 3.5 -- `scala_version`: 2.12 -- `polaris_version`: 1.2.0-incubating-SNAPSHOT -- `catalog-name`: `polaris` -The Spark command would look like following: - -```shell -bin/spark-shell \ ---packages org.apache.polaris:polaris-spark-3.5_2.12:1.2.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ ---conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ ---conf spark.sql.catalog.polaris.warehouse=polaris \ ---conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \ ---conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \ ---conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ ---conf spark.sql.catalog.polaris.credential="root:secret" \ ---conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \ ---conf spark.sql.catalog.polaris.token-refresh-enabled=true \ ---conf spark.sql.sources.useV1SourceList='' -``` - -# Build and run with Polaris spark bundle JAR -The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. The resulting JAR will follow this naming format: -polaris-spark-_--bundle.jar -For example: -polaris-spark-bundle-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar - -Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, -the bundle JAR can be found under: plugins/spark/v3.5/spark/build//libs/. 
-To start Spark using the bundle JAR, specify it with the `--jars` option as shown below: - -```shell -bin/spark-shell \ ---jars \ ---packages org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ ---conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ ---conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ ---conf spark.sql.catalog..warehouse= \ ---conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ ---conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ ---conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ ---conf spark.sql.catalog..credential="root:secret" \ ---conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ ---conf spark.sql.catalog..token-refresh-enabled=true \ ---conf spark.sql.sources.useV1SourceList='' -``` - -# Limitations -The Polaris Spark client supports catalog management for both Iceberg and Delta tables, it routes all Iceberg table -requests to the Iceberg REST endpoints, and routes all Delta table requests to the Generic Table REST endpoints. - -The Spark Client requires at least delta 3.2.1 to work with Delta tables, which requires at least Apache Spark 3.5.3. -Following describes the current functionality limitations of the Polaris Spark client: -1) Create table as select (CTAS) is not supported for Delta tables. As a result, the `saveAsTable` method of `Dataframe` - is also not supported, since it relies on the CTAS support. -2) Create a Delta table without explicit location is not supported. -3) Rename a Delta table is not supported. -4) ALTER TABLE ... SET LOCATION is not supported for DELTA table. -5) For other non-Iceberg tables like csv, it is not supported today. +This directory contains the Polaris Spark plugins. The plugins are built for specific versions of Spark: +- [Spark 3.5](./v3.5/README.md) +- [Spark 4.0](./v4.0/README.md) diff --git a/plugins/spark/spark-scala.properties b/plugins/spark/spark-scala.properties index 2ed71b574f..b2e66a1514 100644 --- a/plugins/spark/spark-scala.properties +++ b/plugins/spark/spark-scala.properties @@ -17,6 +17,9 @@ # under the License. # -sparkVersions=3.5 +sparkVersions=3.5,4.0 scalaVersions=2.12,2.13 + +# Spark 4.0 only supports Scala 2.13 +scalaVersions.4.0=2.13 diff --git a/plugins/spark/v3.5/README.md b/plugins/spark/v3.5/README.md new file mode 100644 index 0000000000..283d3b3a5b --- /dev/null +++ b/plugins/spark/v3.5/README.md @@ -0,0 +1,126 @@ + + +# Polaris Spark 3.5 Plugin + +The Polaris Spark plugin provides a SparkCatalog class, which communicates with the Polaris +REST endpoints, and provides implementations for Apache Spark's +[TableCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java), +[ViewCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java) classes. +[SupportsNamespaces](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java), + +Right now, the plugin only provides support for Spark 3.5, Scala version 2.12 and 2.13, +and depends on iceberg-spark-runtime 1.9.1. 
+
+# Start Spark with local Polaris service using the Polaris Spark plugin
+The following command starts a Polaris server for local testing; it runs on localhost:8181 with the default
+realm `POLARIS` and root credentials `root:s3cr3t`:
+```shell
+./gradlew run
+```
+
+Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages`
+option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR.
+
+The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR.
+
+# Build and run with the Polaris Spark package locally
+The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package
+with Spark, you first need to publish the source JAR to your local Maven repository.
+
+Run the following commands to build the Polaris Spark project and publish the source JAR to your local Maven repository:
+- `./gradlew assemble` -- build the whole Polaris project without running tests
+- `./gradlew publishToMavenLocal` -- publish the Polaris project source JAR to the local Maven repository
+
+```shell
+bin/spark-shell \
+--packages org.apache.polaris:polaris-spark-<spark_version>_<scala_version>:<polaris_version>,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
+--conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.<catalog-name>.credential="root:s3cr3t" \
+--conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project.
+Assume the following values:
+- `spark_version`: 3.5
+- `scala_version`: 2.12
+- `polaris_version`: 1.2.0-incubating-SNAPSHOT
+- `catalog-name`: `polaris`
+
+The Spark command would then look like the following:
+
+```shell
+bin/spark-shell \
+--packages org.apache.polaris:polaris-spark-3.5_2.12:1.2.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.polaris.warehouse=polaris \
+--conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.polaris.credential="root:s3cr3t" \
+--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.polaris.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+# Build and run with the Polaris Spark bundle JAR
+The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. The resulting JAR follows this naming format:
+polaris-spark-<spark_version>_<scala_version>-<polaris_version>-bundle.jar
+For example:
+polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar
+
+Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes,
+the bundle JAR can be found under: plugins/spark/v3.5/spark/build/<scala_version>/libs/.
+To start Spark using the bundle JAR, specify it with the `--jars` option as shown below:
+
+```shell
+bin/spark-shell \
+--jars <path-to-spark-client-bundle-jar> \
+--packages org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
+--conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.<catalog-name>.credential="root:s3cr3t" \
+--conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+# Limitations
+The Polaris Spark client supports catalog management for both Iceberg and Delta tables: it routes all Iceberg table
+requests to the Iceberg REST endpoints and all Delta table requests to the Generic Table REST endpoints.
+
+The Spark client requires at least Delta Lake 3.2.1 to work with Delta tables, which in turn requires at least Apache Spark 3.5.3.
+The following describes the current functionality limitations of the Polaris Spark client:
+1) Create table as select (CTAS) is not supported for Delta tables. As a result, the `saveAsTable` method of `DataFrame`
+   is also not supported, since it relies on CTAS support.
+2) Creating a Delta table without an explicit location is not supported.
+3) Renaming a Delta table is not supported.
+4) ALTER TABLE ... SET LOCATION is not supported for Delta tables.
+5) Other non-Iceberg table formats, such as CSV, are not supported today.
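+
+# Example: Iceberg and Delta tables side by side
+As a quick, illustrative end-to-end check of the routing described above, the following statements can be run through
+`spark.sql(...)` in the `spark-shell` session started earlier (or directly in `bin/spark-sql` started with the same
+options). The namespace and table names below are placeholders, and the Delta table location must point to a path
+writable from your Spark job:
+
+```sql
+-- Iceberg tables are routed to the Iceberg REST endpoints; no explicit location is required.
+CREATE NAMESPACE IF NOT EXISTS polaris.quickstart_ns;
+CREATE TABLE polaris.quickstart_ns.customers_iceberg (id INT, name STRING) USING ICEBERG;
+INSERT INTO polaris.quickstart_ns.customers_iceberg VALUES (1, 'anna'), (2, 'bob');
+
+-- Delta tables are routed to the Generic Table REST endpoints and currently require an explicit location.
+CREATE TABLE polaris.quickstart_ns.customers_delta (id INT, name STRING) USING DELTA
+  LOCATION 'file:///tmp/polaris-quickstart/customers_delta';
+INSERT INTO polaris.quickstart_ns.customers_delta VALUES (3, 'john');
+
+-- Both table formats can be queried through the same catalog.
+SELECT * FROM polaris.quickstart_ns.customers_iceberg
+UNION ALL
+SELECT * FROM polaris.quickstart_ns.customers_delta;
+```
+
+Because CTAS is not supported for Delta tables, `df.write.format("delta").saveAsTable(...)` fails against this catalog;
+instead, write the data to a location with `save(...)` and then create the table from that existing location, as the
+integration tests do.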
diff --git a/plugins/spark/v3.5/integration/build.gradle.kts b/plugins/spark/v3.5/integration/build.gradle.kts index f7c9892086..12d21131ef 100644 --- a/plugins/spark/v3.5/integration/build.gradle.kts +++ b/plugins/spark/v3.5/integration/build.gradle.kts @@ -35,6 +35,14 @@ val scalaLibraryVersion = pluginlibs.versions.scala213.get() } +configurations.all { + if (name != "checkstyle") { + resolutionStrategy { + force("org.antlr:antlr4-runtime:4.9.3") // Spark 3.5 and Delta 3.3 require ANTLR 4.9.3 + } + } +} + dependencies { // must be enforced to get a consistent and validated set of dependencies implementation(enforcedPlatform(libs.quarkus.bom)) { @@ -91,7 +99,7 @@ dependencies { testImplementation(enforcedPlatform("org.scala-lang:scala-library:${scalaLibraryVersion}")) testImplementation(enforcedPlatform("org.scala-lang:scala-reflect:${scalaLibraryVersion}")) testImplementation(libs.javax.servlet.api) - testImplementation(libs.antlr4.runtime) + // ANTLR version is determined by Spark/Delta dependencies, not enforced } tasks.named("intTest").configure { diff --git a/plugins/spark/v4.0/README.md b/plugins/spark/v4.0/README.md new file mode 100644 index 0000000000..09986ad4bf --- /dev/null +++ b/plugins/spark/v4.0/README.md @@ -0,0 +1,123 @@ + + +# Polaris Spark 4.0 Plugin + +The Polaris Spark 4.0 plugin provides a SparkCatalog class, which communicates with the Polaris +REST endpoints, and provides implementations for Apache Spark's +[TableCatalog](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java), +[ViewCatalog](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java), +[SupportsNamespaces](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java). + +This plugin depends on iceberg-spark-runtime-4.0_2.13:1.10.0. + +# Start Spark with local Polaris service using the Polaris Spark plugin +The following command starts a Polaris server for local testing, it runs on localhost:8181 with default +realm `POLARIS` and root credentials `root:s3cr3t`: +```shell +./gradlew run +``` + +Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages` +option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR. + +The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR. + +# Build and run with Polaris spark package locally +The Polaris Spark 4.0 client source code is located in plugins/spark/v4.0/spark. To use the Polaris Spark package +with Spark, you first need to publish the source JAR to your local Maven repository. 
+
+Run the following commands to build the Polaris Spark project and publish the source JAR to your local Maven repository:
+- `./gradlew assemble` -- build the whole Polaris project without running tests
+- `./gradlew publishToMavenLocal` -- publish the Polaris project source JAR to the local Maven repository
+
+```shell
+bin/spark-shell \
+--packages org.apache.polaris:polaris-spark-<spark_version>_<scala_version>:<polaris_version>,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
+--conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.<catalog-name>.credential="root:s3cr3t" \
+--conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project.
+Assume the following values:
+- `spark_version`: 4.0
+- `scala_version`: 2.13 (only Scala 2.13 is supported for Spark 4.0)
+- `polaris_version`: 1.2.0-incubating-SNAPSHOT
+- `catalog-name`: `polaris`
+
+The Spark command would then look like the following:
+
+```shell
+bin/spark-shell \
+--packages org.apache.polaris:polaris-spark-4.0_2.13:1.2.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.polaris.warehouse=polaris \
+--conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.polaris.credential="root:s3cr3t" \
+--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.polaris.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+# Build and run with the Polaris Spark bundle JAR
+The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. The resulting JAR follows this naming format:
+polaris-spark-<spark_version>_<scala_version>-<polaris_version>-bundle.jar
+For example: polaris-spark-4.0_2.13-1.2.0-incubating-SNAPSHOT-bundle.jar
+
+Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes,
+the bundle JAR can be found under: plugins/spark/v4.0/spark/build/2.13/libs/.
+To start Spark using the bundle JAR, specify it with the `--jars` option as shown below:
+
+```shell
+bin/spark-shell \
+--jars <path-to-spark-client-bundle-jar> \
+--packages org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \
+--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \
+--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \
+--conf spark.sql.catalog.<catalog-name>.warehouse=<catalog-name> \
+--conf spark.sql.catalog.<catalog-name>.header.X-Iceberg-Access-Delegation=vended-credentials \
+--conf spark.sql.catalog.<catalog-name>=org.apache.polaris.spark.SparkCatalog \
+--conf spark.sql.catalog.<catalog-name>.uri=http://localhost:8181/api/catalog \
+--conf spark.sql.catalog.<catalog-name>.credential="root:s3cr3t" \
+--conf spark.sql.catalog.<catalog-name>.scope='PRINCIPAL_ROLE:ALL' \
+--conf spark.sql.catalog.<catalog-name>.token-refresh-enabled=true \
+--conf spark.sql.sources.useV1SourceList=''
+```
+
+# Limitations
+The Polaris Spark 4.0 client supports catalog management for both Iceberg and Delta tables: it routes all Iceberg table
+requests to the Iceberg REST endpoints and all Delta table requests to the Generic Table REST endpoints.
+
+The Spark 4.0 client requires Delta Lake 4.0.0 or higher to work with Delta tables.
+The following describes the current functionality limitations of the Polaris Spark 4.0 client:
+1. Create table as select (CTAS) is not supported for Delta tables. As a result, the `saveAsTable` method of `DataFrame` is also not supported, since it relies on CTAS support.
+2. Creating a Delta table without an explicit location is not supported.
+3. Renaming a Delta table is not supported.
+4. ALTER TABLE ... SET LOCATION is not supported for Delta tables.
+5. Other non-Iceberg table formats, such as CSV, are not supported today.
diff --git a/plugins/spark/v4.0/integration/build.gradle.kts b/plugins/spark/v4.0/integration/build.gradle.kts
new file mode 100644
index 0000000000..cd0552faee
--- /dev/null
+++ b/plugins/spark/v4.0/integration/build.gradle.kts
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +plugins { + alias(libs.plugins.quarkus) + id("org.kordamp.gradle.jandex") + id("polaris-runtime") +} + +// get version information +val sparkMajorVersion = "4.0" +val scalaVersion = getAndUseScalaVersionForProject() +val icebergVersion = pluginlibs.versions.iceberg.get() +val spark40Version = pluginlibs.versions.spark40.get() +val scalaLibraryVersion = + if (scalaVersion == "2.12") { + pluginlibs.versions.scala212.get() + } else { + pluginlibs.versions.scala213.get() + } + +configurations.all { + if (name != "checkstyle") { + resolutionStrategy { + force("org.antlr:antlr4-runtime:4.13.1") // Spark 4.0 and Delta 4.0 require ANTLR 4.13.1 + } + } +} + +dependencies { + // must be enforced to get a consistent and validated set of dependencies + implementation(enforcedPlatform(libs.quarkus.bom)) { + exclude(group = "org.antlr", module = "antlr4-runtime") + exclude(group = "org.scala-lang", module = "scala-library") + exclude(group = "org.scala-lang", module = "scala-reflect") + } + + // For test configurations, exclude jakarta.servlet-api from Quarkus BOM + // to allow Spark 4.0's version (5.0.0) which includes SingleThreadModel + testImplementation(platform(libs.quarkus.bom)) { + exclude(group = "jakarta.servlet", module = "jakarta.servlet-api") + } + + implementation(project(":polaris-runtime-service")) + + testImplementation( + "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}" + ) + testImplementation(project(":polaris-spark-${sparkMajorVersion}_${scalaVersion}")) + + testImplementation(project(":polaris-api-management-model")) + + testImplementation(project(":polaris-runtime-test-common")) + + testImplementation("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") { + // exclude log4j dependencies. Explicit dependencies for the log4j libraries are + // enforced below to ensure the version compatibility + exclude("org.apache.logging.log4j", "log4j-slf4j2-impl") + exclude("org.apache.logging.log4j", "log4j-1.2-api") + exclude("org.apache.logging.log4j", "log4j-core") + exclude("org.slf4j", "jul-to-slf4j") + } + // enforce the usage of log4j 2.24.3. 
This is for the log4j-api compatibility + // of spark-sql dependency + testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.2") + + testImplementation("io.delta:delta-spark_${scalaVersion}:4.0.0") + + testImplementation(platform(libs.jackson.bom)) + testImplementation("com.fasterxml.jackson.jakarta.rs:jackson-jakarta-rs-json-provider") + + testImplementation(testFixtures(project(":polaris-runtime-service"))) + + testImplementation(platform(libs.quarkus.bom)) + testImplementation("io.quarkus:quarkus-junit5") + testImplementation("io.quarkus:quarkus-rest-client") + testImplementation("io.quarkus:quarkus-rest-client-jackson") + + testImplementation(platform(libs.awssdk.bom)) + testImplementation("software.amazon.awssdk:glue") + testImplementation("software.amazon.awssdk:kms") + testImplementation("software.amazon.awssdk:dynamodb") + + testImplementation(platform(libs.testcontainers.bom)) + testImplementation("org.testcontainers:testcontainers") + testImplementation(libs.s3mock.testcontainers) + + // Required for Spark integration tests + testImplementation(enforcedPlatform("org.scala-lang:scala-library:${scalaLibraryVersion}")) + testImplementation(enforcedPlatform("org.scala-lang:scala-reflect:${scalaLibraryVersion}")) + testImplementation(libs.javax.servlet.api) + // ANTLR version is determined by Spark/Delta dependencies, not enforced +} + +// Force jakarta.servlet-api to 5.0.0 for Spark 4.0 compatibility +// Spark 4.0 requires version 5.0.0 which includes SingleThreadModel +// Quarkus BOM forces it to 6.x which removed SingleThreadModel +configurations.named("intTestRuntimeClasspath") { + resolutionStrategy { force("jakarta.servlet:jakarta.servlet-api:5.0.0") } +} + +tasks.named("intTest").configure { + if (System.getenv("AWS_REGION") == null) { + environment("AWS_REGION", "us-west-2") + } + // Note: the test secrets are referenced in + // org.apache.polaris.service.it.ServerManager + environment("POLARIS_BOOTSTRAP_CREDENTIALS", "POLARIS,test-admin,test-secret") + jvmArgs("--add-exports", "java.base/sun.nio.ch=ALL-UNNAMED") + // Need to allow a java security manager after Java 21, for Subject.getSubject to work + // "getSubject is supported only if a security manager is allowed". + systemProperty("java.security.manager", "allow") + // Same issue as above: allow a java security manager after Java 21 + // (this setting is for the application under test, while the setting above is for test code). + systemProperty("quarkus.test.arg-line", "-Djava.security.manager=allow") + val logsDir = project.layout.buildDirectory.get().asFile.resolve("logs") + // delete files from previous runs + doFirst { + // delete log files written by Polaris + logsDir.deleteRecursively() + // delete quarkus.log file (captured Polaris stdout/stderr) + project.layout.buildDirectory.get().asFile.resolve("quarkus.log").delete() + } + // This property is not honored in a per-profile application.properties file, + // so we need to set it here. 
+ systemProperty("quarkus.log.file.path", logsDir.resolve("polaris.log").absolutePath) + // For Spark integration tests + addSparkJvmOptions() +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java new file mode 100644 index 0000000000..cc0f177f7e --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static org.apache.polaris.service.it.ext.PolarisServerManagerLoader.polarisServerManager; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.jakarta.rs.json.JacksonJsonProvider; +import jakarta.ws.rs.client.Client; +import jakarta.ws.rs.client.ClientBuilder; +import java.util.Map; +import java.util.Random; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTClient; +import org.apache.iceberg.rest.auth.AuthSession; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.rest.responses.OAuthTokenResponse; +import org.apache.polaris.service.it.env.ClientCredentials; +import org.apache.polaris.service.it.env.ManagementApi; +import org.apache.polaris.service.it.env.PolarisApiEndpoints; + +/** + * This class provides a REST client for the Polaris Management service endpoints and its auth-token + * endpoint, which is used in Spark client tests to run commands that Spark SQL can’t issue directly + * (e.g., createCatalog). + */ +public final class PolarisManagementClient implements AutoCloseable { + private final PolarisApiEndpoints endpoints; + private final Client client; + // Use an alphanumeric ID for widest compatibility in HTTP and SQL. + // Use MAX_RADIX for shorter output. 
+ private final String clientId = + Long.toString(Math.abs(new Random().nextLong()), Character.MAX_RADIX); + // initialization an Iceberg rest client for fetch token + private final RESTClient restClient; + + private PolarisManagementClient(PolarisApiEndpoints endpoints) { + this.endpoints = endpoints; + + this.client = + ClientBuilder.newBuilder() + .readTimeout(5, MINUTES) + .connectTimeout(1, MINUTES) + .register(new JacksonJsonProvider(new ObjectMapper())) + .build(); + + this.restClient = HTTPClient.builder(Map.of()).uri(endpoints.catalogApiEndpoint()).build(); + } + + public static PolarisManagementClient managementClient(PolarisApiEndpoints endpoints) { + return new PolarisManagementClient(endpoints); + } + + /** This method should be used by test code to make top-level entity names. */ + public String newEntityName(String hint) { + return polarisServerManager().transformEntityName(hint + "_" + clientId); + } + + public ManagementApi managementApi(String authToken) { + return new ManagementApi(client, endpoints, authToken, endpoints.managementApiEndpoint()); + } + + public ManagementApi managementApi(ClientCredentials credentials) { + return managementApi(obtainToken(credentials)); + } + + /** Requests an access token from the Polaris server for the given {@link ClientCredentials}. */ + public String obtainToken(ClientCredentials credentials) { + OAuthTokenResponse response = + OAuth2Util.fetchToken( + restClient.withAuthSession(AuthSession.EMPTY), + Map.of(), + String.format("%s:%s", credentials.clientId(), credentials.clientSecret()), + "PRINCIPAL_ROLE:ALL", + endpoints.catalogApiEndpoint() + "/v1/oauth/tokens", + Map.of("grant_type", "client_credentials")); + return response.token(); + } + + @Override + public void close() throws Exception { + client.close(); + restClient.close(); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java new file mode 100644 index 0000000000..fb5dac805a --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.util.Arrays; +import java.util.Map; +import org.apache.iceberg.exceptions.BadRequestException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.spark.SupportsReplaceView; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.connector.catalog.CatalogPlugin; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.NamespaceChange; +import org.apache.spark.sql.connector.catalog.StagingTableCatalog; +import org.apache.spark.sql.connector.catalog.SupportsNamespaces; +import org.apache.spark.sql.connector.catalog.View; +import org.apache.spark.sql.connector.catalog.ViewCatalog; +import org.apache.spark.sql.connector.catalog.ViewChange; +import org.apache.spark.sql.connector.catalog.ViewInfo; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * This integration directly performs operations using the SparkCatalog instance, instead of going + * through Spark SQL interface. This provides a more direct testing capability against the Polaris + * SparkCatalog operations, some operations like listNamespaces under a namespace can not be + * triggered through a SQL interface directly with Spark. + */ +@QuarkusIntegrationTest +public abstract class SparkCatalogBaseIT extends SparkIntegrationBase { + private static StructType schema = new StructType().add("id", "long").add("name", "string"); + protected StagingTableCatalog tableCatalog = null; + protected SupportsNamespaces namespaceCatalog = null; + protected ViewCatalog viewCatalog = null; + protected SupportsReplaceView replaceViewCatalog = null; + + @BeforeEach + protected void loadCatalogs() { + Preconditions.checkArgument(spark != null, "No active spark found"); + Preconditions.checkArgument(catalogName != null, "No catalogName found"); + CatalogPlugin catalogPlugin = spark.sessionState().catalogManager().catalog(catalogName); + tableCatalog = (StagingTableCatalog) catalogPlugin; + namespaceCatalog = (SupportsNamespaces) catalogPlugin; + viewCatalog = (ViewCatalog) catalogPlugin; + replaceViewCatalog = (SupportsReplaceView) catalogPlugin; + } + + @Test + void testNamespaceOperations() throws Exception { + String[][] lv1ns = new String[][] {{"l1ns1"}, {"l1ns2"}}; + String[][] lv2ns1 = new String[][] {{"l1ns1", "l2ns1"}, {"l1ns1", "l2ns2"}}; + String[][] lv2ns2 = new String[][] {{"l1ns2", "l2ns3"}}; + + // create the namespaces + for (String[] namespace : lv1ns) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns1) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns2) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + + // list namespaces under root + String[][] lv1nsResult = namespaceCatalog.listNamespaces(); + assertThat(lv1nsResult.length).isEqualTo(lv1ns.length); + for (String[] namespace : lv1ns) { + assertThat(Arrays.asList(lv1nsResult)).contains(namespace); + } + // list namespace under l1ns1 + 
String[][] lv2ns1Result = namespaceCatalog.listNamespaces(lv1ns[0]); + assertThat(lv2ns1Result.length).isEqualTo(lv2ns1.length); + for (String[] namespace : lv2ns1) { + assertThat(Arrays.asList(lv2ns1Result)).contains(namespace); + } + // list namespace under l1ns2 + String[][] lv2ns2Result = namespaceCatalog.listNamespaces(lv1ns[1]); + assertThat(lv2ns2Result.length).isEqualTo(lv2ns2.length); + for (String[] namespace : lv2ns2) { + assertThat(Arrays.asList(lv2ns2Result)).contains(namespace); + } + // no namespace under l1ns2.l2ns3 + assertThat(namespaceCatalog.listNamespaces(lv2ns2[0]).length).isEqualTo(0); + + // drop the nested namespace under lv1ns[1] + namespaceCatalog.dropNamespace(lv2ns2[0], true); + assertThat(namespaceCatalog.listNamespaces(lv1ns[1]).length).isEqualTo(0); + namespaceCatalog.dropNamespace(lv1ns[1], true); + assertThatThrownBy(() -> namespaceCatalog.listNamespaces(lv1ns[1])) + .isInstanceOf(NoSuchNamespaceException.class); + + // directly drop lv1ns[0] should fail + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(lv1ns[0], true)) + .isInstanceOfAny( + BadRequestException.class, // Iceberg < 1.9.0 + NamespaceNotEmptyException.class // Iceberg >= 1.9.0 + ); + for (String[] namespace : lv2ns1) { + namespaceCatalog.dropNamespace(namespace, true); + } + namespaceCatalog.dropNamespace(lv1ns[0], true); + + // no more namespace available + assertThat(namespaceCatalog.listNamespaces().length).isEqualTo(0); + } + + @Test + void testAlterNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("owner", "user1"); + + namespaceCatalog.createNamespace(namespace, metadata); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "user1")); + + namespaceCatalog.alterNamespace(namespace, NamespaceChange.setProperty("owner", "new-user")); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "new-user")); + + // drop the namespace + namespaceCatalog.dropNamespace(namespace, true); + } + + @Test + void testBasicViewOperations() throws Exception { + String[] namespace = new String[] {"ns"}; + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + + Identifier viewIdentifier = Identifier.of(namespace, "test-view"); + String viewSql = "select id from test-table where id < 3"; + ViewInfo viewInfo = + new ViewInfo( + viewIdentifier, + viewSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo); + + // load the view + View view = viewCatalog.loadView(viewIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // alter the view properties + viewCatalog.alterView(viewIdentifier, ViewChange.setProperty("owner", "user1")); + view = viewCatalog.loadView(viewIdentifier); + assertThat(view.properties()).contains(Map.entry("owner", "user1")); + + // rename the view + Identifier newIdentifier = Identifier.of(namespace, "new-view"); + viewCatalog.renameView(viewIdentifier, newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(viewIdentifier)) + .isInstanceOf(NoSuchViewException.class); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // replace the view + String newSql = "select id from test-table where id == 3"; + Map properties = Maps.newHashMap(); + properties.put("owner", 
"test-user"); + replaceViewCatalog.replaceView( + newIdentifier, + newSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + properties); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(newSql); + assertThat(view.properties()).contains(Map.entry("owner", "test-user")); + + // drop the view + viewCatalog.dropView(newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(newIdentifier)) + .isInstanceOf(NoSuchViewException.class); + } + + @Test + void testListViews() throws Exception { + String[] l1ns = new String[] {"ns"}; + namespaceCatalog.createNamespace(l1ns, Maps.newHashMap()); + + // create a new namespace under the default NS + String[] l2ns = new String[] {"ns", "nsl2"}; + namespaceCatalog.createNamespace(l2ns, Maps.newHashMap()); + // create one view under l1 + String view1Name = "test-view1"; + String view1SQL = "select id from test-table where id >= 3"; + ViewInfo viewInfo1 = + new ViewInfo( + Identifier.of(l1ns, view1Name), + view1SQL, + catalogName, + l1ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo1); + // create two views under the l2 namespace + String[] nsl2ViewNames = new String[] {"test-view2", "test-view3"}; + String[] nsl2ViewSQLs = + new String[] { + "select id from test-table where id == 3", "select id from test-table where id < 3" + }; + for (int i = 0; i < nsl2ViewNames.length; i++) { + ViewInfo viewInfo = + new ViewInfo( + Identifier.of(l2ns, nsl2ViewNames[i]), + nsl2ViewSQLs[i], + catalogName, + l2ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo); + } + // list views under l1ns + Identifier[] l1Views = viewCatalog.listViews(l1ns); + assertThat(l1Views.length).isEqualTo(1); + assertThat(l1Views[0].name()).isEqualTo(view1Name); + + // list views under l2ns + Identifier[] l2Views = viewCatalog.listViews(l2ns); + assertThat(l2Views.length).isEqualTo(nsl2ViewSQLs.length); + for (String name : nsl2ViewNames) { + assertThat(Arrays.asList(l2Views)).contains(Identifier.of(l2ns, name)); + } + + // drop namespace fails since there are views under it + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(l2ns, true)) + .isInstanceOfAny( + BadRequestException.class, // Iceberg < 1.9.0 + NamespaceNotEmptyException.class // Iceberg >= 1.9.0 + ); + // drop the views + for (String name : nsl2ViewNames) { + viewCatalog.dropView(Identifier.of(l2ns, name)); + } + namespaceCatalog.dropNamespace(l2ns, true); + viewCatalog.dropView(Identifier.of(l1ns, view1Name)); + namespaceCatalog.dropNamespace(l1ns, true); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java new file mode 100644 index 0000000000..812d8f19d5 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import org.apache.polaris.service.it.ext.SparkSessionBuilder; +import org.apache.spark.sql.SparkSession; + +@QuarkusIntegrationTest +public class SparkCatalogIcebergIT extends SparkCatalogBaseIT { + /** Initialize the spark catalog to use the iceberg spark catalog. */ + @Override + protected SparkSession buildSparkSession() { + return SparkSessionBuilder.buildWithTestDefaults() + .withWarehouse(warehouseDir) + .addCatalog(catalogName, "org.apache.iceberg.spark.SparkCatalog", endpoints, sparkToken) + .getOrCreate(); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java new file mode 100644 index 0000000000..97a4c222db --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; + +@QuarkusIntegrationTest +public class SparkCatalogPolarisIT extends SparkCatalogBaseIT {} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java new file mode 100644 index 0000000000..7beacb1141 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.io.File; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.delta.DeltaAnalysisException; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +@QuarkusIntegrationTest +public class SparkDeltaIT extends SparkIntegrationBase { + private String defaultNs; + private String tableRootDir; + + private String getTableLocation(String tableName) { + return String.format("%s/%s", tableRootDir, tableName); + } + + private String getTableNameWithRandomSuffix() { + return generateName("deltatb"); + } + + @BeforeEach + public void createDefaultResources(@TempDir Path tempDir) { + spark.sparkContext().setLogLevel("WARN"); + defaultNs = generateName("delta"); + // create a default namespace + sql("CREATE NAMESPACE %s", defaultNs); + sql("USE NAMESPACE %s", defaultNs); + tableRootDir = + IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve(defaultNs).getPath(); + } + + @AfterEach + public void cleanupDeltaData() { + // clean up delta data + File dirToDelete = new File(tableRootDir); + FileUtils.deleteQuietly(dirToDelete); + sql("DROP NAMESPACE %s", defaultNs); + } + + @Test + public void testBasicTableOperations() { + // create a regular delta table + String deltatb1 = "deltatb1"; + sql( + "CREATE TABLE %s (id INT, name STRING) USING DELTA LOCATION '%s'", + deltatb1, getTableLocation(deltatb1)); + sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", deltatb1); + List results = sql("SELECT * FROM %s WHERE id > 1 ORDER BY id DESC", deltatb1); + assertThat(results.size()).isEqualTo(1); + assertThat(results.get(0)).isEqualTo(new Object[] {2, "bob"}); + + // create a detla table with partition + String deltatb2 = "deltatb2"; + sql( + "CREATE TABLE %s (name String, age INT, country STRING) USING DELTA PARTITIONED BY (country) LOCATION '%s'", + deltatb2, getTableLocation(deltatb2)); + sql( + "INSERT INTO %s VALUES ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA')", + deltatb2); + results = sql("SELECT name, country FROM %s ORDER BY age", deltatb2); + assertThat(results.size()).isEqualTo(3); + assertThat(results.get(0)).isEqualTo(new Object[] {"anna", "US"}); + assertThat(results.get(1)).isEqualTo(new Object[] {"yan", "CHINA"}); + assertThat(results.get(2)).isEqualTo(new Object[] {"james", "US"}); + + // verify the partition dir is created + List subDirs = 
listDirs(getTableLocation(deltatb2)); + assertThat(subDirs).contains("_delta_log", "country=CHINA", "country=US"); + + // test listTables + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(2); + assertThat(tables) + .contains( + new Object[] {defaultNs, deltatb1, false}, new Object[] {defaultNs, deltatb2, false}); + + sql("DROP TABLE %s", deltatb1); + sql("DROP TABLE %s", deltatb2); + tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(0); + } + + @Test + public void testAlterOperations() { + String deltatb = getTableNameWithRandomSuffix(); + sql( + "CREATE TABLE %s (id INT, name STRING) USING DELTA LOCATION '%s'", + deltatb, getTableLocation(deltatb)); + sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", deltatb); + + // test alter columns + // add two new columns to the table + sql("Alter TABLE %s ADD COLUMNS (city STRING, age INT)", deltatb); + // add one more row to the table + sql("INSERT INTO %s VALUES (3, 'john', 'SFO', 20)", deltatb); + // verify the table now have 4 columns with correct result + List results = sql("SELECT * FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, "anna", null, null}); + assertThat(results).contains(new Object[] {2, "bob", null, null}); + assertThat(results).contains(new Object[] {3, "john", "SFO", 20}); + + // drop and rename column require set the delta.columnMapping property + sql("ALTER TABLE %s SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name')", deltatb); + // drop column age + sql("Alter TABLE %s DROP COLUMN age", deltatb); + // verify the table now have 3 columns with correct result + results = sql("SELECT * FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, "anna", null}); + assertThat(results).contains(new Object[] {2, "bob", null}); + assertThat(results).contains(new Object[] {3, "john", "SFO"}); + + // rename column city to address + sql("Alter TABLE %s RENAME COLUMN city TO address", deltatb); + // verify column address exists + results = sql("SELECT id, address FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, null}); + assertThat(results).contains(new Object[] {2, null}); + assertThat(results).contains(new Object[] {3, "SFO"}); + + // test alter properties + sql( + "ALTER TABLE %s SET TBLPROPERTIES ('description' = 'people table', 'test-owner' = 'test-user')", + deltatb); + List tableInfo = sql("DESCRIBE TABLE EXTENDED %s", deltatb); + // find the table properties result + String properties = null; + for (Object[] info : tableInfo) { + if (info[0].equals("Table Properties")) { + properties = (String) info[1]; + break; + } + } + assertThat(properties).contains("description=people table,test-owner=test-user"); + sql("DROP TABLE %s", deltatb); + } + + @Test + public void testUnsupportedAlterTableOperations() { + String deltatb = getTableNameWithRandomSuffix(); + sql( + "CREATE TABLE %s (name String, age INT, country STRING) USING DELTA PARTITIONED BY (country) LOCATION '%s'", + deltatb, getTableLocation(deltatb)); + + // ALTER TABLE ... RENAME TO ... fails + assertThatThrownBy(() -> sql("ALTER TABLE %s RENAME TO new_delta", deltatb)) + .isInstanceOf(UnsupportedOperationException.class); + + // ALTER TABLE ... SET LOCATION ... 
fails + assertThatThrownBy(() -> sql("ALTER TABLE %s SET LOCATION '/tmp/new/path'", deltatb)) + .isInstanceOf(DeltaAnalysisException.class); + + sql("DROP TABLE %s", deltatb); + } + + @Test + public void testUnsupportedTableCreateOperations() { + String deltatb = getTableNameWithRandomSuffix(); + // create delta table with no location + assertThatThrownBy(() -> sql("CREATE TABLE %s (id INT, name STRING) USING DELTA", deltatb)) + .isInstanceOf(UnsupportedOperationException.class); + + // CTAS fails + assertThatThrownBy( + () -> + sql( + "CREATE TABLE %s USING DELTA LOCATION '%s' AS SELECT 1 AS id", + deltatb, getTableLocation(deltatb))) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void testDataframeSaveOperations() { + List data = Arrays.asList(RowFactory.create("Alice", 30), RowFactory.create("Bob", 25)); + StructType schema = + new StructType( + new StructField[] { + new StructField("name", DataTypes.StringType, false, Metadata.empty()), + new StructField("age", DataTypes.IntegerType, false, Metadata.empty()) + }); + Dataset df = spark.createDataFrame(data, schema); + + String deltatb = getTableNameWithRandomSuffix(); + // saveAsTable requires support for delta requires CTAS support for third party catalog + // in delta catalog, which is currently not supported. + assertThatThrownBy( + () -> + df.write() + .format("delta") + .option("path", getTableLocation(deltatb)) + .saveAsTable(deltatb)) + .isInstanceOf(IllegalArgumentException.class); + + // verify regular dataframe saving still works + df.write().format("delta").save(getTableLocation(deltatb)); + + // verify the partition dir is created + List subDirs = listDirs(getTableLocation(deltatb)); + assertThat(subDirs).contains("_delta_log"); + + // verify we can create a table out of the existing delta location + sql("CREATE TABLE %s USING DELTA LOCATION '%s'", deltatb, getTableLocation(deltatb)); + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {defaultNs, deltatb, false}); + + sql("INSERT INTO %s VALUES ('Anna', 11)", deltatb); + + List results = sql("SELECT * FROM %s ORDER BY name", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results.get(0)).isEqualTo(new Object[] {"Alice", 30}); + assertThat(results.get(1)).isEqualTo(new Object[] {"Anna", 11}); + assertThat(results.get(2)).isEqualTo(new Object[] {"Bob", 25}); + + sql("DROP TABLE %s", deltatb); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java new file mode 100644 index 0000000000..a4e060a52f --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +@QuarkusIntegrationTest +public class SparkIT extends SparkIntegrationBase { + @Test + public void testNamespaces() { + List namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + + String[] l1NS = new String[] {"l1ns1", "l1ns2"}; + for (String ns : l1NS) { + sql("CREATE NAMESPACE %s", ns); + } + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + for (String ns : l1NS) { + assertThat(namespaces).contains(new Object[] {ns}); + } + String l2ns = "l2ns"; + // create a nested namespace + sql("CREATE NAMESPACE %s.%s", l1NS[0], l2ns); + // spark SHOW NAMESPACES only lists the top-level namespaces + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + + // cannot drop l1NS before the nested namespace is dropped + assertThatThrownBy(() -> sql("DROP NAMESPACE %s", l1NS[0])) + .hasMessageContaining(String.format("Namespace %s is not empty", l1NS[0])); + sql("DROP NAMESPACE %s.%s", l1NS[0], l2ns); + + for (String ns : l1NS) { + sql("DROP NAMESPACE %s", ns); + } + + // no namespaces remain after all are dropped + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + } + + @Test + public void testCreateDropView() { + String namespace = generateName("ns"); + // create namespace ns + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create two views under the namespace + String view1Name = "testView1"; + String view2Name = "testView2"; + sql("CREATE VIEW %s AS SELECT 1 AS id", view1Name); + sql("CREATE VIEW %s AS SELECT 10 AS id", view2Name); + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(2); + assertThat(views).contains(new Object[] {namespace, view1Name, false}); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + // drop the views + sql("DROP VIEW %s", view1Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + sql("DROP VIEW %s", view2Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(0); + + sql("DROP NAMESPACE %s", namespace); + } + + @Test + public void renameIcebergViewAndTable() { + String namespace = generateName("ns"); + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create one view and one table + String viewName = "originalView"; + sql("CREATE VIEW %s AS SELECT 1 AS id", viewName); + + String icebergTable = "iceberg_table"; + sql("CREATE TABLE %s (col1 int, col2 string)", icebergTable); + + // verify the view and table are listed correctly + List views = sql("SHOW VIEWS"); +
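// SHOW VIEWS rows are (namespace, viewName, isTemporary) +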
assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, viewName, false}); + + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {namespace, icebergTable, false}); + + // rename the view + String renamedView = "renamedView"; + sql("ALTER VIEW %s RENAME TO %s", viewName, renamedView); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, renamedView, false}); + + // rename the table + String newIcebergTable = "iceberg_table_new"; + sql("ALTER TABLE %s RENAME TO %s", icebergTable, newIcebergTable); + tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {namespace, newIcebergTable, false}); + + // clean up the resources + sql("DROP VIEW %s", renamedView); + sql("DROP TABLE %s", newIcebergTable); + sql("DROP NAMESPACE %s", namespace); + } + + @Test + public void testMixedTableAndViews(@TempDir Path tempDir) { + String namespace = generateName("ns"); + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create one iceberg table, iceberg view and one delta table + String icebergTable = "icebergtb"; + sql("CREATE TABLE %s (col1 int, col2 String)", icebergTable); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b')", icebergTable); + + String viewName = "icebergview"; + sql("CREATE VIEW %s AS SELECT col1 + 2 AS col1, col2 FROM %s", viewName, icebergTable); + + String deltaTable = "deltatb"; + String deltaDir = + IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve(namespace).getPath(); + sql( + "CREATE TABLE %s (col1 int, col2 int) using delta location '%s/%s'", + deltaTable, deltaDir, deltaTable); + sql("INSERT INTO %s VALUES (1, 3), (2, 5), (11, 20)", deltaTable); + // join the iceberg and delta table + List joinResult = + sql( + "SELECT icebergtb.col1 as id, icebergtb.col2 as str_col, deltatb.col2 as int_col from icebergtb inner join deltatb on icebergtb.col1 = deltatb.col1 order by id"); + assertThat(joinResult.get(0)).isEqualTo(new Object[] {1, "a", 3}); + assertThat(joinResult.get(1)).isEqualTo(new Object[] {2, "b", 5}); + + // show tables shows all tables + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(2); + assertThat(tables) + .contains( + new Object[] {namespace, icebergTable, false}, + new Object[] {namespace, deltaTable, false}); + + // verify the table and view content + List results = sql("SELECT * FROM %s ORDER BY col1", icebergTable); + assertThat(results.size()).isEqualTo(2); + assertThat(results.get(0)).isEqualTo(new Object[] {1, "a"}); + assertThat(results.get(1)).isEqualTo(new Object[] {2, "b"}); + + // verify the table and view content + results = sql("SELECT * FROM %s ORDER BY col1", viewName); + assertThat(results.size()).isEqualTo(2); + assertThat(results.get(0)).isEqualTo(new Object[] {3, "a"}); + assertThat(results.get(1)).isEqualTo(new Object[] {4, "b"}); + + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, viewName, false}); + + // drop views and tables + sql("DROP TABLE %s", icebergTable); + sql("DROP TABLE %s", deltaTable); + sql("DROP VIEW %s", viewName); + sql("DROP NAMESPACE %s", namespace); + + // clean up delta directory + File dirToDelete = new File(deltaDir); + FileUtils.deleteQuietly(dirToDelete); + } +} diff --git 
a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java new file mode 100644 index 0000000000..7eda4f704a --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import com.google.common.collect.ImmutableList; +import com.google.errorprone.annotations.FormatMethod; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.DirectoryFileFilter; +import org.apache.commons.io.filefilter.FalseFileFilter; +import org.apache.polaris.core.admin.model.AwsStorageConfigInfo; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogProperties; +import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.StorageConfigInfo; +import org.apache.polaris.service.it.env.ClientCredentials; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.apache.polaris.service.it.env.ManagementApi; +import org.apache.polaris.service.it.env.PolarisApiEndpoints; +import org.apache.polaris.service.it.ext.PolarisIntegrationTestExtension; +import org.apache.polaris.service.it.ext.SparkSessionBuilder; +import org.apache.polaris.test.commons.s3mock.S3Mock; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.LoggerFactory; + +@ExtendWith(PolarisIntegrationTestExtension.class) +public abstract class SparkIntegrationBase { + protected static final S3Mock s3Container = new S3Mock(); + protected static SparkSession spark; + protected PolarisApiEndpoints endpoints; + protected PolarisManagementClient client; + protected ManagementApi managementApi; + protected String catalogName; + protected String sparkToken; + + protected URI warehouseDir; + + @BeforeAll + public static void setup() throws IOException { + s3Container.start(); + } + + @AfterAll + public static void cleanup() { + 
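// stop the shared S3 mock container once all tests in the class have finished +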
s3Container.stop(); + } + + @BeforeEach + public void before( + PolarisApiEndpoints apiEndpoints, ClientCredentials credentials, @TempDir Path tempDir) { + endpoints = apiEndpoints; + client = PolarisManagementClient.managementClient(endpoints); + sparkToken = client.obtainToken(credentials); + managementApi = client.managementApi(credentials); + + warehouseDir = IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve("spark-warehouse"); + + catalogName = client.newEntityName("spark_catalog"); + + AwsStorageConfigInfo awsConfigModel = + AwsStorageConfigInfo.builder() + .setRoleArn("arn:aws:iam::123456789012:role/my-role") + .setExternalId("externalId") + .setUserArn("userArn") + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3) + .setAllowedLocations(List.of("s3://my-old-bucket/path/to/data")) + .build(); + CatalogProperties props = new CatalogProperties("s3://my-bucket/path/to/data"); + props.putAll(s3Container.getS3ConfigProperties()); + props.put("polaris.config.drop-with-purge.enabled", "true"); + props.put("polaris.config.namespace-custom-location.enabled", "true"); + Catalog catalog = + PolarisCatalog.builder() + .setType(Catalog.TypeEnum.INTERNAL) + .setName(catalogName) + .setProperties(props) + .setStorageConfigInfo(awsConfigModel) + .build(); + + managementApi.createCatalog(catalog); + + spark = buildSparkSession(); + + onSpark("USE " + catalogName); + } + + protected SparkSession buildSparkSession() { + return SparkSessionBuilder.buildWithTestDefaults() + .withExtensions( + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension") + .withConfig( + "spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .withWarehouse(warehouseDir) + .addCatalog(catalogName, "org.apache.polaris.spark.SparkCatalog", endpoints, sparkToken) + .getOrCreate(); + } + + @AfterEach + public void after() throws Exception { + cleanupCatalog(catalogName); + try { + SparkSession.clearDefaultSession(); + SparkSession.clearActiveSession(); + spark.close(); + } catch (Exception e) { + LoggerFactory.getLogger(getClass()).error("Unable to close spark session", e); + } + + client.close(); + } + + protected void cleanupCatalog(String catalogName) { + onSpark("USE " + catalogName); + List namespaces = onSpark("SHOW NAMESPACES").collectAsList(); + for (Row namespace : namespaces) { + List tables = onSpark("SHOW TABLES IN " + namespace.getString(0)).collectAsList(); + for (Row table : tables) { + onSpark("DROP TABLE " + namespace.getString(0) + "." + table.getString(1)); + } + List views = onSpark("SHOW VIEWS IN " + namespace.getString(0)).collectAsList(); + for (Row view : views) { + onSpark("DROP VIEW " + namespace.getString(0) + "." + view.getString(1)); + } + onSpark("DROP NAMESPACE " + namespace.getString(0)); + } + + managementApi.deleteCatalog(catalogName); + } + + @FormatMethod + protected List sql(String query, Object... 
args) { + List rows = spark.sql(String.format(query, args)).collectAsList(); + if (rows.isEmpty()) { + return ImmutableList.of(); + } + return rowsToJava(rows); + } + + protected List rowsToJava(List rows) { + return rows.stream().map(this::toJava).collect(Collectors.toList()); + } + + private Object[] toJava(Row row) { + return IntStream.range(0, row.size()) + .mapToObj( + pos -> { + if (row.isNullAt(pos)) { + return null; + } + + Object value = row.get(pos); + if (value instanceof Row valueRow) { + return toJava(valueRow); + } else if (value instanceof scala.collection.Seq) { + return row.getList(pos); + } else if (value instanceof scala.collection.Map) { + return row.getJavaMap(pos); + } else { + return value; + } + }) + .toArray(Object[]::new); + } + + /** List the name of directories under a given path non-recursively. */ + protected List listDirs(String path) { + File directory = new File(path); + return FileUtils.listFilesAndDirs( + directory, FalseFileFilter.INSTANCE, DirectoryFileFilter.DIRECTORY) + .stream() + .map(File::getName) + .toList(); + } + + /** Generate a string name with given prefix and a random suffix */ + protected String generateName(String prefix) { + return prefix + "_" + UUID.randomUUID().toString().replaceAll("-", ""); + } + + protected static Dataset onSpark(@Language("SQL") String sql) { + return spark.sql(sql); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager b/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager new file mode 100644 index 0000000000..b3dd7d7c06 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.polaris.service.it.ServerManager diff --git a/plugins/spark/v4.0/regtests/Dockerfile b/plugins/spark/v4.0/regtests/Dockerfile new file mode 100755 index 0000000000..f095aa0349 --- /dev/null +++ b/plugins/spark/v4.0/regtests/Dockerfile @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +FROM docker.io/apache/spark:4.0.1-scala2.13-java17-ubuntu +ARG POLARIS_HOST=polaris +ENV POLARIS_HOST=$POLARIS_HOST +ENV SPARK_HOME=/opt/spark +ENV CURRENT_SCALA_VERSION='2.13' +ENV LANGUAGE='en_US:en' +ENV HOME=/home/spark + +USER root +RUN apt update +RUN apt-get install -y diffutils wget curl +RUN mkdir -p /home/spark && \ + chown -R spark /home/spark && \ + mkdir -p /tmp/polaris-regtests && \ + chown -R spark /tmp/polaris-regtests +RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf + +USER spark + +WORKDIR /home/spark/polaris + +COPY --chown=spark ./v4.0 /home/spark/polaris/v4.0 + +# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205 +USER root +RUN chmod -R go+rwx /home/spark/polaris +RUN chmod -R 777 ./v4.0/regtests +USER spark + +ENTRYPOINT ["./v4.0/regtests/run.sh"] diff --git a/plugins/spark/v4.0/regtests/README.md b/plugins/spark/v4.0/regtests/README.md new file mode 100755 index 0000000000..6587b8ccdb --- /dev/null +++ b/plugins/spark/v4.0/regtests/README.md @@ -0,0 +1,83 @@ + + +# End-to-end regression tests + +regtests provides basic end-to-end tests for spark_sql using the Spark client jars. + +Regression tests are either run in Docker, using docker-compose to orchestrate the tests, or +locally. + +**NOTE** regtests are intended as light-weight checks to ensure the jars can be used to start +Spark and run basic SQL commands. Please use the integration tests for detailed testing. + +## Prerequisites + +It is recommended to clean the `regtests/output` directory before running tests. This can be done by +running: + +```shell +rm -rf ./plugins/spark/v4.0/regtests/output && mkdir -p ./plugins/spark/v4.0/regtests/output && chmod -R 777 ./plugins/spark/v4.0/regtests/output +``` + +## Run Tests With Docker Compose + +Tests can be run with docker-compose using the provided `./plugins/spark/v4.0/regtests/docker-compose.yml` file, as +follows: + +```shell +./gradlew assemble publishToMavenLocal +./gradlew \ + :polaris-server:assemble \ + :polaris-server:quarkusAppPartsBuild --rerun \ + -Dquarkus.container-image.build=true +docker compose -f ./plugins/spark/v4.0/regtests/docker-compose.yml up --build --exit-code-from regtest +``` + +In this setup, a Polaris container will be started in a docker-compose group, using the image +previously built by the Gradle build. Then another container, including a Spark SQL shell, will run +the tests. The exit code will be the same as the exit code of the Spark container. + +This is the flow used in CI and should be done locally before pushing to GitHub to ensure that no +environmental factors contribute to the outcome of the tests. + +**Important**: if you are also using minikube, for example to test the Helm chart, you may need to +_unset_ the Docker environment that was pointing to the Minikube Docker daemon, otherwise the image +will be built by the Minikube Docker daemon and will not be available to the local Docker daemon.
+This can be done by running, _before_ building the image and running the tests: + +```shell +eval $(minikube -p minikube docker-env --unset) +``` + +## Run Tests Locally + +Regression tests can also be run locally, using the test harness. + +To run regression tests locally, run the following: +- `./gradlew assemble publishToMavenLocal` -- build the Polaris project and the Spark client jars, and publish the binaries to the local Maven repository. +- `./gradlew run` -- start a Polaris server on localhost:8181. +- `env POLARIS_HOST=localhost ./plugins/spark/v4.0/regtests/run.sh` -- run regtests. + +Note: the regression tests expect Polaris to run with certain options, e.g. with support for `FILE` +storage, default realm `POLARIS` and root credentials `root:s3cr3t`; if you run the above command, +this will be the case. If you run Polaris in a different way, make sure that Polaris is configured +appropriately. diff --git a/plugins/spark/v4.0/regtests/docker-compose.yml b/plugins/spark/v4.0/regtests/docker-compose.yml new file mode 100755 index 0000000000..32381d1408 --- /dev/null +++ b/plugins/spark/v4.0/regtests/docker-compose.yml @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + polaris: + image: apache/polaris:latest + ports: + - "8181" + - "8182" + environment: + AWS_REGION: us-west-2 + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + quarkus.log.file.enable: "false" + quarkus.otel.sdk.disabled: "true" + polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true" + polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": "[\"FILE\",\"S3\",\"GCS\",\"AZURE\"]" + polaris.readiness.ignore-severe-issues: "true" + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 10s + timeout: 10s + retries: 5 + regtest: + build: + context: ../.. + dockerfile: v4.0/regtests/Dockerfile + args: + POLARIS_HOST: polaris + depends_on: + polaris: + condition: service_healthy + volumes: + - ~/.m2:/home/spark/.m2 + - ./output:/tmp/polaris-regtests/ diff --git a/plugins/spark/v4.0/regtests/run.sh b/plugins/spark/v4.0/regtests/run.sh new file mode 100755 index 0000000000..184145757b --- /dev/null +++ b/plugins/spark/v4.0/regtests/run.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Run without args to run all tests. +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SPARK_ROOT_DIR=$(dirname ${SCRIPT_DIR}) +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding + +FMT_RED='\033[0;31m' +FMT_GREEN='\033[0;32m' +FMT_NC='\033[0m' + +function loginfo() { + echo "$(date): ${@}" +} +function loggreen() { + echo -e "${FMT_GREEN}$(date): ${@}${FMT_NC}" +} +function logred() { + echo -e "${FMT_RED}$(date): ${@}${FMT_NC}" +} + +# Allow bearer token to be provided if desired +if [[ -z "$REGTEST_ROOT_BEARER_TOKEN" ]]; then + if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=s3cr3t" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi + + token=$(echo "$output" | awk -F\" '{print $4}') + + if [ "$token" == "unauthorized_client" ]; then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi + + export REGTEST_ROOT_BEARER_TOKEN=$token +fi + +echo "Root bearer token: ${REGTEST_ROOT_BEARER_TOKEN}" + +NUM_FAILURES=0 + +SCALA_VERSIONS=("2.12" "2.13") +if [[ -n "$CURRENT_SCALA_VERSION" ]]; then + SCALA_VERSIONS=("${CURRENT_SCALA_VERSION}") +fi +SPARK_MAJOR_VERSION="4.0" +SPARK_VERSION="4.0.1" + +SPARK_SHELL_OPTIONS=("PACKAGE" "JAR") + +for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do + echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}" + # find the project jar + SPARK_DIR=${SPARK_ROOT_DIR}/spark + JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit) + echo "found jar ${JAR_PATH}" + + # extract the polaris version from the bundle jar name + JAR_NAME=$(basename "$JAR_PATH") + echo "JAR_NAME=${JAR_NAME}" + POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p') + echo "$POLARIS_VERSION" + + SPARK_EXISTS="TRUE" + if [ -z "${SPARK_HOME}" ]; then + SPARK_EXISTS="FALSE" + fi + + for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do + # clean up the default configuration if it exists + if [ -d "${SPARK_HOME}" ]; then + SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" + if [ -f ${SPARK_CONF} ]; then + rm ${SPARK_CONF} + fi + fi + + if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then + # run the setup without jar configuration + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} + else + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} + fi + + # run the spark_sql test + loginfo "Starting test spark_sql.sh" + + TEST_FILE="spark_sql.sh" + TEST_SHORTNAME="spark_sql" + TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}" + TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr" + TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout" + +
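# run the test script, capturing stderr to a file; stdout is filtered of 'loading settings' noise and compared against the .ref file below +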
mkdir -p ${TEST_TMPDIR} + if (( ${VERBOSE} )); then + ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT} + else + ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT} + fi + loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}" + + TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref" + if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then + loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}" + else + logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}" + echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh" + logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}" + logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}" + diff ${TEST_STDOUT} ${TEST_REF} + NUM_FAILURES=$(( NUM_FAILURES + 1 )) + fi + done + + # clean up + if [ "${SPARK_EXISTS}" = "FALSE" ]; then + rm -rf ${SPARK_HOME} + export SPARK_HOME="" + fi +done + +# clean the output dir +rm -rf ${SCRIPT_DIR}/output + +loginfo "Tests completed with ${NUM_FAILURES} failures" +if (( ${NUM_FAILURES} > 0 )); then + exit 1 +else + exit 0 +fi diff --git a/plugins/spark/v4.0/regtests/setup.sh b/plugins/spark/v4.0/regtests/setup.sh new file mode 100755 index 0000000000..6564809cb1 --- /dev/null +++ b/plugins/spark/v4.0/regtests/setup.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +################################### +# Idempotent setup for spark regression tests. Run manually or let run.sh auto-run. 
+# +# Warning - first time setup may download large amounts of files +# Warning - may clobber conf/spark-defaults.conf +# Warning - it will set the SPARK_HOME environment variable with the spark setup +# +# The script can be called independently like following +# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH} +# Required Parameters: +# --sparkVersion : the spark version to setup +# --scalaVersion : the scala version of spark to setup +# --jar : path to the local Polaris Spark client jar +# + +set -x + +# Fix HOME directory for Ivy cache (Apache Spark Docker image sets HOME to /nonexistent) +export HOME=/home/spark + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +SPARK_VERSION=4.0.1 +SCALA_VERSION=2.12 +POLARIS_CLIENT_JAR="" +POLARIS_VERSION="" +while [[ $# -gt 0 ]]; do + case "$1" in + --sparkVersion) + SPARK_VERSION="$2" + shift # past argument + shift # past value + ;; + --scalaVersion) + SCALA_VERSION="$2" + shift # past argument + shift # past value + ;; + --polarisVersion) + POLARIS_VERSION="$2" + shift # past argument + shift # past value + ;; + --jar) + POLARIS_CLIENT_JAR="$2" + shift # past argument + shift # past value + ;; + --) shift; + break + ;; + esac +done + +echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}" + +if [ "$SCALA_VERSION" == "2.12" ]; then + SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3 +else + SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3-scala${SCALA_VERSION} +fi + +echo "Getting spark distribution ${SPARK_DISTRIBUTION}" + +if [ -z "${SPARK_HOME}" ]; then + SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi +SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" +DERBY_HOME="/tmp/derby" + +# Ensure binaries are downloaded locally +echo 'Verifying Spark binaries...' +if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then + echo 'Setting up Spark...' + if [ -z "${SPARK_VERSION}" ] || [ -z "${SPARK_DISTRIBUTION}" ]; then + echo 'SPARK_VERSION or SPARK_DISTRIBUTION not set. Please set SPARK_VERSION and SPARK_DISTRIBUTION to the desired version.' + exit 1 + fi + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + echo 'Downloading spark distro...' + wget -O ~/${SPARK_DISTRIBUTION}.tgz https://www.apache.org/dyn/closer.lua/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz?action=download + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + if [[ "${OSTYPE}" == "darwin"* ]]; then + echo "Detected OS: mac. Running 'brew install wget' to try again." + brew install wget + wget -O ~/${SPARK_DISTRIBUTION}.tgz https://www.apache.org/dyn/closer.lua/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz?action=download + fi + fi + else + echo 'Found existing Spark tarball' + fi + # check if the download was successful + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + echo 'Failed to download Spark distribution. Please check the logs.' + exit 1 + fi + tar xzvf ~/${SPARK_DISTRIBUTION}.tgz -C ~/${TEST_ROOT_DIR} + if [ $? -ne 0 ]; then + echo 'Failed to extract Spark distribution. Please check the logs.' + exit 1 + else + echo 'Extracted Spark distribution.' + rm ~/${SPARK_DISTRIBUTION}.tgz + fi + SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) + SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" +else + echo 'Verified Spark distro already installed.' 
+fi + +echo "SPARK_HOME=${SPARK_HOME}" +echo "SPARK_CONF=${SPARK_CONF}" + +# Ensure Spark boilerplate conf is set +echo 'Verifying Spark conf...' +if grep 'POLARIS_TESTCONF_V5' ${SPARK_CONF} 2>/dev/null; then + echo 'Verified spark conf' +else + echo 'Setting spark conf...' + # Instead of clobbering existing spark conf, just comment it all out in case it was customized carefully. + sed -i 's/^/# /' ${SPARK_CONF} + +# If POLARIS_CLIENT_JAR is provided, set the spark conf to use the jars configuration. +# Otherwise use the packages setting +if [[ -z "$POLARIS_CLIENT_JAR" ]]; then + cat << EOF >> ${SPARK_CONF} +# POLARIS Spark client test conf +spark.jars.packages org.apache.polaris:polaris-spark-4.0_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1 +EOF +else + cat << EOF >> ${SPARK_CONF} +# POLARIS Spark client test conf +spark.jars $POLARIS_CLIENT_JAR +spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1 +EOF +fi + +cat << EOF >> ${SPARK_CONF} + +spark.sql.variable.substitute true + +spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME} + +# Set Ivy cache directory to a writable location +spark.jars.ivy /home/spark/.ivy2 + +spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension +# this configuration is needed for delta table +spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog +spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog +spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog +# this configuration is currently only used for iceberg tables, generic tables currently +# don't support credential vending +spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials +spark.sql.catalog.polaris.client.region=us-west-2 +# configuration required to ensure DataSourceV2 load works correctly for +# different table formats +spark.sql.sources.useV1SourceList='' +EOF + echo 'Success!' +fi + +# cleanup derby home if existed +if [ -d "${DERBY_HOME}" ]; then + echo "Directory ${DERBY_HOME} exists. Deleting it..." + rm -rf "${DERBY_HOME}" +fi + +echo "Launch spark-sql at ${SPARK_HOME}/bin/spark-sql" +# bootstrap dependencies so that future queries don't need to wait for the downloads. 
+# this is mostly useful for building the Docker image with all needed dependencies +${SPARK_HOME}/bin/spark-sql -e "SELECT 1" + +# ensure SPARK_HOME is setup for later tests +export SPARK_HOME=$SPARK_HOME diff --git a/plugins/spark/v4.0/regtests/spark_sql.ref b/plugins/spark/v4.0/regtests/spark_sql.ref new file mode 100755 index 0000000000..7d9c3efa48 --- /dev/null +++ b/plugins/spark/v4.0/regtests/spark_sql.ref @@ -0,0 +1,57 @@ +{"defaults":{"default-base-location":"file:///tmp/spark_catalog"},"overrides":{"prefix":"spark_sql_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","POST /v1/{prefix}/transactions/commit","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","HEAD /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","POST polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","DELETE polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","DELETE /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","GET /polaris/v1/{prefix}/applicable-policies"]} +Catalog created +spark-sql ()> use polaris; +spark-sql ()> create namespace db1; +spark-sql ()> create namespace db2; +spark-sql ()> show namespaces; +db1 +db2 +spark-sql ()> + > create namespace db1.schema1; +spark-sql ()> show namespaces in db1; +db1.schema1 +spark-sql ()> + > create table db1.schema1.iceberg_tb (col1 int); +spark-sql ()> show tables in db1; +spark-sql ()> show tables in db1.schema1; +iceberg_tb +spark-sql ()> + > use db1.schema1; +spark-sql (db1.schema1)> insert into iceberg_tb values (123), (234), (111); +spark-sql (db1.schema1)> select * from iceberg_tb order by col1; +111 +123 +234 +spark-sql (db1.schema1)> + > create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1'; +spark-sql (db1.schema1)> insert into delta_tb1 values ('ab'), ('bb'), ('dd'); +spark-sql (db1.schema1)> select * from delta_tb1 order by col1; +ab +bb +dd +spark-sql (db1.schema1)> + > show tables; +iceberg_tb +delta_tb1 +spark-sql (db1.schema1)> + > use db1; +spark-sql (db1)> create 
table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2'; +spark-sql (db1)> insert into delta_tb2 values (1), (2), (3) order by col1; +spark-sql (db1)> select * from delta_tb2; +1 +2 +3 +spark-sql (db1)> + > show tables; +delta_tb2 +spark-sql (db1)> show tables in db1.schema1; +iceberg_tb +delta_tb1 +spark-sql (db1)> + > drop table db1.schema1.iceberg_tb; +spark-sql (db1)> drop table db1.schema1.delta_tb1; +spark-sql (db1)> drop namespace db1.schema1; +spark-sql (db1)> drop table delta_tb2; +spark-sql (db1)> drop namespace db1; +spark-sql (db1)> drop namespace db2; +spark-sql (db1)> diff --git a/plugins/spark/v4.0/regtests/spark_sql.sh b/plugins/spark/v4.0/regtests/spark_sql.sh new file mode 100755 index 0000000000..fe036664cd --- /dev/null +++ b/plugins/spark/v4.0/regtests/spark_sql.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" + +CATALOG_NAME="spark_sql_catalog" +curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d '{"name": "spark_sql_catalog", "id": 100, "type": "INTERNAL", "readOnly": false, "properties": {"default-base-location": "file:///tmp/spark_catalog"}, "storageConfigInfo": {"storageType": "FILE", "allowedLocations": ["file:///tmp"]}}' > /dev/stderr + +# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + +curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=${CATALOG_NAME}" +echo +echo "Catalog created" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=${CATALOG_NAME} +use polaris; +create namespace db1; +create namespace db2; +show namespaces; + +create namespace db1.schema1; +show namespaces in db1; + +create table db1.schema1.iceberg_tb (col1 int); +show tables in db1; +show tables in db1.schema1; + +use db1.schema1; +insert into iceberg_tb values (123), (234), (111); +select * from iceberg_tb order by col1; + +create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1'; +insert into delta_tb1 values ('ab'), 
('bb'), ('dd'); +select * from delta_tb1 order by col1; + +show tables; + +use db1; +create table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2'; +insert into delta_tb2 values (1), (2), (3) order by col1; +select * from delta_tb2; + +show tables; +show tables in db1.schema1; + +drop table db1.schema1.iceberg_tb; +drop table db1.schema1.delta_tb1; +drop namespace db1.schema1; +drop table delta_tb2; +drop namespace db1; +drop namespace db2; +EOF + +# clean up the spark_catalog dir +rm -rf /tmp/spark_catalog/ + +curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr diff --git a/plugins/spark/v4.0/spark/BUNDLE-LICENSE b/plugins/spark/v4.0/spark/BUNDLE-LICENSE new file mode 100644 index 0000000000..05c71437e3 --- /dev/null +++ b/plugins/spark/v4.0/spark/BUNDLE-LICENSE @@ -0,0 +1,583 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Iceberg. + +* plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java +* plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java + +Copyright: 2017-2025 The Apache Software Foundation +Home page: https://iceberg.apache.org +License: https://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Iceberg. + +Copyright: 2017-2025 The Apache Software Foundation +Project URL: https://iceberg.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Spark. + +Copyright: 2014 and onwards The Apache Software Foundation +Project URL: https://spark.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Guava. + +Copyright: 2006-2020 The Guava Authors +Project URL: https://github.com/google/guava +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains jspecify. + +Copyright: Google LLC - SpotBugs Team +Project URL: https://github.com/jspecify/jspecify +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Datasketches. + +Copyright: 2020 The Apache Software Foundation + 2015-2018 Yahoo + 2019 Verizon Media +Project URL: https://datasketches.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Parquet. 
+ +Copyright: 2014-2024 The Apache Software Foundation +Project URL: https://parquet.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains FastUtil. + +Copyright: 2002-2014 Sebastiano Vigna +Project URL: http://fastutil.di.unimi.it/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache ORC. + +Copyright: 2013 and onwards The Apache Software Foundation. +Project URL: https://orc.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Arrow. + +Copyright: 2016-2025 The Apache Software Foundation +Project URL: https://arrow.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Caffeine by Ben Manes. + +Copyright: 2014-2019 Ben Manes and contributors +Project URL: https://github.com/ben-manes/caffeine +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains RoaringBitmap. + +Copyright: (c) 2013-... the RoaringBitmap authors +Project URL: https://github.com/RoaringBitmap/RoaringBitmap +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains failsafe. + +Copyright: Jonathan Halterman and friends +Project URL: https://failsafe.dev/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Project Nessie. + +Copyright: 2015-2025 Dremio Corporation +Project URL: https://projectnessie.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Avro. + +Copyright: 2010-2019 The Apache Software Foundation +Project URL: https://avro.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains the Jackson JSON processor. + +Copyright: 2007-2020 Tatu Saloranta and other contributors +Project URL: http://jackson.codehaus.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Airlift Aircompressor. + +Copyright: 2011-2020 Aircompressor authors. +Project URL: https://github.com/airlift/aircompressor +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache HttpComponents Client. + +Copyright: 1999-2022 The Apache Software Foundation. 
+Project URL: https://hc.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Netty's buffer library. + +Copyright: 2014-2020 The Netty Project +Project URL: https://netty.io/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Thrift. + +Copyright: 2006-2017 The Apache Software Foundation. +Project URL: https://thrift.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jetbrains Annotations. + +Copyright: 2000-2020 JetBrains s.r.o. +Project URL: https://github.com/JetBrains/java-annotations +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Google FlatBuffers. + +Copyright: 2013-2020 Google Inc. +Home page: https://google.github.io/flatbuffers/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains ThreeTen. + +Copyright: 2007-present, Stephen Colebourne & Michael Nascimento Santos. +Project URL: https://www.threeten.org/threeten-extra/ +License: BSD 3-Clause +| All rights reserved. +| +| * Redistribution and use in source and binary forms, with or without +| modification, are permitted provided that the following conditions are met: +| +| * Redistributions of source code must retain the above copyright notice, +| this list of conditions and the following disclaimer. +| +| * Redistributions in binary form must reproduce the above copyright notice, +| this list of conditions and the following disclaimer in the documentation +| and/or other materials provided with the distribution. +| +| * Neither the name of JSR-310 nor the names of its contributors +| may be used to endorse or promote products derived from this software +| without specific prior written permission. +| +| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse Collections. + +Copyright: 2021 Goldman Sachs. 
+Project URL: https://github.com/eclipse-collections/eclipse-collections/ +License: EDL 1.0 - http://www.eclipse.org/org/documents/edl-v10.php + +-------------------------------------------------------------------------------- + +This binary artifact contains checkerframework checker-qual Annotations. + +Copyright: 2004-2019 the Checker Framework developers +Project URL: https://github.com/typetools/checker-framework +License: MIT License +| The annotations are licensed under the MIT License. (The text of this +| license appears below.) More specifically, all the parts of the Checker +| Framework that you might want to include with your own program use the +| MIT License. This is the checker-qual.jar file and all the files that +| appear in it: every file in a qual/ directory, plus utility files such +| as NullnessUtil.java, RegexUtil.java, SignednessUtil.java, etc. +| In addition, the cleanroom implementations of third-party annotations, +| which the Checker Framework recognizes as aliases for its own +| annotations, are licensed under the MIT License. +| +| Permission is hereby granted, free of charge, to any person obtaining a copy +| of this software and associated documentation files (the "Software"), to deal +| in the Software without restriction, including without limitation the rights +| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +| copies of the Software, and to permit persons to whom the Software is +| furnished to do so, subject to the following conditions: +| +| The above copyright notice and this permission notice shall be included in +| all copies or substantial portions of the Software. +| +| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +| THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Google Error Prone Annotations. + +Copyright: Copyright 2011-2019 The Error Prone Authors +Project URL: https://github.com/google/error-prone +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse MicroProfile OpenAPI. + +Copyright: 2017 Contributors to the Eclipse Foundation +Project URL: https://github.com/microprofile/microprofile-open-api +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Annotation. + +Project URL: https://projects.eclipse.org/projects/ee4j.ca +License: EPL 2.0 - https://www.eclipse.org/legal/epl-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Validation. + +Project URL: https://beanvalidation.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Servlet. 
+ +Project URL: https://projects.eclipse.org/projects/ee4j.servlet +License: EPL 2.0 - https://www.eclipse.org/legal/epl-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Micrometer. + +Copyright: 2017-Present VMware, Inc. All Rights Reserved. +Project URL: https://github.com/micrometer-metrics/micrometer +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Compress. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-compress/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Codec. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-codec/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains SLF4J. + +Copyright: 2004-2022 QOS.ch Sarl (Switzerland) +Project URL: http://www.slf4j.org +License: MIT License +| Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland) +| All rights reserved. +| +| Permission is hereby granted, free of charge, to any person obtaining +| a copy of this software and associated documentation files (the +| "Software"), to deal in the Software without restriction, including +| without limitation the rights to use, copy, modify, merge, publish, +| distribute, sublicense, and/or sell copies of the Software, and to +| permit persons to whom the Software is furnished to do so, subject to +| the following conditions: +| +| The above copyright notice and this permission notice shall be +| included in all copies or substantial portions of the Software. +| +| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +| NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +| LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This binary artifact contains j2objc. + +Project URL: https://github.com/google/j2objc/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons IO. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-io/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Lang3. + +Copyright: 2001-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-lang/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains OpenHFT. 
+ +Copyright: 2014 Higher Frequency Trading http://www.higherfrequencytrading.com +Project URL: https://github.com/OpenHFT +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- diff --git a/plugins/spark/v4.0/spark/BUNDLE-NOTICE b/plugins/spark/v4.0/spark/BUNDLE-NOTICE new file mode 100644 index 0000000000..9138e5e894 --- /dev/null +++ b/plugins/spark/v4.0/spark/BUNDLE-NOTICE @@ -0,0 +1,483 @@ +Apache Polaris (incubating) +Copyright 2025 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +The initial code for the Polaris project was donated +to the ASF by Snowflake Inc. (https://www.snowflake.com/) copyright 2024. + +-------------------------------------------------------------------------------- + +This binary artifact includes Project Nessie with the following in its NOTICE +file: +| Nessie +| Copyright 2015-2025 Dremio Corporation +| +| --------------------------------------- +| This project includes code from Apache Polaris (incubating), with the following in its NOTICE file: +| +| | Apache Polaris (incubating) +| | Copyright 2024 The Apache Software Foundation +| | +| | This product includes software developed at +| | The Apache Software Foundation (http://www.apache.org/). +| | +| | The initial code for the Polaris project was donated +| | to the ASF by Snowflake Inc. (https://www.snowflake.com/) copyright 2024. + +-------------------------------------------------------------------------------- + +This binary artifact contains the Jackson JSON processor with the following in its NOTICE +file: +| # Jackson JSON processor +| +| Jackson is a high-performance, Free/Open Source JSON processing library. +| It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +| been in development since 2007. +| It is currently developed by a community of developers. +| +| ## Copyright +| +| Copyright 2007-, Tatu Saloranta (tatu.saloranta@iki.fi) +| +| ## Licensing +| +| Jackson 2.x core and extension components are licensed under Apache License 2.0 +| To find the details that apply to this artifact see the accompanying LICENSE file. +| +| ## Credits +| +| A list of contributors may be found from CREDITS(-2.x) file, which is included +| in some artifacts (usually source distributions); but is always available +| from the source code management (SCM) system project uses. +| +| ## FastDoubleParser +| +| jackson-core bundles a shaded copy of FastDoubleParser . +| That code is available under an MIT license +| under the following copyright. +| +| Copyright © 2023 Werner Randelshofer, Switzerland. MIT License. +| +| See FastDoubleParser-NOTICE for details of other source code included in FastDoubleParser +| and the licenses and copyrights that apply to that code. + +-------------------------------------------------------------------------------- + +This binary artifact contains Airlift Aircompressor with the following in its NOTICE +file: + +| Snappy Copyright Notices +| ========================= +| +| * Copyright 2011 Dain Sundstrom +| * Copyright 2011, Google Inc. +| +| +| Snappy License +| =============== +| Copyright 2011, Google Inc. +| All rights reserved. 
+| +| Redistribution and use in source and binary forms, with or without +| modification, are permitted provided that the following conditions are +| met: +| +| * Redistributions of source code must retain the above copyright +| notice, this list of conditions and the following disclaimer. +| * Redistributions in binary form must reproduce the above +| copyright notice, this list of conditions and the following disclaimer +| in the documentation and/or other materials provided with the +| distribution. +| * Neither the name of Google Inc. nor the names of its +| contributors may be used to endorse or promote products derived from +| this software without specific prior written permission. +| +| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Netty's buffer library with the following in its NOTICE +file: +| The Netty Project +| ================= +| +| Please visit the Netty web site for more information: +| +| * https://netty.io/ +| +| Copyright 2014 The Netty Project +| +| The Netty Project licenses this file to you under the Apache License, +| version 2.0 (the "License"); you may not use this file except in compliance +| with the License. You may obtain a copy of the License at: +| +| http://www.apache.org/licenses/LICENSE-2.0 +| +| Unless required by applicable law or agreed to in writing, software +| distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +| WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +| License for the specific language governing permissions and limitations +| under the License. +| +| Also, please refer to each LICENSE..txt file, which is located in +| the 'license' directory of the distribution file, for the license terms of the +| components that this product depends on. +| +| ------------------------------------------------------------------------------- +| This product contains the extensions to Java Collections Framework which has +| been derived from the works by JSR-166 EG, Doug Lea, and Jason T. 
Greene: +| +| * LICENSE: +| * license/LICENSE.jsr166y.txt (Public Domain) +| * HOMEPAGE: +| * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ +| * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ +| +| This product contains a modified version of Robert Harder's Public Domain +| Base64 Encoder and Decoder, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.base64.txt (Public Domain) +| * HOMEPAGE: +| * http://iharder.sourceforge.net/current/java/base64/ +| +| This product contains a modified portion of 'Webbit', an event based +| WebSocket and HTTP server, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.webbit.txt (BSD License) +| * HOMEPAGE: +| * https://github.com/joewalnes/webbit +| +| This product contains a modified portion of 'SLF4J', a simple logging +| facade for Java, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.slf4j.txt (MIT License) +| * HOMEPAGE: +| * http://www.slf4j.org/ +| +| This product contains a modified portion of 'Apache Harmony', an open source +| Java SE, which can be obtained at: +| +| * NOTICE: +| * license/NOTICE.harmony.txt +| * LICENSE: +| * license/LICENSE.harmony.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://archive.apache.org/dist/harmony/ +| +| This product contains a modified portion of 'jbzip2', a Java bzip2 compression +| and decompression library written by Matthew J. Francis. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jbzip2.txt (MIT License) +| * HOMEPAGE: +| * https://code.google.com/p/jbzip2/ +| +| This product contains a modified portion of 'libdivsufsort', a C API library to construct +| the suffix array and the Burrows-Wheeler transformed string for any input string of +| a constant-size alphabet written by Yuta Mori. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.libdivsufsort.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/y-256/libdivsufsort +| +| This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM, +| which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jctools.txt (ASL2 License) +| * HOMEPAGE: +| * https://github.com/JCTools/JCTools +| +| This product optionally depends on 'JZlib', a re-implementation of zlib in +| pure Java, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jzlib.txt (BSD style License) +| * HOMEPAGE: +| * http://www.jcraft.com/jzlib/ +| +| This product optionally depends on 'Compress-LZF', a Java library for encoding and +| decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.compress-lzf.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/ning/compress +| +| This product optionally depends on 'lz4', a LZ4 Java compression +| and decompression library written by Adrien Grand. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.lz4.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jpountz/lz4-java +| +| This product optionally depends on 'lzma-java', a LZMA Java compression +| and decompression library, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.lzma-java.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jponge/lzma-java +| +| This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression +| and decompression library written by William Kinney. 
It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jfastlz.txt (MIT License) +| * HOMEPAGE: +| * https://code.google.com/p/jfastlz/ +| +| This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data +| interchange format, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.protobuf.txt (New BSD License) +| * HOMEPAGE: +| * https://github.com/google/protobuf +| +| This product optionally depends on 'Bouncy Castle Crypto APIs' to generate +| a temporary self-signed X.509 certificate when the JVM does not provide the +| equivalent functionality. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.bouncycastle.txt (MIT License) +| * HOMEPAGE: +| * http://www.bouncycastle.org/ +| +| This product optionally depends on 'Snappy', a compression library produced +| by Google Inc, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.snappy.txt (New BSD License) +| * HOMEPAGE: +| * https://github.com/google/snappy +| +| This product optionally depends on 'JBoss Marshalling', an alternative Java +| serialization API, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jboss-marshalling.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jboss-remoting/jboss-marshalling +| +| This product optionally depends on 'Caliper', Google's micro- +| benchmarking framework, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.caliper.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/google/caliper +| +| This product optionally depends on 'Apache Commons Logging', a logging +| framework, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.commons-logging.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://commons.apache.org/logging/ +| +| This product optionally depends on 'Apache Log4J', a logging framework, which +| can be obtained at: +| +| * LICENSE: +| * license/LICENSE.log4j.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://logging.apache.org/log4j/ +| +| This product optionally depends on 'Aalto XML', an ultra-high performance +| non-blocking XML processor, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.aalto-xml.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://wiki.fasterxml.com/AaltoHome +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.hpack.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/twitter/hpack +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.hyper-hpack.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/python-hyper/hpack/ +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. 
It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.nghttp2-hpack.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/nghttp2/nghttp2/ +| +| This product contains a modified portion of 'Apache Commons Lang', a Java library +| provides utilities for the java.lang API, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.commons-lang.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://commons.apache.org/proper/commons-lang/ +| +| +| This product contains the Maven wrapper scripts from 'Maven Wrapper', that provides an easy way to ensure a user has everything necessary to run the Maven build. +| +| * LICENSE: +| * license/LICENSE.mvn-wrapper.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/takari/maven-wrapper +| +| This product contains the dnsinfo.h header file, that provides a way to retrieve the system DNS configuration on MacOS. +| This private header is also used by Apple's open source +| mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/). +| +| * LICENSE: +| * license/LICENSE.dnsinfo.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse MicroProfile OpenAPI with the following in its NOTICE +file: +| ========================================================================= +| == NOTICE file corresponding to section 4(d) of the Apache License, == +| == Version 2.0, in this case for MicroProfile OpenAPI == +| ========================================================================= +| +| The majority of this software were originally based on the following: +| * Swagger Core +| https://github.com/swagger-api/swagger-core +| under Apache License, v2.0 +| +| +| SPDXVersion: SPDX-2.1 +| PackageName: Eclipse MicroProfile +| PackageHomePage: http://www.eclipse.org/microprofile +| PackageLicenseDeclared: Apache-2.0 +| +| PackageCopyrightText: +| Arthur De Magalhaes arthurdm@ca.ibm.com +| + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Validation with the following in its NOTICE +file: +| # Notices for Eclipse Jakarta Validation +| +| This content is produced and maintained by the Eclipse Jakarta Validation +| project. +| +| * Project home: https://projects.eclipse.org/projects/ee4j.validation +| +| ## Trademarks +| +| Jakarta Validation is a trademark of the Eclipse Foundation. +| +| ## Copyright +| +| All content is the property of the respective authors or their employers. For +| more information regarding authorship of content, please consult the listed +| source code repository logs. +| +| ## Declared Project Licenses +| +| This program and the accompanying materials are made available under the terms +| of the Apache License, Version 2.0 which is available at +| https://www.apache.org/licenses/LICENSE-2.0. +| +| SPDX-License-Identifier: Apache-2.0 +| +| ## Source Code +| +| The project maintains the following source code repositories: +| +| * [The specification repository](https://github.com/jakartaee/validation-spec) +| * [The API repository](https://github.com/jakartaee/validation) +| * [The TCK repository](https://github.com/jakartaee/validation-tck) +| +| ## Third-party Content +| +| This project leverages the following third party content. 
+| +| Test dependencies: +| +| * [TestNG](https://github.com/cbeust/testng) - Apache License 2.0 +| * [JCommander](https://github.com/cbeust/jcommander) - Apache License 2.0 +| * [SnakeYAML](https://bitbucket.org/asomov/snakeyaml/src) - Apache License 2.0 +| + +-------------------------------------------------------------------------------- + +This binary artifact contains Micrometer with the following in its NOTICE +file: +| Micrometer +| +| Copyright (c) 2017-Present VMware, Inc. All Rights Reserved. +| +| Licensed under the Apache License, Version 2.0 (the "License"); +| you may not use this file except in compliance with the License. +| You may obtain a copy of the License at +| +| https://www.apache.org/licenses/LICENSE-2.0 +| +| Unless required by applicable law or agreed to in writing, software +| distributed under the License is distributed on an "AS IS" BASIS, +| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +| See the License for the specific language governing permissions and +| limitations under the License. +| +| ------------------------------------------------------------------------------- +| +| This product contains a modified portion of 'io.netty.util.internal.logging', +| in the Netty/Common library distributed by The Netty Project: +| +| * Copyright 2013 The Netty Project +| * License: Apache License v2.0 +| * Homepage: https://netty.io +| +| This product contains a modified portion of 'StringUtils.isBlank()', +| in the Commons Lang library distributed by The Apache Software Foundation: +| +| * Copyright 2001-2019 The Apache Software Foundation +| * License: Apache License v2.0 +| * Homepage: https://commons.apache.org/proper/commons-lang/ +| +| This product contains a modified portion of 'JsonUtf8Writer', +| in the Moshi library distributed by Square, Inc: +| +| * Copyright 2010 Google Inc. +| * License: Apache License v2.0 +| * Homepage: https://github.com/square/moshi +| +| This product contains a modified portion of the 'org.springframework.lang' +| package in the Spring Framework library, distributed by VMware, Inc: +| +| * Copyright 2002-2019 the original author or authors. +| * License: Apache License v2.0 +| * Homepage: https://spring.io/projects/spring-framework + +-------------------------------------------------------------------------------- diff --git a/plugins/spark/v4.0/spark/build.gradle.kts b/plugins/spark/v4.0/spark/build.gradle.kts new file mode 100644 index 0000000000..6be6cb5ae4 --- /dev/null +++ b/plugins/spark/v4.0/spark/build.gradle.kts @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+
+plugins { id("polaris-client") }
+
+checkstyle {
+  configProperties =
+    mapOf(
+      "org.checkstyle.google.suppressionfilter.config" to
+        project.file("checkstyle_suppressions.xml").absolutePath
+    )
+}
+
+// get version information
+val sparkMajorVersion = "4.0"
+val scalaVersion = getAndUseScalaVersionForProject()
+val icebergVersion = pluginlibs.versions.iceberg.get()
+val spark40Version = pluginlibs.versions.spark40.get()
+
+val scalaLibraryVersion = pluginlibs.versions.scala213.get()
+
+dependencies {
+  // TODO: extract a polaris-rest module as a thin layer for
+  // the client to depend on.
+  implementation(project(":polaris-core")) { isTransitive = false }
+
+  implementation(
+    "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}"
+  )
+
+  compileOnly("org.scala-lang:scala-library:${scalaLibraryVersion}")
+  compileOnly("org.scala-lang:scala-reflect:${scalaLibraryVersion}")
+  compileOnly("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") {
+    // exclude log4j dependencies
+    exclude("org.apache.logging.log4j", "log4j-slf4j2-impl")
+    exclude("org.apache.logging.log4j", "log4j-api")
+    exclude("org.apache.logging.log4j", "log4j-1.2-api")
+    exclude("org.slf4j", "jul-to-slf4j")
+  }
+
+  compileOnly(libs.jakarta.annotation.api)
+  compileOnly(libs.jakarta.validation.api)
+
+  testImplementation(platform(libs.junit.bom))
+  testImplementation("org.junit.jupiter:junit-jupiter")
+  testImplementation(libs.assertj.core)
+  testImplementation(libs.mockito.core)
+
+  testImplementation(
+    "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}"
+  )
+  testImplementation("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") {
+    // exclude log4j dependencies. Explicit dependencies for the log4j libraries are
+    // enforced below to ensure version compatibility
+    exclude("org.apache.logging.log4j", "log4j-slf4j2-impl")
+    exclude("org.apache.logging.log4j", "log4j-api")
+    exclude("org.apache.logging.log4j", "log4j-1.2-api")
+    exclude("org.apache.logging.log4j", "log4j-core")
+    exclude("org.slf4j", "jul-to-slf4j")
+  }
+  // enforce the usage of log4j 2.25.2 for log4j-api compatibility with the spark-sql dependency
+  testRuntimeOnly("org.apache.logging.log4j:log4j-api:2.25.2")
+  testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.2")
+}
+
+tasks.register<ShadowJar>("createPolarisSparkJar") {
+  archiveClassifier = "bundle"
+  isZip64 = true
+
+  // pack both the source code and dependencies
+  from(sourceSets.main.get().output)
+  configurations = listOf(project.configurations.runtimeClasspath.get())
+
+  // recursively remove all LICENSE and NOTICE files under META-INF, including
+  // directories containing 'license' in the name
+  exclude("META-INF/**/*LICENSE*")
+  exclude("META-INF/**/*NOTICE*")
+  // exclude the top level LICENSE, LICENSE-*.txt and NOTICE
+  exclude("LICENSE*")
+  exclude("NOTICE*")
+
+  // add the Polaris customized LICENSE and NOTICE for the bundle jar at the top level. Note that
+  // the customized LICENSE and NOTICE files are named BUNDLE-LICENSE and BUNDLE-NOTICE and are
+  // renamed to LICENSE and NOTICE after inclusion; this avoids the files being excluded by the
+  // exclude patterns above.
+ from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } } + from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } } +} + +// ensure the shadow jar job (which will automatically run license addition) is run for both +// `assemble` and `build` task +tasks.named("assemble") { dependsOn("createPolarisSparkJar") } + +tasks.named("build") { dependsOn("createPolarisSparkJar") } diff --git a/plugins/spark/v4.0/spark/checkstyle_suppressions.xml b/plugins/spark/v4.0/spark/checkstyle_suppressions.xml new file mode 100644 index 0000000000..d6f9482ea7 --- /dev/null +++ b/plugins/spark/v4.0/spark/checkstyle_suppressions.xml @@ -0,0 +1,32 @@ + + + + + + + + diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java new file mode 100644 index 0000000000..08116c9e66 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.spark.rest.GenericTable; + +public interface PolarisCatalog { + List listGenericTables(Namespace ns); + + GenericTable loadGenericTable(TableIdentifier identifier); + + boolean dropGenericTable(TableIdentifier identifier); + + GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props); +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java new file mode 100644 index 0000000000..5be0f6952e --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.rest.Endpoint; +import org.apache.iceberg.rest.ErrorHandlers; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTClient; +import org.apache.iceberg.rest.ResourcePaths; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.apache.iceberg.util.EnvironmentUtil; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.polaris.core.rest.PolarisEndpoints; +import org.apache.polaris.core.rest.PolarisResourcePaths; +import org.apache.polaris.spark.rest.CreateGenericTableRESTRequest; +import org.apache.polaris.spark.rest.CreateGenericTableRequest; +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.polaris.spark.rest.ListGenericTablesRESTResponse; +import org.apache.polaris.spark.rest.LoadGenericTableRESTResponse; + +/** + * [[PolarisRESTCatalog]] talks to Polaris REST APIs, and implements the PolarisCatalog interfaces, + * which are generic table related APIs at this moment. This class doesn't interact with any Spark + * objects. + */ +public class PolarisRESTCatalog implements PolarisCatalog, Closeable { + public static final String REST_PAGE_SIZE = "rest-page-size"; + + private final Function, RESTClient> clientBuilder; + + private RESTClient restClient = null; + private CloseableGroup closeables = null; + private Set endpoints; + private OAuth2Util.AuthSession catalogAuth = null; + private PolarisResourcePaths pathGenerator = null; + private Integer pageSize = null; + + // the default endpoints to config if server doesn't specify the 'endpoints' configuration. + private static final Set DEFAULT_ENDPOINTS = PolarisEndpoints.GENERIC_TABLE_ENDPOINTS; + + public PolarisRESTCatalog() { + this(config -> HTTPClient.builder(config).uri(config.get(CatalogProperties.URI)).build()); + } + + public PolarisRESTCatalog(Function, RESTClient> clientBuilder) { + this.clientBuilder = clientBuilder; + } + + public void initialize(Map unresolved, OAuth2Util.AuthSession catalogAuth) { + Preconditions.checkArgument(unresolved != null, "Invalid configuration: null"); + + // Resolve any configuration that is supplied by environment variables. + // For example: if we have an entity ("key", "env:envVar") in the unresolved, + // and envVar is configured to envValue in system env. After resolve, we got + // entity ("key", "envValue"). 
+ Map props = EnvironmentUtil.resolveAll(unresolved); + + // TODO: switch to use authManager once iceberg dependency is updated to 1.9.0 + this.catalogAuth = catalogAuth; + + ConfigResponse config; + try (RESTClient initClient = clientBuilder.apply(props).withAuthSession(catalogAuth)) { + config = fetchConfig(initClient, catalogAuth.headers(), props); + } catch (IOException e) { + throw new UncheckedIOException("Failed to close HTTP client", e); + } + + // call getConfig to get the server configurations + Map mergedProps = config.merge(props); + if (config.endpoints().isEmpty()) { + this.endpoints = DEFAULT_ENDPOINTS; + } else { + this.endpoints = ImmutableSet.copyOf(config.endpoints()); + } + + this.pathGenerator = PolarisResourcePaths.forCatalogProperties(mergedProps); + this.restClient = clientBuilder.apply(mergedProps).withAuthSession(catalogAuth); + + this.pageSize = PropertyUtil.propertyAsNullableInt(mergedProps, REST_PAGE_SIZE); + if (pageSize != null) { + Preconditions.checkArgument( + pageSize > 0, "Invalid value for %s, must be a positive integer", REST_PAGE_SIZE); + } + + this.closeables = new CloseableGroup(); + this.closeables.addCloseable(this.restClient); + this.closeables.setSuppressCloseFailure(true); + } + + protected static ConfigResponse fetchConfig( + RESTClient client, Map headers, Map properties) { + // send the client's warehouse location to the service to keep in sync + // this is needed for cases where the warehouse is configured at client side, + // and used by Polaris server as catalog name. + ImmutableMap.Builder queryParams = ImmutableMap.builder(); + if (properties.containsKey(CatalogProperties.WAREHOUSE_LOCATION)) { + queryParams.put( + CatalogProperties.WAREHOUSE_LOCATION, + properties.get(CatalogProperties.WAREHOUSE_LOCATION)); + } + + ConfigResponse configResponse = + client.get( + ResourcePaths.config(), + queryParams.build(), + ConfigResponse.class, + headers, + ErrorHandlers.defaultErrorHandler()); + configResponse.validate(); + return configResponse; + } + + @Override + public void close() throws IOException { + if (closeables != null) { + closeables.close(); + } + } + + @Override + public List listGenericTables(Namespace ns) { + Endpoint.check(endpoints, PolarisEndpoints.V1_LIST_GENERIC_TABLES); + + Map queryParams = Maps.newHashMap(); + ImmutableList.Builder tables = ImmutableList.builder(); + String pageToken = ""; + if (pageSize != null) { + queryParams.put("pageSize", String.valueOf(pageSize)); + } + + do { + queryParams.put("pageToken", pageToken); + ListGenericTablesRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .get( + pathGenerator.genericTables(ns), + queryParams, + ListGenericTablesRESTResponse.class, + Map.of(), + ErrorHandlers.namespaceErrorHandler()); + pageToken = response.getNextPageToken(); + tables.addAll(response.getIdentifiers()); + } while (pageToken != null); + + return tables.build(); + } + + @Override + public boolean dropGenericTable(TableIdentifier identifier) { + Endpoint.check(endpoints, PolarisEndpoints.V1_DELETE_GENERIC_TABLE); + + try { + restClient + .withAuthSession(this.catalogAuth) + .delete( + pathGenerator.genericTable(identifier), + null, + Map.of(), + ErrorHandlers.tableErrorHandler()); + return true; + } catch (NoSuchTableException e) { + return false; + } + } + + @Override + public GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props) { + Endpoint.check(endpoints, PolarisEndpoints.V1_CREATE_GENERIC_TABLE); + 
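+    // Wrap the table spec in a create request and POST it to the generic-tables
+    // endpoint for the identifier's namespace; the response carries the created table.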
CreateGenericTableRESTRequest request = + new CreateGenericTableRESTRequest( + CreateGenericTableRequest.builder() + .setName(identifier.name()) + .setFormat(format) + .setBaseLocation(baseLocation) + .setDoc(doc) + .setProperties(props) + .build()); + + LoadGenericTableRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .post( + pathGenerator.genericTables(identifier.namespace()), + request, + LoadGenericTableRESTResponse.class, + Map.of(), + ErrorHandlers.tableErrorHandler()); + + return response.getTable(); + } + + @Override + public GenericTable loadGenericTable(TableIdentifier identifier) { + Endpoint.check(endpoints, PolarisEndpoints.V1_LOAD_GENERIC_TABLE); + LoadGenericTableRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .get( + pathGenerator.genericTable(identifier), + null, + LoadGenericTableRESTResponse.class, + Map.of(), + ErrorHandlers.tableErrorHandler()); + + return response.getTable(); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java new file mode 100644 index 0000000000..771c191c05 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.spark.Spark3Util; +// Use the spec class defined at client side under the rest package. +// The spec classes used at client side and server side are different in +// terms of import, where the client side uses the shaded jackson library +// from iceberg-spark-runtime. +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.polaris.spark.utils.PolarisCatalogUtils; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableChange; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A spark TableCatalog Implementation interacts with Polaris specific APIs only. 
The APIs it
+ * interacts with are the generic table APIs, and all table operations performed in this class are
+ * expected to be for non-Iceberg tables.
+ */
+public class PolarisSparkCatalog implements TableCatalog {
+  private static final Logger LOGGER = LoggerFactory.getLogger(PolarisSparkCatalog.class);
+
+  private PolarisCatalog polarisCatalog = null;
+  private String catalogName = null;
+
+  public PolarisSparkCatalog(PolarisCatalog polarisCatalog) {
+    this.polarisCatalog = polarisCatalog;
+  }
+
+  @Override
+  public void initialize(String name, CaseInsensitiveStringMap options) {
+    this.catalogName = name;
+  }
+
+  @Override
+  public String name() {
+    return catalogName;
+  }
+
+  @Override
+  public Table loadTable(Identifier identifier) throws NoSuchTableException {
+    try {
+      GenericTable genericTable =
+          this.polarisCatalog.loadGenericTable(Spark3Util.identifierToTableIdentifier(identifier));
+      return PolarisCatalogUtils.loadSparkTable(genericTable);
+    } catch (org.apache.iceberg.exceptions.NoSuchTableException e) {
+      throw new NoSuchTableException(identifier);
+    }
+  }
+
+  @Override
+  @SuppressWarnings({"deprecation", "RedundantSuppression"})
+  public Table createTable(
+      Identifier identifier,
+      StructType schema,
+      Transform[] transforms,
+      Map<String, String> properties)
+      throws TableAlreadyExistsException, NoSuchNamespaceException {
+    try {
+      String format = properties.get(PolarisCatalogUtils.TABLE_PROVIDER_KEY);
+
+      String baseLocation;
+      // Extract the base table location from the Spark properties.
+      // Spark passes the table base location either with the
+      // TableCatalog.PROP_LOCATION key, or with the "path" key if the table
+      // is created with the path option.
+      if (properties.get(TableCatalog.PROP_LOCATION) != null) {
+        baseLocation = properties.get(TableCatalog.PROP_LOCATION);
+        if (properties.get(PolarisCatalogUtils.TABLE_PATH_KEY) != null) {
+          LOGGER.debug(
+              "Both location and path are propagated in the table properties, location {}, path {}",
+              baseLocation,
+              properties.get(PolarisCatalogUtils.TABLE_PATH_KEY));
+        }
+      } else {
+        baseLocation = properties.get(PolarisCatalogUtils.TABLE_PATH_KEY);
+      }
+      GenericTable genericTable =
+          this.polarisCatalog.createGenericTable(
+              Spark3Util.identifierToTableIdentifier(identifier),
+              format,
+              baseLocation,
+              null,
+              properties);
+      return PolarisCatalogUtils.loadSparkTable(genericTable);
+    } catch (AlreadyExistsException e) {
+      throw new TableAlreadyExistsException(identifier);
+    }
+  }
+
+  @Override
+  public Table alterTable(Identifier identifier, TableChange...
changes) + throws NoSuchTableException { + // alterTable currently is not supported for generic tables + throw new UnsupportedOperationException("alterTable operation is not supported"); + } + + @Override + public boolean purgeTable(Identifier ident) { + // purgeTable for generic table will only do a drop without purge + return dropTable(ident); + } + + @Override + public boolean dropTable(Identifier identifier) { + return this.polarisCatalog.dropGenericTable(Spark3Util.identifierToTableIdentifier(identifier)); + } + + @Override + public void renameTable(Identifier from, Identifier to) + throws NoSuchTableException, TableAlreadyExistsException { + throw new UnsupportedOperationException("renameTable operation is not supported"); + } + + @Override + public Identifier[] listTables(String[] namespace) { + try { + return this.polarisCatalog.listGenericTables(Namespace.of(namespace)).stream() + .map(ident -> Identifier.of(ident.namespace().levels(), ident.name())) + .toArray(Identifier[]::new); + } catch (UnsupportedOperationException ex) { + return new Identifier[0]; + } + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java new file mode 100644 index 0000000000..ab7ff21026 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java @@ -0,0 +1,354 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.spark;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.stream.Stream;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.rest.auth.OAuth2Util;
+import org.apache.iceberg.spark.SupportsReplaceView;
+import org.apache.iceberg.util.PropertyUtil;
+import org.apache.polaris.spark.utils.DeltaHelper;
+import org.apache.polaris.spark.utils.PolarisCatalogUtils;
+import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException;
+import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
+import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
+import org.apache.spark.sql.catalyst.analysis.NoSuchViewException;
+import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException;
+import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException;
+import org.apache.spark.sql.connector.catalog.Identifier;
+import org.apache.spark.sql.connector.catalog.NamespaceChange;
+import org.apache.spark.sql.connector.catalog.StagedTable;
+import org.apache.spark.sql.connector.catalog.StagingTableCatalog;
+import org.apache.spark.sql.connector.catalog.SupportsNamespaces;
+import org.apache.spark.sql.connector.catalog.Table;
+import org.apache.spark.sql.connector.catalog.TableCatalog;
+import org.apache.spark.sql.connector.catalog.TableChange;
+import org.apache.spark.sql.connector.catalog.View;
+import org.apache.spark.sql.connector.catalog.ViewCatalog;
+import org.apache.spark.sql.connector.catalog.ViewChange;
+import org.apache.spark.sql.connector.catalog.ViewInfo;
+import org.apache.spark.sql.connector.expressions.Transform;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * A SparkCatalog implementation that is able to interact with both the Iceberg SparkCatalog and
+ * the Polaris SparkCatalog. All namespace and view related operations go through the Iceberg
+ * SparkCatalog. For table operations, depending on the table format, the operation may involve
+ * both the Iceberg and the Polaris SparkCatalog.
+ */
+public class SparkCatalog
+    implements StagingTableCatalog,
+        TableCatalog,
+        SupportsNamespaces,
+        ViewCatalog,
+        SupportsReplaceView {
+
+  @VisibleForTesting protected String catalogName = null;
+  @VisibleForTesting protected org.apache.iceberg.spark.SparkCatalog icebergsSparkCatalog = null;
+  @VisibleForTesting protected PolarisSparkCatalog polarisSparkCatalog = null;
+  @VisibleForTesting protected DeltaHelper deltaHelper = null;
+
+  @Override
+  public String name() {
+    return catalogName;
+  }
+
+  /**
+   * Check whether an invalid catalog configuration is provided, and return an option map with the
+   * catalog type configured correctly. This function mainly validates two things: 1) no customized
+   * catalog implementation is provided, and 2) no non-REST catalog type is configured.
+ */ + @VisibleForTesting + public CaseInsensitiveStringMap validateAndResolveCatalogOptions( + CaseInsensitiveStringMap options) { + Preconditions.checkArgument( + options.get(CatalogProperties.CATALOG_IMPL) == null, + "Customized catalog implementation is not supported and not needed, please remove the configuration!"); + + String catalogType = + PropertyUtil.propertyAsString( + options, CatalogUtil.ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + Preconditions.checkArgument( + catalogType.equals(CatalogUtil.ICEBERG_CATALOG_TYPE_REST), + "Only rest catalog type is allowed, but got catalog type: " + + catalogType + + ". Either configure the type to rest or remove the config"); + + Map resolvedOptions = Maps.newHashMap(); + resolvedOptions.putAll(options); + // when no catalog type is configured, iceberg uses hive by default. Here, we make sure the + // type is set to rest since we only support rest catalog. + resolvedOptions.put(CatalogUtil.ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + + return new CaseInsensitiveStringMap(resolvedOptions); + } + + /** + * Initialize REST Catalog for Iceberg and Polaris, this is the only catalog type supported by + * Polaris at this moment. + */ + private void initRESTCatalog(String name, CaseInsensitiveStringMap options) { + CaseInsensitiveStringMap resolvedOptions = validateAndResolveCatalogOptions(options); + + // initialize the icebergSparkCatalog + this.icebergsSparkCatalog = new org.apache.iceberg.spark.SparkCatalog(); + this.icebergsSparkCatalog.initialize(name, resolvedOptions); + + // initialize the polaris spark catalog + OAuth2Util.AuthSession catalogAuth = + PolarisCatalogUtils.getAuthSession(this.icebergsSparkCatalog); + PolarisRESTCatalog restCatalog = new PolarisRESTCatalog(); + restCatalog.initialize(options, catalogAuth); + this.polarisSparkCatalog = new PolarisSparkCatalog(restCatalog); + this.polarisSparkCatalog.initialize(name, resolvedOptions); + } + + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + this.catalogName = name; + initRESTCatalog(name, options); + this.deltaHelper = new DeltaHelper(options); + } + + @Override + public Table loadTable(Identifier ident) throws NoSuchTableException { + try { + return this.icebergsSparkCatalog.loadTable(ident); + } catch (NoSuchTableException e) { + return this.polarisSparkCatalog.loadTable(ident); + } + } + + @Override + @SuppressWarnings({"deprecation"}) + public Table createTable( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws TableAlreadyExistsException, NoSuchNamespaceException { + String provider = properties.get(PolarisCatalogUtils.TABLE_PROVIDER_KEY); + if (PolarisCatalogUtils.useIceberg(provider)) { + return this.icebergsSparkCatalog.createTable(ident, schema, transforms, properties); + } else { + if (PolarisCatalogUtils.isTableWithSparkManagedLocation(properties)) { + throw new UnsupportedOperationException( + "Create table without location key is not supported by Polaris. Please provide location or path on table creation."); + } + + if (PolarisCatalogUtils.useDelta(provider)) { + // For delta table, we load the delta catalog to help dealing with the + // delta log creation. 
+ TableCatalog deltaCatalog = deltaHelper.loadDeltaCatalog(this.polarisSparkCatalog); + return deltaCatalog.createTable(ident, schema, transforms, properties); + } else { + return this.polarisSparkCatalog.createTable(ident, schema, transforms, properties); + } + } + } + + @Override + public Table alterTable(Identifier ident, TableChange... changes) throws NoSuchTableException { + try { + return this.icebergsSparkCatalog.alterTable(ident, changes); + } catch (NoSuchTableException e) { + Table table = this.polarisSparkCatalog.loadTable(ident); + String provider = table.properties().get(PolarisCatalogUtils.TABLE_PROVIDER_KEY); + if (PolarisCatalogUtils.useDelta(provider)) { + // For delta table, most of the alter operations is a delta log manipulation, + // we load the delta catalog to help handling the alter table operation. + // NOTE: This currently doesn't work for changing file location and file format + // using ALTER TABLE ...SET LOCATION, and ALTER TABLE ... SET FILEFORMAT. + TableCatalog deltaCatalog = deltaHelper.loadDeltaCatalog(this.polarisSparkCatalog); + return deltaCatalog.alterTable(ident, changes); + } + return this.polarisSparkCatalog.alterTable(ident); + } + } + + @Override + public boolean dropTable(Identifier ident) { + return this.icebergsSparkCatalog.dropTable(ident) || this.polarisSparkCatalog.dropTable(ident); + } + + @Override + public void renameTable(Identifier from, Identifier to) + throws NoSuchTableException, TableAlreadyExistsException { + try { + this.icebergsSparkCatalog.renameTable(from, to); + } catch (NoSuchTableException e) { + this.polarisSparkCatalog.renameTable(from, to); + } + } + + @Override + public void invalidateTable(Identifier ident) { + this.icebergsSparkCatalog.invalidateTable(ident); + } + + @Override + public boolean purgeTable(Identifier ident) { + if (this.icebergsSparkCatalog.purgeTable(ident)) { + return true; + } else { + return this.polarisSparkCatalog.purgeTable(ident); + } + } + + @Override + public Identifier[] listTables(String[] namespace) { + Identifier[] icebergIdents = this.icebergsSparkCatalog.listTables(namespace); + Identifier[] genericTableIdents = this.polarisSparkCatalog.listTables(namespace); + + return Stream.concat(Arrays.stream(icebergIdents), Arrays.stream(genericTableIdents)) + .toArray(Identifier[]::new); + } + + @Override + @SuppressWarnings({"deprecation", "RedundantSuppression"}) + public StagedTable stageCreate( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws TableAlreadyExistsException { + return this.icebergsSparkCatalog.stageCreate(ident, schema, transforms, properties); + } + + @Override + public StagedTable stageReplace( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws NoSuchTableException { + return this.icebergsSparkCatalog.stageReplace(ident, schema, transforms, properties); + } + + @Override + public StagedTable stageCreateOrReplace( + Identifier ident, StructType schema, Transform[] transforms, Map properties) { + return this.icebergsSparkCatalog.stageCreateOrReplace(ident, schema, transforms, properties); + } + + @Override + public String[] defaultNamespace() { + return this.icebergsSparkCatalog.defaultNamespace(); + } + + @Override + public String[][] listNamespaces() { + return this.icebergsSparkCatalog.listNamespaces(); + } + + @Override + public String[][] listNamespaces(String[] namespace) throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.listNamespaces(namespace); + } + + @Override + public Map 
loadNamespaceMetadata(String[] namespace) + throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.loadNamespaceMetadata(namespace); + } + + @Override + public void createNamespace(String[] namespace, Map metadata) + throws NamespaceAlreadyExistsException { + this.icebergsSparkCatalog.createNamespace(namespace, metadata); + } + + @Override + public void alterNamespace(String[] namespace, NamespaceChange... changes) + throws NoSuchNamespaceException { + this.icebergsSparkCatalog.alterNamespace(namespace, changes); + } + + @Override + public boolean dropNamespace(String[] namespace, boolean cascade) + throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.dropNamespace(namespace, cascade); + } + + @Override + public Identifier[] listViews(String... namespace) { + return this.icebergsSparkCatalog.listViews(namespace); + } + + @Override + public View loadView(Identifier ident) throws NoSuchViewException { + return this.icebergsSparkCatalog.loadView(ident); + } + + @Override + public View createView(ViewInfo viewInfo) + throws ViewAlreadyExistsException, NoSuchNamespaceException { + return this.icebergsSparkCatalog.createView(viewInfo); + } + + @Override + public View alterView(Identifier ident, ViewChange... changes) + throws NoSuchViewException, IllegalArgumentException { + return this.icebergsSparkCatalog.alterView(ident, changes); + } + + @Override + public boolean dropView(Identifier ident) { + return this.icebergsSparkCatalog.dropView(ident); + } + + @Override + public void renameView(Identifier fromIdentifier, Identifier toIdentifier) + throws NoSuchViewException, ViewAlreadyExistsException { + this.icebergsSparkCatalog.renameView(fromIdentifier, toIdentifier); + } + + @Override + public View replaceView(ViewInfo viewInfo, boolean orCreate) + throws NoSuchNamespaceException, NoSuchViewException { + return this.icebergsSparkCatalog.replaceView(viewInfo, orCreate); + } + + // This method is required by SupportsReplaceView interface from Iceberg + // It provides backward compatibility with the old API signature + @Override + public View replaceView( + Identifier ident, + String sql, + String currentCatalog, + String[] currentNamespace, + StructType schema, + String[] queryColumnNames, + String[] columnAliases, + String[] columnComments, + Map properties) + throws NoSuchNamespaceException, NoSuchViewException { + // Delegate to the new API by creating a ViewInfo object + ViewInfo viewInfo = + new ViewInfo( + ident, + sql, + currentCatalog, + currentNamespace, + schema, + queryColumnNames, + columnAliases, + columnComments, + properties); + return this.icebergsSparkCatalog.replaceView(viewInfo, false); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java new file mode 100644 index 0000000000..644fcc1c1d --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import java.util.Map; +import org.apache.iceberg.rest.RESTRequest; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTRequest definition for CreateGenericTable which extends the iceberg RESTRequest. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class CreateGenericTableRESTRequest extends CreateGenericTableRequest + implements RESTRequest { + + @JsonCreator + public CreateGenericTableRESTRequest( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + super(name, format, baseLocation, doc, properties); + } + + public CreateGenericTableRESTRequest(CreateGenericTableRequest request) { + this( + request.getName(), + request.getFormat(), + request.getBaseLocation(), + request.getDoc(), + request.getProperties()); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java new file mode 100644 index 0000000000..9d4021012f --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.constraints.NotNull; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class CreateGenericTableRequest { + + @NotNull private final String name; + @NotNull private final String format; + private final String baseLocation; + private final String doc; + private final Map properties; + + /** */ + @JsonProperty(value = "name", required = true) + public String getName() { + return name; + } + + /** */ + @JsonProperty(value = "format", required = true) + public String getFormat() { + return format; + } + + /** */ + @JsonProperty(value = "base-location") + public String getBaseLocation() { + return baseLocation; + } + + /** */ + @JsonProperty(value = "doc") + public String getDoc() { + return doc; + } + + /** */ + @JsonProperty(value = "properties") + public Map getProperties() { + return properties; + } + + @JsonCreator + public CreateGenericTableRequest( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + this.name = name; + this.format = format; + this.baseLocation = baseLocation; + this.doc = doc; + this.properties = Objects.requireNonNullElse(properties, new HashMap<>()); + } + + public CreateGenericTableRequest(String name, String format) { + this.name = name; + this.format = format; + this.baseLocation = null; + this.doc = null; + this.properties = new HashMap<>(); + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(String name, String format) { + return new Builder(name, format); + } + + public static final class Builder { + private String name; + private String format; + private String baseLocation; + private String doc; + private Map properties; + + private Builder() {} + + private Builder(String name, String format) { + this.name = name; + this.format = format; + } + + public Builder setName(String name) { + this.name = name; + return this; + } + + public Builder setFormat(String format) { + this.format = format; + return this; + } + + public Builder setBaseLocation(String baseLocation) { + this.baseLocation = baseLocation; + return this; + } + + public Builder setDoc(String doc) { + this.doc = doc; + return this; + } + + public Builder setProperties(Map properties) { + this.properties = properties; + return this; + } + + public CreateGenericTableRequest build() { + CreateGenericTableRequest inst = + new CreateGenericTableRequest(name, format, baseLocation, doc, properties); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof CreateGenericTableRequest)) { + return false; + } + CreateGenericTableRequest createGenericTableRequest = (CreateGenericTableRequest) o; + return Objects.equals(this.name, createGenericTableRequest.name) + && Objects.equals(this.format, createGenericTableRequest.format) + && Objects.equals(this.baseLocation, createGenericTableRequest.baseLocation) + && Objects.equals(this.doc, createGenericTableRequest.doc) + && Objects.equals(this.properties, createGenericTableRequest.properties); + } 
+ + @Override + public int hashCode() { + return Objects.hash(name, format, baseLocation, doc, properties); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class CreateGenericTableRequest {\n"); + + sb.append(" name: ").append(toIndentedString(name)).append("\n"); + sb.append(" format: ").append(toIndentedString(format)).append("\n"); + sb.append(" baseLocation: ").append(toIndentedString(baseLocation)).append("\n"); + sb.append(" doc: ").append(toIndentedString(doc)).append("\n"); + sb.append(" properties: ").append(toIndentedString(properties)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java new file mode 100644 index 0000000000..27ad3bab6f --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.constraints.NotNull; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class GenericTable { + + @NotNull private final String name; + @NotNull private final String format; + private final String baseLocation; + private final String doc; + private final Map properties; + + /** */ + @JsonProperty(value = "name", required = true) + public String getName() { + return name; + } + + /** */ + @JsonProperty(value = "format", required = true) + public String getFormat() { + return format; + } + + /** */ + @JsonProperty(value = "base-location") + public String getBaseLocation() { + return baseLocation; + } + + /** */ + @JsonProperty(value = "doc") + public String getDoc() { + return doc; + } + + /** */ + @JsonProperty(value = "properties") + public Map getProperties() { + return properties; + } + + @JsonCreator + public GenericTable( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + this.name = name; + this.format = format; + this.baseLocation = baseLocation; + this.doc = doc; + this.properties = Objects.requireNonNullElse(properties, new HashMap<>()); + } + + public GenericTable(String name, String format) { + this.name = name; + this.format = format; + this.baseLocation = null; + this.doc = null; + this.properties = new HashMap<>(); + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(String name, String format) { + return new Builder(name, format); + } + + public static final class Builder { + private String name; + private String format; + private String baseLocation; + private String doc; + private Map properties; + + private Builder() {} + + private Builder(String name, String format) { + this.name = name; + this.format = format; + } + + public Builder setName(String name) { + this.name = name; + return this; + } + + public Builder setFormat(String format) { + this.format = format; + return this; + } + + public Builder setBaseLocation(String baseLocation) { + this.baseLocation = baseLocation; + return this; + } + + public Builder setDoc(String doc) { + this.doc = doc; + return this; + } + + public Builder setProperties(Map properties) { + this.properties = properties; + return this; + } + + public GenericTable build() { + GenericTable inst = new GenericTable(name, format, baseLocation, doc, properties); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof GenericTable)) { + return false; + } + GenericTable genericTable = (GenericTable) o; + return Objects.equals(this.name, genericTable.name) + && Objects.equals(this.format, genericTable.format) + && Objects.equals(this.baseLocation, genericTable.baseLocation) + && Objects.equals(this.doc, genericTable.doc) + && Objects.equals(this.properties, genericTable.properties); + } + + @Override + public int hashCode() { + return Objects.hash(name, format, baseLocation, doc, properties); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); 
+ sb.append("class GenericTable {\n"); + + sb.append(" name: ").append(toIndentedString(name)).append("\n"); + sb.append(" format: ").append(toIndentedString(format)).append("\n"); + sb.append(" baseLocation: ").append(toIndentedString(baseLocation)).append("\n"); + sb.append(" doc: ").append(toIndentedString(doc)).append("\n"); + sb.append(" properties: ").append(toIndentedString(properties)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java new file mode 100644 index 0000000000..55205d30f5 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTResponse; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTResponse definition for ListGenericTable which extends the iceberg RESTResponse. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class ListGenericTablesRESTResponse extends ListGenericTablesResponse + implements RESTResponse { + + @JsonCreator + public ListGenericTablesRESTResponse( + @JsonProperty(value = "next-page-token") String nextPageToken, + @JsonProperty(value = "identifiers") Set identifiers) { + super(nextPageToken, identifiers); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java new file mode 100644 index 0000000000..1e7369ed45 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.Valid; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class ListGenericTablesResponse { + + private final String nextPageToken; + @Valid private final Set<@Valid TableIdentifier> identifiers; + + /** + * An opaque token that allows clients to make use of pagination for list APIs (e.g. ListTables). + * Clients may initiate the first paginated request by sending an empty query parameter + * `pageToken` to the server. Servers that support pagination should identify the + * `pageToken` parameter and return a `next-page-token` in the response if + * there are more results available. After the initial request, the value of + * `next-page-token` from each response must be used as the `pageToken` + * parameter value for the next request. The server must return `null` value for the + * `next-page-token` in the last response. Servers that support pagination must return + * all results in a single response with the value of `next-page-token` set to + * `null` if the query parameter `pageToken` is not set in the request. + * Servers that do not support pagination should ignore the `pageToken` parameter and + * return all results in a single response. The `next-page-token` must be omitted from + * the response. Clients must interpret either `null` or missing response value of + * `next-page-token` as the end of the listing results. 
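+ *
+ * <p>For example (an illustrative exchange; the token value is hypothetical): a first request
+ * sent without {@code pageToken} may return {@code next-page-token="tok1"}; the client then
+ * sends {@code pageToken=tok1} on the following request, and stops when the response's
+ * {@code next-page-token} is null or missing.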
+ */ + @JsonProperty(value = "next-page-token") + public String getNextPageToken() { + return nextPageToken; + } + + /** */ + @JsonProperty(value = "identifiers") + public Set<@Valid TableIdentifier> getIdentifiers() { + return identifiers; + } + + @JsonCreator + public ListGenericTablesResponse( + @JsonProperty(value = "next-page-token") String nextPageToken, + @JsonProperty(value = "identifiers") Set<@Valid TableIdentifier> identifiers) { + this.nextPageToken = nextPageToken; + this.identifiers = Objects.requireNonNullElse(identifiers, new LinkedHashSet<>()); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String nextPageToken; + private Set<@Valid TableIdentifier> identifiers; + + private Builder() {} + + public Builder setNextPageToken(String nextPageToken) { + this.nextPageToken = nextPageToken; + return this; + } + + public Builder setIdentifiers(Set<@Valid TableIdentifier> identifiers) { + this.identifiers = identifiers; + return this; + } + + public ListGenericTablesResponse build() { + ListGenericTablesResponse inst = new ListGenericTablesResponse(nextPageToken, identifiers); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ListGenericTablesResponse)) { + return false; + } + ListGenericTablesResponse listGenericTablesResponse = (ListGenericTablesResponse) o; + return Objects.equals(this.nextPageToken, listGenericTablesResponse.nextPageToken) + && Objects.equals(this.identifiers, listGenericTablesResponse.identifiers); + } + + @Override + public int hashCode() { + return Objects.hash(nextPageToken, identifiers); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class ListGenericTablesResponse {\n"); + + sb.append(" nextPageToken: ").append(toIndentedString(nextPageToken)).append("\n"); + sb.append(" identifiers: ").append(toIndentedString(identifiers)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java new file mode 100644 index 0000000000..ae9999dd58 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import org.apache.iceberg.rest.RESTResponse; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTResponse definition for LoadGenericTable which extends the iceberg RESTResponse. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class LoadGenericTableRESTResponse extends LoadGenericTableResponse implements RESTResponse { + + @JsonCreator + public LoadGenericTableRESTResponse( + @JsonProperty(value = "table", required = true) GenericTable table) { + super(table); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java new file mode 100644 index 0000000000..1923db1225 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class LoadGenericTableResponse { + + @NotNull @Valid private final GenericTable table; + + /** */ + @JsonProperty(value = "table", required = true) + public GenericTable getTable() { + return table; + } + + @JsonCreator + public LoadGenericTableResponse( + @JsonProperty(value = "table", required = true) GenericTable table) { + this.table = table; + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(GenericTable table) { + return new Builder(table); + } + + public static final class Builder { + private GenericTable table; + + private Builder() {} + + private Builder(GenericTable table) { + this.table = table; + } + + public Builder setTable(GenericTable table) { + this.table = table; + return this; + } + + public LoadGenericTableResponse build() { + LoadGenericTableResponse inst = new LoadGenericTableResponse(table); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof LoadGenericTableResponse)) { + return false; + } + LoadGenericTableResponse loadGenericTableResponse = (LoadGenericTableResponse) o; + return Objects.equals(this.table, loadGenericTableResponse.table); + } + + @Override + public int hashCode() { + return Objects.hash(table); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class LoadGenericTableResponse {\n"); + + sb.append(" table: ").append(toIndentedString(table)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java new file mode 100644 index 0000000000..2974384247 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.utils; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import org.apache.iceberg.common.DynConstructors; +import org.apache.polaris.spark.PolarisSparkCatalog; +import org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DeltaHelper { + private static final Logger LOG = LoggerFactory.getLogger(DeltaHelper.class); + + public static final String DELTA_CATALOG_IMPL_KEY = "delta-catalog-impl"; + private static final String DEFAULT_DELTA_CATALOG_CLASS = + "org.apache.spark.sql.delta.catalog.DeltaCatalog"; + + private TableCatalog deltaCatalog = null; + private String deltaCatalogImpl = DEFAULT_DELTA_CATALOG_CLASS; + + public DeltaHelper(CaseInsensitiveStringMap options) { + if (options.get(DELTA_CATALOG_IMPL_KEY) != null) { + this.deltaCatalogImpl = options.get(DELTA_CATALOG_IMPL_KEY); + } + } + + public TableCatalog loadDeltaCatalog(PolarisSparkCatalog polarisSparkCatalog) { + if (this.deltaCatalog != null) { + return this.deltaCatalog; + } + + DynConstructors.Ctor ctor; + try { + ctor = DynConstructors.builder(TableCatalog.class).impl(deltaCatalogImpl).buildChecked(); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException( + String.format("Cannot initialize Delta Catalog %s: %s", deltaCatalogImpl, e.getMessage()), + e); + } + + try { + this.deltaCatalog = ctor.newInstance(); + } catch (ClassCastException e) { + throw new IllegalArgumentException( + String.format( + "Cannot initialize Delta Catalog, %s does not implement Table Catalog.", + deltaCatalogImpl), + e); + } + + // set the polaris spark catalog as the delegate catalog of delta catalog + ((DelegatingCatalogExtension) this.deltaCatalog).setDelegateCatalog(polarisSparkCatalog); + + // We want to behave exactly the same as unity catalog for Delta. However, DeltaCatalog + // implementation today is hard coded for unity catalog. Following issue is used to track + // the extension of the usage https://github.com/delta-io/delta/issues/4306. + // Here, we use reflection to set the isUnityCatalog to true for exactly same behavior as + // unity catalog for now. + try { + // isUnityCatalog is a lazy val, access the compute method for the lazy val + // make sure the method is triggered before the value is set, otherwise, the + // value will be overwritten later when the method is triggered. 
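+ // (Scala compiles a lazy val into a private backing field plus a synthetic
+ // isUnityCatalog$lzycompute() method; invoking that method first forces the lazy
+ // initialization, so the reflective write below is not overwritten afterwards.)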
+ String methodGetName = "isUnityCatalog" + "$lzycompute"; + Method method = this.deltaCatalog.getClass().getDeclaredMethod(methodGetName); + method.setAccessible(true); + // invoke the lazy methods before it is set + method.invoke(this.deltaCatalog); + } catch (NoSuchMethodException e) { + LOG.warn("No lazy compute method found for variable isUnityCatalog"); + } catch (Exception e) { + throw new RuntimeException("Failed to invoke the lazy compute methods for isUnityCatalog", e); + } + + try { + Field field = this.deltaCatalog.getClass().getDeclaredField("isUnityCatalog"); + field.setAccessible(true); + field.set(this.deltaCatalog, true); + } catch (NoSuchFieldException e) { + throw new RuntimeException( + "Failed find the isUnityCatalog field, delta-spark version >= 3.2.1 is required", e); + } catch (IllegalAccessException e) { + throw new RuntimeException("Failed to set the isUnityCatalog field", e); + } + + return this.deltaCatalog; + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java new file mode 100644 index 0000000000..98016b71fd --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.utils; + +import com.google.common.collect.Maps; +import java.lang.reflect.Field; +import java.util.Map; +import org.apache.iceberg.CachingCatalog; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.rest.RESTCatalog; +import org.apache.iceberg.rest.RESTSessionCatalog; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.spark.SparkCatalog; +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.execution.datasources.DataSource; +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class PolarisCatalogUtils { + public static final String TABLE_PROVIDER_KEY = "provider"; + public static final String TABLE_PATH_KEY = "path"; + + /** Check whether the table provider is iceberg. */ + public static boolean useIceberg(String provider) { + return provider == null || "iceberg".equalsIgnoreCase(provider); + } + + /** Check whether the table provider is delta. 
*/
+  public static boolean useDelta(String provider) {
+    return "delta".equalsIgnoreCase(provider);
+  }
+
+  /**
+   * For tables whose location is managed by the Spark session catalog, there will be no location
+   * or path in the properties.
+   */
+  public static boolean isTableWithSparkManagedLocation(Map<String, String> properties) {
+    boolean hasLocationClause = properties.containsKey(TableCatalog.PROP_LOCATION);
+    boolean hasPathClause = properties.containsKey(TABLE_PATH_KEY);
+    return !hasLocationClause && !hasPathClause;
+  }
+
+  /**
+   * Load a Spark table using DataSourceV2.
+   *
+   * @return a V2Table if DataSourceV2 is available for the table format. For a Delta table, it
+   *     returns a DeltaTableV2.
+   */
+  public static Table loadSparkTable(GenericTable genericTable) {
+    SparkSession sparkSession = SparkSession.active();
+    TableProvider provider =
+        DataSource.lookupDataSourceV2(genericTable.getFormat(), sparkSession.sessionState().conf())
+            .get();
+    Map<String, String> properties = genericTable.getProperties();
+    boolean hasLocationClause = properties.get(TableCatalog.PROP_LOCATION) != null;
+    boolean hasPathClause = properties.get(TABLE_PATH_KEY) != null;
+    Map<String, String> tableProperties = Maps.newHashMap();
+    tableProperties.putAll(properties);
+    if (!hasPathClause) {
+      // DataSourceV2 requires the path property when loading a table. However, Spark today
+      // doesn't create the corresponding path property if the path keyword is not provided
+      // by the user when a location is provided. Here, we duplicate the location property
+      // as path to make sure the table can be loaded.
+      if (genericTable.getBaseLocation() != null && !genericTable.getBaseLocation().isEmpty()) {
+        tableProperties.put(TABLE_PATH_KEY, genericTable.getBaseLocation());
+      } else if (hasLocationClause) {
+        tableProperties.put(TABLE_PATH_KEY, properties.get(TableCatalog.PROP_LOCATION));
+      }
+    }
+    return DataSourceV2Utils.getTableFromProvider(
+        provider, new CaseInsensitiveStringMap(tableProperties), scala.Option.empty());
+  }
+
+  /**
+   * Get the catalogAuth field inside the RESTSessionCatalog used by the Iceberg SparkCatalog
+   * using reflection. TODO: Deprecate this function once the iceberg client is updated to 1.9.0
+   * to use AuthManager and the capability of injecting an AuthManager is available. Related
+   * iceberg PR:
Related iceberg PR: + * https://github.com/apache/iceberg/pull/12655 + */ + public static OAuth2Util.AuthSession getAuthSession(SparkCatalog sparkCatalog) { + try { + Field icebergCatalogField = sparkCatalog.getClass().getDeclaredField("icebergCatalog"); + icebergCatalogField.setAccessible(true); + Catalog icebergCatalog = (Catalog) icebergCatalogField.get(sparkCatalog); + RESTCatalog icebergRestCatalog; + if (icebergCatalog instanceof CachingCatalog) { + Field catalogField = icebergCatalog.getClass().getDeclaredField("catalog"); + catalogField.setAccessible(true); + icebergRestCatalog = (RESTCatalog) catalogField.get(icebergCatalog); + } else { + icebergRestCatalog = (RESTCatalog) icebergCatalog; + } + + Field sessionCatalogField = icebergRestCatalog.getClass().getDeclaredField("sessionCatalog"); + sessionCatalogField.setAccessible(true); + RESTSessionCatalog sessionCatalog = + (RESTSessionCatalog) sessionCatalogField.get(icebergRestCatalog); + + Field authField = sessionCatalog.getClass().getDeclaredField("catalogAuth"); + authField.setAccessible(true); + return (OAuth2Util.AuthSession) authField.get(sessionCatalog); + } catch (Exception e) { + throw new RuntimeException("Failed to get the catalogAuth from the Iceberg SparkCatalog", e); + } + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java new file mode 100644 index 0000000000..f698615e67 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableChange; + +/** + * This is a fake delta catalog class that is used for testing. This class is a noop class that + * directly passes all calls to the delegate CatalogPlugin configured as part of + * DelegatingCatalogExtension. + */ +public class NoopDeltaCatalog extends DelegatingCatalogExtension { + // This is a mock of isUnityCatalog scala val in + // org.apache.spark.sql.delta.catalog.DeltaCatalog. + private boolean isUnityCatalog = false; + + @Override + public Table alterTable(Identifier ident, TableChange... 
changes) throws NoSuchTableException { + return super.loadTable(ident); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java new file mode 100644 index 0000000000..2d71d9cb6e --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import com.google.common.collect.Maps; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentMap; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.polaris.spark.rest.GenericTable; + +/** InMemory implementation for the Polaris Catalog. This class is mainly used by testing. */ +public class PolarisInMemoryCatalog extends InMemoryCatalog implements PolarisCatalog { + private final ConcurrentMap genericTables; + + public PolarisInMemoryCatalog() { + this.genericTables = Maps.newConcurrentMap(); + } + + @Override + public List listGenericTables(Namespace ns) { + return this.genericTables.keySet().stream() + .filter(t -> t.namespace().equals(ns)) + .sorted(Comparator.comparing(TableIdentifier::toString)) + .collect(Collectors.toList()); + } + + @Override + public GenericTable loadGenericTable(TableIdentifier identifier) { + GenericTable table = this.genericTables.get(identifier); + if (table == null) { + throw new NoSuchTableException("Generic table does not exist: %s", identifier); + } + + return table; + } + + @Override + public boolean dropGenericTable(TableIdentifier identifier) { + return null != this.genericTables.remove(identifier); + } + + @Override + public GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props) { + if (!namespaceExists(identifier.namespace())) { + throw new NoSuchNamespaceException( + "Cannot create generic table %s. 
Namespace does not exist: %s", + identifier, identifier.namespace()); + } + + GenericTable previous = + this.genericTables.putIfAbsent( + identifier, + GenericTable.builder() + .setName(identifier.name()) + .setFormat(format) + .setBaseLocation(baseLocation) + .setProperties(props) + .build()); + + if (previous != null) { + throw new AlreadyExistsException("Generic table already exists: %s", identifier); + } + + return this.genericTables.get(identifier); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java new file mode 100644 index 0000000000..708bf60e7c --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java @@ -0,0 +1,642 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import static org.apache.iceberg.CatalogProperties.CATALOG_IMPL; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.google.common.collect.Maps; +import java.util.Arrays; +import java.util.Map; +import java.util.UUID; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.actions.DeleteReachableFiles; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.spark.SparkUtil; +import org.apache.iceberg.spark.actions.DeleteReachableFilesSparkAction; +import org.apache.iceberg.spark.actions.SparkActions; +import org.apache.iceberg.spark.source.SparkTable; +import org.apache.polaris.spark.utils.DeltaHelper; +import org.apache.polaris.spark.utils.PolarisCatalogUtils; +import org.apache.spark.SparkContext; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.NamespaceChange; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableChange; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.connector.catalog.V1Table; +import org.apache.spark.sql.connector.catalog.View; +import org.apache.spark.sql.connector.catalog.ViewChange; +import org.apache.spark.sql.connector.catalog.ViewInfo; +import org.apache.spark.sql.connector.expressions.Transform; +import 
org.apache.spark.sql.execution.datasources.DataSource; +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils; +import org.apache.spark.sql.internal.SQLConf; +import org.apache.spark.sql.internal.SessionState; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import scala.Option; + +public class SparkCatalogTest { + private static class InMemoryIcebergSparkCatalog extends org.apache.iceberg.spark.SparkCatalog { + private PolarisInMemoryCatalog inMemoryCatalog = null; + + @Override + protected Catalog buildIcebergCatalog(String name, CaseInsensitiveStringMap options) { + PolarisInMemoryCatalog inMemoryCatalog = new PolarisInMemoryCatalog(); + inMemoryCatalog.initialize(name, options); + + this.inMemoryCatalog = inMemoryCatalog; + + return inMemoryCatalog; + } + + public PolarisInMemoryCatalog getInMemoryCatalog() { + return this.inMemoryCatalog; + } + } + + /** + * And SparkCatalog implementation that uses InMemory catalog implementation for both Iceberg and + * Polaris + */ + private static class InMemorySparkCatalog extends SparkCatalog { + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + this.catalogName = name; + // initialize the InMemory icebergSparkCatalog + this.icebergsSparkCatalog = new InMemoryIcebergSparkCatalog(); + this.icebergsSparkCatalog.initialize(name, options); + + // initialize the polarisSparkCatalog with PolarisSparkCatalog + this.polarisSparkCatalog = + new PolarisSparkCatalog( + ((InMemoryIcebergSparkCatalog) this.icebergsSparkCatalog).getInMemoryCatalog()); + this.polarisSparkCatalog.initialize(name, options); + + this.deltaHelper = new DeltaHelper(options); + } + } + + private InMemorySparkCatalog catalog; + private String catalogName; + + private static final String[] defaultNS = new String[] {"ns"}; + private static StructType defaultSchema = + new StructType().add("id", "long").add("name", "string"); + + @BeforeEach + public void setup() throws Exception { + catalogName = "test_" + UUID.randomUUID(); + Map catalogConfig = Maps.newHashMap(); + catalogConfig.put(CATALOG_IMPL, "org.apache.iceberg.inmemory.InMemoryCatalog"); + catalogConfig.put("cache-enabled", "false"); + catalogConfig.put( + DeltaHelper.DELTA_CATALOG_IMPL_KEY, "org.apache.polaris.spark.NoopDeltaCatalog"); + catalog = new InMemorySparkCatalog(); + Configuration conf = new Configuration(); + try (MockedStatic mockedStaticSparkSession = + Mockito.mockStatic(SparkSession.class); + MockedStatic mockedSparkUtil = Mockito.mockStatic(SparkUtil.class)) { + SparkSession mockedSession = Mockito.mock(SparkSession.class); + mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession); + mockedStaticSparkSession + .when(SparkSession::getActiveSession) + .thenReturn(Option.apply(mockedSession)); + mockedSparkUtil + .when(() -> SparkUtil.hadoopConfCatalogOverrides(mockedSession, catalogName)) + .thenReturn(conf); + SparkContext mockedContext = Mockito.mock(SparkContext.class); + Mockito.when(mockedSession.sparkContext()).thenReturn(mockedContext); + Mockito.when(mockedContext.applicationId()).thenReturn("appId"); + Mockito.when(mockedContext.sparkUser()).thenReturn("test-user"); + Mockito.when(mockedContext.version()).thenReturn("4.0"); + + 
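+ // SparkSession.active() and SparkUtil.hadoopConfCatalogOverrides(...) are mocked above so
+ // that initialize() can build the in-memory catalogs without a running Spark session.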
catalog.initialize(catalogName, new CaseInsensitiveStringMap(catalogConfig)); + } + catalog.createNamespace(defaultNS, Maps.newHashMap()); + } + + @Test + void testCatalogValidation() { + Map catalogConfigWithImpl = Maps.newHashMap(); + catalogConfigWithImpl.put(CATALOG_IMPL, "org.apache.iceberg.inmemory.InMemoryCatalog"); + catalogConfigWithImpl.put("cache-enabled", "false"); + SparkCatalog testCatalog = new SparkCatalog(); + assertThatThrownBy( + () -> + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(catalogConfigWithImpl))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Customized catalog implementation is not supported and not needed"); + + Map catalogConfigInvalidType = Maps.newHashMap(); + catalogConfigInvalidType.put(CatalogUtil.ICEBERG_CATALOG_TYPE, "hive"); + assertThatThrownBy( + () -> + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(catalogConfigInvalidType))) + .isInstanceOf(IllegalArgumentException.class); + + CaseInsensitiveStringMap resolvedMap = + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(Maps.newHashMap())); + assertThat(resolvedMap.get(CatalogUtil.ICEBERG_CATALOG_TYPE)) + .isEqualTo(CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + } + + @Test + void testCreateAndLoadNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("key1", "value1"); + + // no namespace can be found + assertThatThrownBy(() -> catalog.loadNamespaceMetadata(namespace)) + .isInstanceOf(NoSuchNamespaceException.class); + + // create the namespace + catalog.createNamespace(namespace, metadata); + + Map nsMetadata = catalog.loadNamespaceMetadata(namespace); + assertThat(nsMetadata).contains(Map.entry("key1", "value1")); + } + + @Test + void testDropAndListNamespaces() throws Exception { + String[][] lv1ns = new String[][] {{"l1ns1"}, {"l1ns2"}}; + String[][] lv2ns1 = new String[][] {{"l1ns1", "l2ns1"}, {"l1ns1", "l2ns2"}}; + String[][] lv2ns2 = new String[][] {{"l1ns2", "l2ns3"}}; + + // create the namespaces + for (String[] namespace : lv1ns) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns1) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns2) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + + // list namespaces under root + String[][] lv1nsResult = catalog.listNamespaces(); + assertThat(lv1nsResult.length).isEqualTo(lv1ns.length + 1); + assertThat(Arrays.asList(lv1nsResult)).contains(defaultNS); + for (String[] namespace : lv1ns) { + assertThat(Arrays.asList(lv1nsResult)).contains(namespace); + } + // list namespace under l1ns1 + String[][] lv2ns1Result = catalog.listNamespaces(lv1ns[0]); + assertThat(lv2ns1Result.length).isEqualTo(lv2ns1.length); + for (String[] namespace : lv2ns1) { + assertThat(Arrays.asList(lv2ns1Result)).contains(namespace); + } + // list namespace under l1ns2 + String[][] lv2ns2Result = catalog.listNamespaces(lv1ns[1]); + assertThat(lv2ns2Result.length).isEqualTo(lv2ns2.length); + for (String[] namespace : lv2ns2) { + assertThat(Arrays.asList(lv2ns2Result)).contains(namespace); + } + // no namespace under l1ns2.l2ns3 + assertThat(catalog.listNamespaces(lv2ns2[0]).length).isEqualTo(0); + + // drop l1ns2 + catalog.dropNamespace(lv2ns2[0], true); + assertThat(catalog.listNamespaces(lv1ns[1]).length).isEqualTo(0); + + catalog.dropNamespace(lv1ns[1], true); + assertThatThrownBy(() -> 
catalog.listNamespaces(lv1ns[1])) + .isInstanceOf(NoSuchNamespaceException.class); + } + + @Test + void testAlterNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("orig_key1", "orig_value1"); + + catalog.createNamespace(namespace, metadata); + assertThat(catalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("orig_key1", "orig_value1")); + + catalog.alterNamespace(namespace, NamespaceChange.setProperty("new_key", "new_value")); + assertThat(catalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("new_key", "new_value")); + } + + @Test + void testStageOperations() throws Exception { + Identifier createId = Identifier.of(defaultNS, "iceberg-table-create"); + Map icebergProperties = Maps.newHashMap(); + icebergProperties.put("provider", "iceberg"); + icebergProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/iceberg-table/"); + StructType iceberg_schema = new StructType().add("boolType", "boolean"); + + catalog.stageCreate(createId, iceberg_schema, new Transform[0], icebergProperties); + + catalog.stageCreateOrReplace(createId, iceberg_schema, new Transform[0], icebergProperties); + } + + @Test + void testBasicViewOperations() throws Exception { + Identifier viewIdentifier = Identifier.of(defaultNS, "test-view"); + String viewSql = "select id from test-table where id < 3"; + StructType schema = new StructType().add("id", "long"); + ViewInfo viewInfo = + new ViewInfo( + viewIdentifier, + viewSql, + catalogName, + defaultNS, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + catalog.createView(viewInfo); + + // load the view + View view = catalog.loadView(viewIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // alter the view properties + catalog.alterView(viewIdentifier, ViewChange.setProperty("view_key1", "view_value1")); + view = catalog.loadView(viewIdentifier); + assertThat(view.properties()).contains(Map.entry("view_key1", "view_value1")); + + // rename the view + Identifier newIdentifier = Identifier.of(defaultNS, "new-view"); + catalog.renameView(viewIdentifier, newIdentifier); + assertThatThrownBy(() -> catalog.loadView(viewIdentifier)) + .isInstanceOf(NoSuchViewException.class); + view = catalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // replace the view + String newSql = "select id from test-table where id == 3"; + Map properties = Maps.newHashMap(); + properties.put("key1", "value1"); + catalog.replaceView( + newIdentifier, + newSql, + catalogName, + defaultNS, + schema, + new String[0], + new String[0], + new String[0], + properties); + view = catalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(newSql); + assertThat(view.properties()).contains(Map.entry("key1", "value1")); + + // drop the view + catalog.dropView(newIdentifier); + assertThatThrownBy(() -> catalog.loadView(newIdentifier)) + .isInstanceOf(NoSuchViewException.class); + } + + @Test + void testListViews() throws Exception { + // create a new namespace under the default NS + String[] namespace = new String[] {"ns", "nsl2"}; + catalog.createNamespace(namespace, Maps.newHashMap()); + // create under defaultNS + String view1Name = "test-view1"; + String view1SQL = "select id from test-table where id >= 3"; + ViewInfo viewInfo1 = + new ViewInfo( + Identifier.of(defaultNS, view1Name), + view1SQL, + catalogName, + defaultNS, + 
defaultSchema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + catalog.createView(viewInfo1); + // create two views under ns.nsl2 + String[] nsl2ViewNames = new String[] {"test-view2", "test-view3"}; + String[] nsl2ViewSQLs = + new String[] { + "select id from test-table where id == 3", "select id from test-table where id < 3" + }; + for (int i = 0; i < nsl2ViewNames.length; i++) { + ViewInfo viewInfo = + new ViewInfo( + Identifier.of(namespace, nsl2ViewNames[i]), + nsl2ViewSQLs[i], + catalogName, + namespace, + defaultSchema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + catalog.createView(viewInfo); + } + // list views under defaultNS + Identifier[] l1Views = catalog.listViews(defaultNS); + assertThat(l1Views.length).isEqualTo(1); + assertThat(l1Views[0].name()).isEqualTo(view1Name); + + // list views under ns1.nsl2 + Identifier[] l2Views = catalog.listViews(namespace); + assertThat(l2Views.length).isEqualTo(nsl2ViewSQLs.length); + for (String name : nsl2ViewNames) { + assertThat(Arrays.asList(l2Views)).contains(Identifier.of(namespace, name)); + } + } + + @Test + void testIcebergTableOperations() throws Exception { + Identifier identifier = Identifier.of(defaultNS, "iceberg-table"); + createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, "iceberg"); + + // load the table + Table table = catalog.loadTable(identifier); + // verify iceberg SparkTable is loaded + assertThat(table).isInstanceOf(SparkTable.class); + + Identifier[] icebergTables = catalog.listTables(defaultNS); + assertThat(icebergTables.length).isEqualTo(1); + assertThat(icebergTables[0]).isEqualTo(Identifier.of(defaultNS, "iceberg-table")); + + // verify create table with the same identifier fails with spark TableAlreadyExistsException + Map newProperties = Maps.newHashMap(); + newProperties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "iceberg"); + newProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/table/"); + assertThatThrownBy( + () -> catalog.createTable(identifier, defaultSchema, new Transform[0], newProperties)) + .isInstanceOf(TableAlreadyExistsException.class); + + // drop the iceberg table + catalog.dropTable(identifier); + assertThatThrownBy(() -> catalog.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class); + assertThat(catalog.listTables(defaultNS)).isEmpty(); + } + + @ParameterizedTest + @ValueSource(strings = {"delta", "csv"}) + void testCreateAndLoadGenericTable(String format) throws Exception { + Identifier identifier = Identifier.of(defaultNS, "generic-test-table"); + createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, format); + + Identifier[] icebergTables = catalog.listTables(defaultNS); + assertThat(icebergTables.length).isEqualTo(1); + assertThat(icebergTables[0]).isEqualTo(Identifier.of(defaultNS, "generic-test-table")); + + Map newProperties = Maps.newHashMap(); + newProperties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "parquet"); + newProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/table/"); + assertThatThrownBy( + () -> catalog.createTable(identifier, defaultSchema, new Transform[0], newProperties)) + .isInstanceOf(TableAlreadyExistsException.class); + + // drop the iceberg table + catalog.dropTable(identifier); + assertThatThrownBy(() -> catalog.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class); + assertThat(catalog.listTables(defaultNS)).isEmpty(); + } + + @Test + void testMixedTables() throws Exception { + // create two iceberg tables, 
and three non-iceberg tables + String[] tableNames = new String[] {"iceberg1", "iceberg2", "delta1", "csv1", "delta2"}; + String[] tableFormats = new String[] {"iceberg", null, "delta", "csv", "delta"}; + for (int i = 0; i < tableNames.length; i++) { + Identifier identifier = Identifier.of(defaultNS, tableNames[i]); + createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, tableFormats[i]); + } + + // list all tables + Identifier[] tableIdents = catalog.listTables(defaultNS); + assertThat(tableIdents.length).isEqualTo(tableNames.length); + for (String name : tableNames) { + assertThat(tableIdents).contains(Identifier.of(defaultNS, name)); + } + + // drop iceberg2 and delta1 table + catalog.dropTable(Identifier.of(defaultNS, "iceberg2")); + catalog.dropTable(Identifier.of(defaultNS, "delta2")); + + String[] remainingTableNames = new String[] {"iceberg1", "delta1", "csv1"}; + Identifier[] remainingTableIndents = catalog.listTables(defaultNS); + assertThat(remainingTableIndents.length).isEqualTo(remainingTableNames.length); + for (String name : remainingTableNames) { + assertThat(tableIdents).contains(Identifier.of(defaultNS, name)); + } + + // drop the remaining tables + for (String name : remainingTableNames) { + catalog.dropTable(Identifier.of(defaultNS, name)); + } + assertThat(catalog.listTables(defaultNS)).isEmpty(); + } + + @Test + void testAlterAndRenameTable() throws Exception { + String icebergTableName = "iceberg-table"; + String deltaTableName = "delta-table"; + String csvTableName = "csv-table"; + Identifier icebergIdent = Identifier.of(defaultNS, icebergTableName); + Identifier deltaIdent = Identifier.of(defaultNS, deltaTableName); + Identifier csvIdent = Identifier.of(defaultNS, csvTableName); + createAndValidateGenericTableWithLoad(catalog, icebergIdent, defaultSchema, "iceberg"); + createAndValidateGenericTableWithLoad(catalog, deltaIdent, defaultSchema, "delta"); + createAndValidateGenericTableWithLoad(catalog, csvIdent, defaultSchema, "csv"); + + // verify alter iceberg table + Table newIcebergTable = + catalog.alterTable(icebergIdent, TableChange.setProperty("iceberg_key", "iceberg_value")); + assertThat(newIcebergTable).isInstanceOf(SparkTable.class); + assertThat(newIcebergTable.properties()).contains(Map.entry("iceberg_key", "iceberg_value")); + + // verify rename iceberg table works + Identifier newIcebergIdent = Identifier.of(defaultNS, "new-iceberg-table"); + catalog.renameTable(icebergIdent, newIcebergIdent); + assertThatThrownBy(() -> catalog.loadTable(icebergIdent)) + .isInstanceOf(NoSuchTableException.class); + Table icebergTable = catalog.loadTable(newIcebergIdent); + assertThat(icebergTable).isInstanceOf(SparkTable.class); + + // verify alter delta table is a no-op, and alter csv table throws an exception + SQLConf conf = new SQLConf(); + try (MockedStatic mockedStaticSparkSession = + Mockito.mockStatic(SparkSession.class); + MockedStatic mockedStaticDS = Mockito.mockStatic(DataSource.class); + MockedStatic mockedStaticDSV2 = + Mockito.mockStatic(DataSourceV2Utils.class)) { + SparkSession mockedSession = Mockito.mock(SparkSession.class); + mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession); + SessionState mockedState = Mockito.mock(SessionState.class); + Mockito.when(mockedSession.sessionState()).thenReturn(mockedState); + Mockito.when(mockedState.conf()).thenReturn(conf); + + TableProvider deltaProvider = Mockito.mock(TableProvider.class); + mockedStaticDS + .when(() -> 
DataSource.lookupDataSourceV2(Mockito.eq("delta"), Mockito.any())) + .thenReturn(Option.apply(deltaProvider)); + V1Table deltaTable = Mockito.mock(V1Table.class); + Map deltaProps = Maps.newHashMap(); + deltaProps.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "delta"); + deltaProps.put(TableCatalog.PROP_LOCATION, "file:///tmp/delta/path/to/table/test-delta/"); + Mockito.when(deltaTable.properties()).thenReturn(deltaProps); + mockedStaticDSV2 + .when( + () -> + DataSourceV2Utils.getTableFromProvider( + Mockito.eq(deltaProvider), Mockito.any(), Mockito.any())) + .thenReturn(deltaTable); + + Table delta = + catalog.alterTable(deltaIdent, TableChange.setProperty("delta_key", "delta_value")); + assertThat(delta).isInstanceOf(V1Table.class); + + TableProvider csvProvider = Mockito.mock(TableProvider.class); + mockedStaticDS + .when(() -> DataSource.lookupDataSourceV2(Mockito.eq("csv"), Mockito.any())) + .thenReturn(Option.apply(csvProvider)); + Map csvProps = Maps.newHashMap(); + csvProps.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "csv"); + V1Table csvTable = Mockito.mock(V1Table.class); + Mockito.when(csvTable.properties()).thenReturn(csvProps); + mockedStaticDSV2 + .when( + () -> + DataSourceV2Utils.getTableFromProvider( + Mockito.eq(csvProvider), Mockito.any(), Mockito.any())) + .thenReturn(csvTable); + assertThatThrownBy( + () -> catalog.alterTable(csvIdent, TableChange.setProperty("csv_key", "scv_value"))) + .isInstanceOf(UnsupportedOperationException.class); + } + + // verify rename non-iceberg table is not supported + assertThatThrownBy( + () -> catalog.renameTable(deltaIdent, Identifier.of(defaultNS, "new-delta-table"))) + .isInstanceOf(UnsupportedOperationException.class); + assertThatThrownBy( + () -> catalog.renameTable(csvIdent, Identifier.of(defaultNS, "new-csv-table"))) + .isInstanceOf(UnsupportedOperationException.class); + } + + @Test + void testPurgeInvalidateTable() throws Exception { + Identifier icebergIdent = Identifier.of(defaultNS, "iceberg-table"); + Identifier deltaIdent = Identifier.of(defaultNS, "delta-table"); + createAndValidateGenericTableWithLoad(catalog, icebergIdent, defaultSchema, "iceberg"); + createAndValidateGenericTableWithLoad(catalog, deltaIdent, defaultSchema, "delta"); + + // test invalidate table is a no op today + catalog.invalidateTable(icebergIdent); + catalog.invalidateTable(deltaIdent); + + Identifier[] tableIdents = catalog.listTables(defaultNS); + assertThat(tableIdents.length).isEqualTo(2); + + // verify purge tables drops the table + catalog.purgeTable(deltaIdent); + assertThat(catalog.listTables(defaultNS).length).isEqualTo(1); + + // purge iceberg table triggers file deletion + try (MockedStatic mockedStaticActions = Mockito.mockStatic(SparkActions.class)) { + SparkActions actions = Mockito.mock(SparkActions.class); + DeleteReachableFilesSparkAction deleteAction = + Mockito.mock(DeleteReachableFilesSparkAction.class); + mockedStaticActions.when(SparkActions::get).thenReturn(actions); + Mockito.when(actions.deleteReachableFiles(Mockito.any())).thenReturn(deleteAction); + Mockito.when(deleteAction.io(Mockito.any())).thenReturn(deleteAction); + Mockito.when(deleteAction.execute()) + .thenReturn(Mockito.mock(DeleteReachableFiles.Result.class)); + + catalog.purgeTable(icebergIdent); + } + assertThat(catalog.listTables(defaultNS).length).isEqualTo(0); + } + + private void createAndValidateGenericTableWithLoad( + InMemorySparkCatalog sparkCatalog, Identifier identifier, StructType schema, String format) + throws Exception { + Map properties = 
Maps.newHashMap(); + properties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, format); + properties.put( + TableCatalog.PROP_LOCATION, + String.format("file:///tmp/delta/path/to/table/%s/", identifier.name())); + + SQLConf conf = new SQLConf(); + try (MockedStatic mockedStaticSparkSession = + Mockito.mockStatic(SparkSession.class); + MockedStatic mockedStaticDS = Mockito.mockStatic(DataSource.class); + MockedStatic mockedStaticDSV2 = + Mockito.mockStatic(DataSourceV2Utils.class)) { + SparkSession mockedSession = Mockito.mock(SparkSession.class); + mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession); + SessionState mockedState = Mockito.mock(SessionState.class); + Mockito.when(mockedSession.sessionState()).thenReturn(mockedState); + Mockito.when(mockedState.conf()).thenReturn(conf); + + TableProvider provider = Mockito.mock(TableProvider.class); + mockedStaticDS + .when(() -> DataSource.lookupDataSourceV2(Mockito.eq(format), Mockito.any())) + .thenReturn(Option.apply(provider)); + V1Table table = Mockito.mock(V1Table.class); + mockedStaticDSV2 + .when( + () -> + DataSourceV2Utils.getTableFromProvider( + Mockito.eq(provider), Mockito.any(), Mockito.any())) + .thenReturn(table); + Table createdTable = + sparkCatalog.createTable(identifier, schema, new Transform[0], properties); + Table loadedTable = sparkCatalog.loadTable(identifier); + + // verify the create and load table result + if (PolarisCatalogUtils.useIceberg(format)) { + // iceberg SparkTable is returned for iceberg tables + assertThat(createdTable).isInstanceOf(SparkTable.class); + assertThat(loadedTable).isInstanceOf(SparkTable.class); + } else { + // Spark V1 table is returned for non-iceberg tables + assertThat(createdTable).isInstanceOf(V1Table.class); + assertThat(loadedTable).isInstanceOf(V1Table.class); + } + } + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java new file mode 100644 index 0000000000..0f7d3c99b3 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTSerializers; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonAutoDetect; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.PropertyAccessor; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonFactory; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonFactoryBuilder; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.DeserializationFeature; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.PropertyNamingStrategies; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class DeserializationTest { + private ObjectMapper mapper; + private static final JsonFactory FACTORY = + new JsonFactoryBuilder() + .configure(JsonFactory.Feature.INTERN_FIELD_NAMES, false) + .configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false) + .build(); + + @BeforeEach + public void setUp() { + // NOTE: This is the same setting as iceberg RESTObjectMapper.java. However, + // RESTObjectMapper is not a public class, therefore, we duplicate the + // setting here for serialization and deserialization tests. 
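+    // With the kebab-case naming strategy registered below, Java fields such as baseLocation
+    // are serialized to and parsed from the kebab-case JSON key "base-location".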
+ mapper = new ObjectMapper(FACTORY); + mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); + mapper.setVisibility(PropertyAccessor.CREATOR, JsonAutoDetect.Visibility.ANY); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.setPropertyNamingStrategy(new PropertyNamingStrategies.KebabCaseStrategy()); + RESTSerializers.registerAll(mapper); + } + + @ParameterizedTest + @MethodSource("genericTableTestCases") + public void testLoadGenericTableRESTResponse( + String baseLocation, String doc, Map properties) + throws JsonProcessingException { + GenericTable.Builder tableBuilder = + GenericTable.builder() + .setFormat("delta") + .setName("test-table") + .setProperties(properties) + .setDoc(doc); + if (baseLocation != null) { + tableBuilder.setBaseLocation(baseLocation); + } + GenericTable table = tableBuilder.build(); + LoadGenericTableRESTResponse response = new LoadGenericTableRESTResponse(table); + String json = mapper.writeValueAsString(response); + LoadGenericTableRESTResponse deserializedResponse = + mapper.readValue(json, LoadGenericTableRESTResponse.class); + assertThat(deserializedResponse.getTable().getFormat()).isEqualTo("delta"); + assertThat(deserializedResponse.getTable().getName()).isEqualTo("test-table"); + assertThat(deserializedResponse.getTable().getDoc()).isEqualTo(doc); + assertThat(deserializedResponse.getTable().getProperties().size()).isEqualTo(properties.size()); + assertThat(deserializedResponse.getTable().getBaseLocation()).isEqualTo(baseLocation); + } + + @ParameterizedTest + @MethodSource("genericTableTestCases") + public void testCreateGenericTableRESTRequest( + String baseLocation, String doc, Map properties) + throws JsonProcessingException { + CreateGenericTableRESTRequest request = + new CreateGenericTableRESTRequest( + CreateGenericTableRequest.builder() + .setName("test-table") + .setFormat("delta") + .setDoc(doc) + .setBaseLocation(baseLocation) + .setProperties(properties) + .build()); + String json = mapper.writeValueAsString(request); + CreateGenericTableRESTRequest deserializedRequest = + mapper.readValue(json, CreateGenericTableRESTRequest.class); + assertThat(deserializedRequest.getName()).isEqualTo("test-table"); + assertThat(deserializedRequest.getFormat()).isEqualTo("delta"); + assertThat(deserializedRequest.getDoc()).isEqualTo(doc); + assertThat(deserializedRequest.getProperties().size()).isEqualTo(properties.size()); + assertThat(deserializedRequest.getBaseLocation()).isEqualTo(baseLocation); + } + + @Test + public void testListGenericTablesRESTResponse() throws JsonProcessingException { + Namespace namespace = Namespace.of("test-ns"); + Set idents = + ImmutableSet.of( + TableIdentifier.of(namespace, "table1"), + TableIdentifier.of(namespace, "table2"), + TableIdentifier.of(namespace, "table3")); + + // page token is null + ListGenericTablesRESTResponse response = new ListGenericTablesRESTResponse(null, idents); + String json = mapper.writeValueAsString(response); + ListGenericTablesRESTResponse deserializedResponse = + mapper.readValue(json, ListGenericTablesRESTResponse.class); + assertThat(deserializedResponse.getNextPageToken()).isNull(); + assertThat(deserializedResponse.getIdentifiers().size()).isEqualTo(idents.size()); + for (TableIdentifier identifier : idents) { + assertThat(deserializedResponse.getIdentifiers()).contains(identifier); + } + + // page token is not null + response = new ListGenericTablesRESTResponse("page-token", idents); + json = mapper.writeValueAsString(response); 
+ deserializedResponse = mapper.readValue(json, ListGenericTablesRESTResponse.class); + assertThat(deserializedResponse.getNextPageToken()).isEqualTo("page-token"); + for (TableIdentifier identifier : idents) { + assertThat(deserializedResponse.getIdentifiers()).contains(identifier); + } + } + + @Test + public void testLoadGenericTableRestResponse() throws JsonProcessingException { + LoadGenericTableRESTResponse request = + new LoadGenericTableRESTResponse( + GenericTable.builder().setName("test-table").setFormat("delta").build()); + String json = mapper.writeValueAsString(request); + LoadGenericTableRESTResponse deserializedResponse = + mapper.readValue(json, LoadGenericTableRESTResponse.class); + assertThat(deserializedResponse.getTable().getName()).isEqualTo("test-table"); + } + + private static Stream genericTableTestCases() { + var doc = "table for testing"; + var properties = Maps.newHashMap(); + properties.put("location", "s3://path/to/table/"); + var baseLocation = "s3://path/to/table/"; + return Stream.of( + Arguments.of(null, doc, properties), + Arguments.of(baseLocation, doc, properties), + Arguments.of(null, null, Maps.newHashMap()), + Arguments.of(baseLocation, doc, Maps.newHashMap()), + Arguments.of(baseLocation, null, properties)); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml b/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml new file mode 100644 index 0000000000..b7e97bb826 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml @@ -0,0 +1,32 @@ + + + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + diff --git a/runtime/spark-tests/build.gradle.kts b/runtime/spark-tests/build.gradle.kts index 884475c221..f4bd05ef80 100644 --- a/runtime/spark-tests/build.gradle.kts +++ b/runtime/spark-tests/build.gradle.kts @@ -23,6 +23,14 @@ plugins { id("polaris-runtime") } +configurations.all { + if (name != "checkstyle") { + resolutionStrategy { + force("org.antlr:antlr4-runtime:4.9.3") // Spark 3.5 and Delta 3.3 require ANTLR 4.9.3 + } + } +} + dependencies { // must be enforced to get a consistent and validated set of dependencies @@ -56,7 +64,7 @@ dependencies { testImplementation(enforcedPlatform(libs.scala212.lang.library)) testImplementation(enforcedPlatform(libs.scala212.lang.reflect)) testImplementation(libs.javax.servlet.api) - testImplementation(libs.antlr4.runtime) + // ANTLR version is determined by Spark/Delta dependencies, not enforced } tasks.named("intTest").configure { diff --git a/settings.gradle.kts b/settings.gradle.kts index 83347575d3..b2de0163f6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -68,7 +68,15 @@ val sparkVersions = sparkScalaVersions["sparkVersions"].toString().split(",").ma val noSourceChecksProjects = mutableSetOf() for (sparkVersion in sparkVersions) { - val scalaVersions = sparkScalaVersions["scalaVersions"].toString().split(",").map { it.trim() } + // Check if there's a version-specific scalaVersions property, otherwise use the default + val scalaVersionsKey = "scalaVersions.${sparkVersion}" + val scalaVersionsStr = + if (sparkScalaVersions.containsKey(scalaVersionsKey)) { + sparkScalaVersions[scalaVersionsKey].toString() + } else { + sparkScalaVersions["scalaVersions"].toString() + } + val scalaVersions = scalaVersionsStr.split(",").map { it.trim() } var first = true for (scalaVersion in scalaVersions) { val sparkArtifactId = "polaris-spark-${sparkVersion}_${scalaVersion}"
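// For illustration (assumed layout, not taken from this change): the version resolution above
// expects Spark/Scala properties along these lines, where a Spark-version-specific key such as
// "scalaVersions.4.0" overrides the default Scala versions for that Spark version only:
//   sparkVersions=3.5,4.0
//   scalaVersions=2.12,2.13
//   scalaVersions.4.0=2.13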