Skip to content

Commit 2320dc6

Browse files
Remove reliance on cassndra extensions (#141)
1 parent 85dee3d commit 2320dc6

File tree

2 files changed

+5
-22
lines changed

2 files changed

+5
-22
lines changed

.github/workflows/prepare_release.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ jobs:
1616
uses: SneaksAndData/github-actions/semver_release@v0.1.9
1717
with:
1818
major_v: 1
19-
minor_v: 2
19+
minor_v: 3

spark_utils/common/spark_session_provider.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ class SparkSessionProvider:
6464
"""
6565

6666
DELTA_CATALOG_EXTENSION = "io.delta.sql.DeltaSparkSessionExtension"
67-
CASSANDRA_CATALOG_EXTENSION = "com.datastax.spark.connector.CassandraSparkExtensions"
6867
TRANSIENT_INIT_ERRORS = ["temporary failure in name resolution"]
6968

7069
def __init__(
@@ -148,17 +147,12 @@ def session_builder(self) -> pyspark.sql.session.SparkSession.Builder:
148147
"""
149148
return self._spark_session_builder
150149

151-
def with_astra_db(
152-
self, db_name: str, bundle_bytes: str, client_id: str, client_secret: str
153-
) -> "SparkSessionProvider":
150+
def with_astra_bundle(self, db_name: str, bundle_bytes: str) -> "SparkSessionProvider":
154151
"""
155-
If you do not have the jar available, remember to create the session provider with this package added:
156-
SparkSessionProvider(additional_packages=["com.datastax.spark:spark-cassandra-connector_2.12:3.4.0"])
152+
Mounts Astra DB bundle into a Spark Session.
157153
158154
:param db_name: Astra database name to use as Spark Catalog reference
159155
:param bundle_bytes: Base64 encoded secure bundle zip content. Generate with `cat bundle.zip | base64`
160-
:param client_id: Connection token client id
161-
:param client_secret: Connection token client secret
162156
"""
163157
bundle_file_name = f"{db_name}_bundle"
164158
bundle_path = os.path.join(tempfile.gettempdir(), ".astra")
@@ -167,19 +161,8 @@ def with_astra_db(
167161
with open(os.path.join(bundle_path, bundle_file_name), "wb") as bundle_file:
168162
bundle_file.write(base64.b64decode(bundle_bytes))
169163

170-
self._spark_session_builder = (
171-
self._spark_session_builder.config(
172-
"spark.sql.extensions",
173-
",".join(
174-
[SparkSessionProvider.DELTA_CATALOG_EXTENSION, SparkSessionProvider.CASSANDRA_CATALOG_EXTENSION]
175-
),
176-
)
177-
.config(f"spark.sql.catalog.{db_name}", "com.datastax.spark.connector.datasource.CassandraCatalog")
178-
.config("spark.files", os.path.join(bundle_path, bundle_file_name))
179-
.config("spark.cassandra.connection.config.cloud.path", bundle_file_name)
180-
.config("spark.cassandra.auth.username", client_id)
181-
.config("spark.cassandra.auth.password", client_secret)
182-
.config("spark.dse.continuousPagingEnabled", "false")
164+
self._spark_session_builder = self._spark_session_builder.config(
165+
"spark.files", os.path.join(bundle_path, bundle_file_name)
183166
)
184167

185168
return self

0 commit comments

Comments
 (0)