
Commit 73799ac

Support s3a in hive publish (#138)
1 parent aa91217 · commit 73799ac

1 file changed (+4, −4)

spark_utils/delta_lake/functions.py

Lines changed: 4 additions & 4 deletions
@@ -23,7 +23,7 @@
 """
 Helper functions for Delta Lake
 """
-
+import re
 from typing import Iterator
 
 from delta import DeltaTable
@@ -83,7 +83,7 @@ def publish_delta_to_hive(
 
     OR
 
-    Generate simple Hive table linked to abfss path
+    Generate simple Hive table linked to abfss or s3a path
 
     :param spark_session: SparkSession that is configured to use an external Hive Metastore
     :param publish_table_name: Name of a table to publish
@@ -93,9 +93,9 @@ def publish_delta_to_hive(
     :param publish_as_symlink: Generate symlink format manifest to make table readable from Trino
     :return:
     """
-
+    protocol, data_location = re.match(r"^(abfss|s3a):\/\/([^ ]+)$", data_path).groups()
     spark_session.sql(
-        f"CREATE SCHEMA IF NOT EXISTS {publish_schema_name} location 'abfss://{'/'.join(data_path[8:].split('/')[0:-1])}/'"
+        f"CREATE SCHEMA IF NOT EXISTS {publish_schema_name} location '{protocol}://{'/'.join(data_location.split('/')[0:-1])}/'"
     )
     if refresh:
         spark_session.sql(f"DROP TABLE IF EXISTS {publish_schema_name}.{publish_table_name}")
