
Commit 0b59776

Revert "Refactor"
This reverts commit 15d06fd. Signed-off-by: star-yar <[email protected]>
Parent: 78aa265

1 file changed: +28 -22 lines

kedro-datasets/kedro_datasets/_utils/spark_utils.py

Lines changed: 28 additions & 22 deletions
@@ -1,39 +1,45 @@
+from typing import TYPE_CHECKING, Union
+
 from pyspark.sql import SparkSession
 
+if TYPE_CHECKING:
+    from databricks.connect import DatabricksSession
+
 
-def get_spark() -> SparkSession:
+def get_spark() -> Union[SparkSession, "DatabricksSession"]:
     """
     Returns the SparkSession. In case databricks-connect is available we use it for
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
     """
     try:
+        # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
+        # the remote session is instantiated using the databricks module
+        # If the databricks-connect module is installed, we use a remote session
+        from databricks.connect import DatabricksSession
+
         # We can't test this as there's no Databricks test env available
-        spark = _create_databricks_session()  # pragma: no cover
+        try:
+            spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
+        # this can't be narrowed down since databricks-connect throws error of Exception type
+        except Exception as e:
+            error_message = str(e)
+            if (
+                error_message
+                == "Cluster id or serverless are required but were not specified."
+            ):
+                raise type(e)(
+                    "DatabricksSession is expected to behave as singleton but it didn't. "
+                    "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
+                    "env variables in your hooks prior to using the spark session. "
+                    "Read more about these variables here: "
+                    "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
+                ) from e
+            pass
 
     except ImportError:
         # For "normal" spark sessions that don't use databricks-connect
         # we get spark normally
         spark = SparkSession.builder.getOrCreate()
 
     return spark
-
-
-def _create_databricks_session() -> SparkSession:
-    # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
-    # the remote session is instantiated using the databricks module
-    # If the databricks-connect module is installed, we use a remote session
-    from databricks.connect import DatabricksSession
-
-    try:
-        return DatabricksSession.builder.getOrCreate()
-    # this can't be narrowed down since databricks-connect throws error of Exception type
-    except Exception as exception:
-        if str(exception) == "Cluster id or serverless are required but were not specified.":
-            raise type(exception)(
-                "DatabricksSession is expected to behave as singleton but it didn't. "
-                "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
-                "env variables in your hooks prior to using the spark session. "
-                "Read more about these variables here: "
-                "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
-            ) from exception
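
For context, the restored error message tells users to configure Databricks Connect in their hooks before the session is first requested. A minimal sketch of what that setup could look like in a project-level Kedro hook follows; the hook class name, profile name, and serverless id are illustrative assumptions, not part of this commit:

import os

class DatabricksConnectSetupHook:
    # Hypothetical hook sketch: Kedro's after_context_created fires before
    # pipelines run, so the variables are in place before any dataset
    # calls get_spark().
    def after_context_created(self, context) -> None:
        # Point databricks-connect at a named profile in ~/.databrickscfg
        # ("my-profile" is an assumed name) ...
        os.environ.setdefault("DATABRICKS_CONFIG_PROFILE", "my-profile")
        # ... or target serverless compute instead ("auto" per the
        # Databricks docs linked in the error message):
        # os.environ.setdefault("DATABRICKS_SERVERLESS_COMPUTE_ID", "auto")

With either variable set, DatabricksSession.builder.getOrCreate() can resolve the target compute and get_spark() returns the remote session instead of raising.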
