
Commit 27ff177

star-yar committed
Refactor
Signed-off-by: star-yar <[email protected]>
1 parent 480ac00 commit 27ff177

1 file changed: 22 additions and 29 deletions

@@ -1,45 +1,38 @@
-from typing import TYPE_CHECKING, Union
-
 from pyspark.sql import SparkSession
 
-if TYPE_CHECKING:
-    from databricks.connect import DatabricksSession
-
-
-def get_spark() -> Union[SparkSession, "DatabricksSession"]:
+def get_spark() -> SparkSession:
     """
     Returns the SparkSession. In case databricks-connect is available we use it for
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
     """
     try:
-        # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
-        # the remote session is instantiated using the databricks module
-        # If the databricks-connect module is installed, we use a remote session
-        from databricks.connect import DatabricksSession
-
         # We can't test this as there's no Databricks test env available
-        try:
-            spark = DatabricksSession.builder.getOrCreate() # pragma: no cover
-        # this can't be narrowed down since databricks-connect throws error of Exception type
-        except Exception as e:
-            error_message = str(e)
-            if (
-                error_message
-                == "Cluster id or serverless are required but were not specified."
-            ):
-                raise type(e)(
-                    "DatabricksSession is expected to behave as singleton but it didn't. "
-                    "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
-                    "env variables in your hooks prior to using the spark session. "
-                    "Read more about these variables here: "
-                    "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
-                ) from e
-            pass
+        spark = _create_databricks_session() # pragma: no cover
 
     except ImportError:
         # For "normal" spark sessions that don't use databricks-connect
         # we get spark normally
         spark = SparkSession.builder.getOrCreate()
 
     return spark
+
+
+def _create_databricks_session() -> SparkSession:
+    # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
+    # the remote session is instantiated using the databricks module
+    # If the databricks-connect module is installed, we use a remote session
+    from databricks.connect import DatabricksSession
+
+    try:
+        return DatabricksSession.builder.getOrCreate()
+    # this can't be narrowed down since databricks-connect throws error of Exception type
+    except Exception as exception:
+        if str(exception) == "Cluster id or serverless are required but were not specified.":
+            raise type(exception)(
+                "DatabricksSession is expected to behave as singleton but it didn't. "
+                "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
+                "env variables in your hooks prior to using the spark session. "
+                "Read more about these variables here: "
+                "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
+            ) from exception
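
For context, a minimal usage sketch of the refactored helper. The module name spark_utils and the profile name my-profile are assumptions: neither the file path nor any config profile appears on this page.

import os

# With databricks-connect installed, DatabricksSession.builder.getOrCreate()
# needs a way to resolve a cluster or serverless compute, for example via a
# named config profile (see the error message in the diff above):
os.environ.setdefault("DATABRICKS_CONFIG_PROFILE", "my-profile")  # hypothetical profile

from spark_utils import get_spark  # hypothetical module path

# Returns a remote DatabricksSession when databricks-connect is importable,
# otherwise falls back to a local SparkSession.
spark = get_spark()
spark.sql("SELECT 1 AS ok").show()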
