
Commit 15d06fd

Refactor
Signed-off-by: star-yar <[email protected]>
1 parent 480ac00 commit 15d06fd

1 file changed: 22 additions & 28 deletions
@@ -1,45 +1,39 @@
-from typing import TYPE_CHECKING, Union
-
 from pyspark.sql import SparkSession
 
-if TYPE_CHECKING:
-    from databricks.connect import DatabricksSession
-
 
-def get_spark() -> Union[SparkSession, "DatabricksSession"]:
+def get_spark() -> SparkSession:
     """
     Returns the SparkSession. In case databricks-connect is available we use it for
     extended configuration mechanisms and notebook compatibility,
     otherwise we use classic pyspark.
     """
     try:
-        # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
-        # the remote session is instantiated using the databricks module
-        # If the databricks-connect module is installed, we use a remote session
-        from databricks.connect import DatabricksSession
-
         # We can't test this as there's no Databricks test env available
-        try:
-            spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
-        # this can't be narrowed down since databricks-connect throws error of Exception type
-        except Exception as e:
-            error_message = str(e)
-            if (
-                error_message
-                == "Cluster id or serverless are required but were not specified."
-            ):
-                raise type(e)(
-                    "DatabricksSession is expected to behave as singleton but it didn't. "
-                    "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
-                    "env variables in your hooks prior to using the spark session. "
-                    "Read more about these variables here: "
-                    "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
-                ) from e
-            pass
+        spark = _create_databricks_session()  # pragma: no cover
 
     except ImportError:
         # For "normal" spark sessions that don't use databricks-connect
         # we get spark normally
         spark = SparkSession.builder.getOrCreate()
 
     return spark
+
+
+def _create_databricks_session() -> SparkSession:
+    # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2)
+    # the remote session is instantiated using the databricks module
+    # If the databricks-connect module is installed, we use a remote session
+    from databricks.connect import DatabricksSession
+
+    try:
+        return DatabricksSession.builder.getOrCreate()
+    # this can't be narrowed down since databricks-connect throws error of Exception type
+    except Exception as exception:
+        if str(exception) == "Cluster id or serverless are required but were not specified.":
+            raise type(exception)(
+                "DatabricksSession is expected to behave as singleton but it didn't. "
+                "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "
+                "env variables in your hooks prior to using the spark session. "
+                "Read more about these variables here: "
+                "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
+            ) from exception
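
Per the error message in the diff, databricks-connect resolves its target from the DATABRICKS_CONFIG_PROFILE (or DATABRICKS_PROFILE plus DATABRICKS_SERVERLESS_COMPUTE_ID) environment variables, so they must be set before the first call to get_spark(). A minimal usage sketch, assuming the refactored file is importable as spark_utils (the commit does not show the file name) and that a profile named "my-profile" exists in ~/.databrickscfg:

import os

# Hypothetical profile name -- substitute one from your ~/.databrickscfg
# (see the cluster-config docs linked in the diff).
os.environ.setdefault("DATABRICKS_CONFIG_PROFILE", "my-profile")

from spark_utils import get_spark  # hypothetical module path, not shown in the commit

# get_spark() returns a DatabricksSession when databricks-connect is
# installed, otherwise a plain SparkSession via SparkSession.builder.getOrCreate().
spark = get_spark()
spark.sql("SELECT 1").show()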
