From 22293b46db9b49c90ae5adcd27764b0ae9e8efdb Mon Sep 17 00:00:00 2001
From: star-yar
Date: Fri, 14 Mar 2025 15:44:53 -0400
Subject: [PATCH 1/5] Extend error message

Signed-off-by: star-yar
---
 .../kedro_datasets/_utils/spark_utils.py      | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kedro-datasets/kedro_datasets/_utils/spark_utils.py b/kedro-datasets/kedro_datasets/_utils/spark_utils.py
index e55012275..048a1a409 100644
--- a/kedro-datasets/kedro_datasets/_utils/spark_utils.py
+++ b/kedro-datasets/kedro_datasets/_utils/spark_utils.py
@@ -19,7 +19,23 @@ def get_spark() -> Union[SparkSession, "DatabricksSession"]:
         from databricks.connect import DatabricksSession
 
         # We can't test this as there's no Databricks test env available
-        spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
+        try:
+            spark = DatabricksSession.builder.getOrCreate()  # pragma: no cover
+        # This can't be narrowed down, since databricks-connect raises a plain Exception
+        except Exception as e:
+            error_message = str(e)
+            if (
+                error_message
+                == "Cluster id or serverless are required but were not specified."
+            ):
+                raise type(e)(
+                    "DatabricksSession is expected to behave as a singleton, but it didn't. "
+                    "Set either the DATABRICKS_CONFIG_PROFILE env variable, or both DATABRICKS_PROFILE "
+                    "and DATABRICKS_SERVERLESS_COMPUTE_ID, in your hooks prior to using the Spark session. "
+                    "Read more about these variables here: "
+                    "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var"
+                ) from e
+            pass
 
     except ImportError:
         # For "normal" spark sessions that don't use databricks-connect

From 25a48779217cbaf07276dcc5bca543f08f94b612 Mon Sep 17 00:00:00 2001
From: star-yar
Date: Fri, 14 Mar 2025 15:59:20 -0400
Subject: [PATCH 2/5] Update RELEASE.md

Signed-off-by: star-yar
---
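Note (illustrative, not applied by this patch): a minimal sketch of how a
project could export these variables from a Kedro hook before any dataset
touches Spark. The hook class, the profile values, and the choice of
after_context_created as the registration point are assumptions for the
example; only the environment variable names come from the error message
added in PATCH 1/5.

    # src/<package>/hooks.py -- illustrative sketch only
    import os

    from kedro.framework.hooks import hook_impl


    class DatabricksConnectHooks:
        """Point databricks-connect at a compute target before get_spark() runs."""

        @hook_impl
        def after_context_created(self, context) -> None:
            # Option 1: a single config profile that already pins a cluster id.
            os.environ.setdefault("DATABRICKS_CONFIG_PROFILE", "my-profile")
            # Option 2 (serverless; assumed values):
            # os.environ.setdefault("DATABRICKS_PROFILE", "my-profile")
            # os.environ.setdefault("DATABRICKS_SERVERLESS_COMPUTE_ID", "auto")

    # Registered in settings.py with: HOOKS = (DatabricksConnectHooks(),)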
""" try: - # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2) - # the remote session is instantiated using the databricks module - # If the databricks-connect module is installed, we use a remote session - from databricks.connect import DatabricksSession - # We can't test this as there's no Databricks test env available - try: - spark = DatabricksSession.builder.getOrCreate() # pragma: no cover - # this can't be narrowed down since databricks-connect throws error of Exception type - except Exception as e: - error_message = str(e) - if ( - error_message - == "Cluster id or serverless are required but were not specified." - ): - raise type(e)( - "DatabricksSession is expected to behave as singleton but it didn't. " - "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID " - "env variables in your hooks prior to using the spark session. " - "Read more about these variables here: " - "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var" - ) from e - pass + spark = _create_databricks_session() # pragma: no cover except ImportError: # For "normal" spark sessions that don't use databricks-connect @@ -43,3 +17,23 @@ def get_spark() -> Union[SparkSession, "DatabricksSession"]: spark = SparkSession.builder.getOrCreate() return spark + + +def _create_databricks_session() -> SparkSession: + # When using databricks-connect >= 13.0.0 (a.k.a databricks-connect-v2) + # the remote session is instantiated using the databricks module + # If the databricks-connect module is installed, we use a remote session + from databricks.connect import DatabricksSession + + try: + return DatabricksSession.builder.getOrCreate() + # this can't be narrowed down since databricks-connect throws error of Exception type + except Exception as exception: + if str(exception) == "Cluster id or serverless are required but were not specified.": + raise type(exception)( + "DatabricksSession is expected to behave as singleton but it didn't. " + "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID " + "env variables in your hooks prior to using the spark session. 
" + "Read more about these variables here: " + "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var" + ) from exception From b962af8b98f9b282881b5151637f782b427533f4 Mon Sep 17 00:00:00 2001 From: star-yar Date: Fri, 14 Mar 2025 18:14:08 -0400 Subject: [PATCH 4/5] Add missing re-raise Signed-off-by: star-yar --- kedro-datasets/kedro_datasets/_utils/spark_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro-datasets/kedro_datasets/_utils/spark_utils.py b/kedro-datasets/kedro_datasets/_utils/spark_utils.py index f6b61f8ac..d4d6ee33d 100644 --- a/kedro-datasets/kedro_datasets/_utils/spark_utils.py +++ b/kedro-datasets/kedro_datasets/_utils/spark_utils.py @@ -37,3 +37,4 @@ def _create_databricks_session() -> SparkSession: "Read more about these variables here: " "https://docs.databricks.com/aws/en/dev-tools/databricks-connect/cluster-config#config-profile-env-var" ) from exception + raise exception From 1f6f18373194f5664dc920e8f11d2f0666be3e8e Mon Sep 17 00:00:00 2001 From: star-yar Date: Fri, 14 Mar 2025 18:20:54 -0400 Subject: [PATCH 5/5] Format Signed-off-by: star-yar --- kedro-datasets/kedro_datasets/_utils/spark_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kedro-datasets/kedro_datasets/_utils/spark_utils.py b/kedro-datasets/kedro_datasets/_utils/spark_utils.py index d4d6ee33d..4ff079354 100644 --- a/kedro-datasets/kedro_datasets/_utils/spark_utils.py +++ b/kedro-datasets/kedro_datasets/_utils/spark_utils.py @@ -29,7 +29,10 @@ def _create_databricks_session() -> SparkSession: return DatabricksSession.builder.getOrCreate() # this can't be narrowed down since databricks-connect throws error of Exception type except Exception as exception: - if str(exception) == "Cluster id or serverless are required but were not specified.": + if ( + str(exception) + == "Cluster id or serverless are required but were not specified." + ): raise type(exception)( "DatabricksSession is expected to behave as singleton but it didn't. " "Either set up DATABRICKS_CONFIG_PROFILE or DATABRICKS_PROFILE and DATABRICKS_SERVERLESS_COMPUTE_ID "