
Commit e1f9469

Update error messages for SparkConnect linter (#2348)
## Changes

Update phrasing in linter messages to be clearer. Replace `UC Shared clusters` with `Unity Catalog clusters in Shared access mode`.
1 parent d3d42c0 commit e1f9469

File tree

10 files changed: +47, -41 lines changed


src/databricks/labs/ucx/source_code/linters/spark_connect.py

Lines changed: 5 additions & 1 deletion
@@ -19,7 +19,11 @@ class SharedClusterMatcher:
     session_state: CurrentSessionState
 
     def _cluster_type_str(self) -> str:
-        return 'UC Shared Clusters' if not self.session_state.is_serverless else 'Serverless Compute'
+        return (
+            'Unity Catalog clusters in Shared access mode'
+            if not self.session_state.is_serverless
+            else 'Serverless Compute'
+        )
 
     @abstractmethod
     def lint(self, node: NodeNG) -> Iterator[Advice]:
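For readers skimming the diff: `_cluster_type_str()` only supplies the compute label that every advice message below embeds. A minimal standalone sketch of the new behaviour, using a simplified stand-in for `CurrentSessionState` (the real dataclass lives elsewhere in the ucx source and has more fields):

```python
from dataclasses import dataclass


# Stand-in for ucx's CurrentSessionState; only the field read by
# _cluster_type_str() is modelled here (an assumption for illustration).
@dataclass
class CurrentSessionState:
    is_serverless: bool = False


def cluster_type_str(session_state: CurrentSessionState) -> str:
    # Mirrors the logic of SharedClusterMatcher._cluster_type_str() after this commit:
    # non-serverless sessions get the new, spelled-out Shared access mode label.
    return (
        'Unity Catalog clusters in Shared access mode'
        if not session_state.is_serverless
        else 'Serverless Compute'
    )


assert cluster_type_str(CurrentSessionState()) == 'Unity Catalog clusters in Shared access mode'
assert cluster_type_str(CurrentSessionState(is_serverless=True)) == 'Serverless Compute'
```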

tests/unit/source_code/linters/test_spark_connect.py

Lines changed: 12 additions & 10 deletions
@@ -23,7 +23,7 @@ def test_jvm_access_match_shared(session_state):
     expected = [
         Failure(
             code="jvm-access-in-shared-clusters",
-            message='Cannot access Spark Driver JVM on UC Shared Clusters',
+            message='Cannot access Spark Driver JVM on Unity Catalog clusters in Shared access mode',
             start_line=2,
             start_col=0,
             end_line=2,
@@ -65,31 +65,31 @@ def test_rdd_context_match_shared(session_state):
     expected = [
         Failure(
             code="rdd-in-shared-clusters",
-            message='RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API',
+            message='RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API',
             start_line=1,
             start_col=7,
             end_line=1,
             end_col=32,
         ),
         Failure(
             code="rdd-in-shared-clusters",
-            message='RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API',
+            message='RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API',
             start_line=2,
             start_col=29,
             end_line=2,
             end_col=42,
         ),
         Failure(
             code='legacy-context-in-shared-clusters',
-            message='sc is not supported on UC Shared Clusters. Rewrite it using spark',
+            message='sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark',
             start_line=1,
             start_col=7,
             end_line=1,
             end_col=21,
         ),
         Failure(
             code="legacy-context-in-shared-clusters",
-            message='sc is not supported on UC Shared Clusters. Rewrite it using spark',
+            message='sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark',
             start_line=2,
             start_col=29,
             end_line=2,
@@ -152,7 +152,7 @@ def test_rdd_map_partitions(session_state):
     expected = [
         Failure(
             code="rdd-in-shared-clusters",
-            message='RDD APIs are not supported on UC Shared Clusters. Use mapInArrow() or Pandas UDFs instead',
+            message='RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Use mapInArrow() or Pandas UDFs instead',
             start_line=2,
             start_col=0,
             end_line=2,
@@ -169,7 +169,7 @@ def test_conf_shared(session_state):
     assert [
         Failure(
             code='legacy-context-in-shared-clusters',
-            message='sparkContext and getConf are not supported on UC Shared Clusters. Rewrite it using spark.conf',
+            message='sparkContext and getConf are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark.conf',
             start_line=0,
             start_col=0,
             end_line=0,
@@ -211,7 +211,7 @@ def test_logging_shared(session_state):
     assert [
         Failure(
             code='spark-logging-in-shared-clusters',
-            message='Cannot set Spark log level directly from code on UC Shared Clusters. '
+            message='Cannot set Spark log level directly from code on Unity Catalog clusters in Shared access mode. '
             'Remove the call and set the cluster spark conf \'spark.log.level\' instead',
             start_line=1,
             start_col=0,
@@ -220,15 +220,17 @@ def test_logging_shared(session_state):
         ),
         Failure(
             code='spark-logging-in-shared-clusters',
-            message='Cannot access Spark Driver JVM logger on UC Shared Clusters. ' 'Use logging.getLogger() instead',
+            message='Cannot access Spark Driver JVM logger on Unity Catalog clusters in Shared access mode. '
+            'Use logging.getLogger() instead',
             start_line=4,
             start_col=14,
             end_line=4,
             end_col=38,
         ),
         Failure(
            code='spark-logging-in-shared-clusters',
-            message='Cannot access Spark Driver JVM logger on UC Shared Clusters. ' 'Use logging.getLogger() instead',
+            message='Cannot access Spark Driver JVM logger on Unity Catalog clusters in Shared access mode. '
+            'Use logging.getLogger() instead',
             start_line=6,
             start_col=0,
             end_line=6,

tests/unit/source_code/samples/functional/spark-connect/catalog-api_13_3.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 # ucx[session-state] {"dbr_version": [13, 3], "data_security_mode": "USER_ISOLATION"}
-# ucx[catalog-api-in-shared-clusters:+1:0:+1:13] spark.catalog functions require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[catalog-api-in-shared-clusters:+1:0:+1:13] spark.catalog functions require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 spark.catalog.tableExists("table")
-# ucx[catalog-api-in-shared-clusters:+1:0:+1:13] spark.catalog functions require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[catalog-api-in-shared-clusters:+1:0:+1:13] spark.catalog functions require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 spark.catalog.listDatabases()
 
 

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 # ucx[session-state] {"data_security_mode": "USER_ISOLATION"}
-# ucx[to-json-in-shared-clusters:+1:6:+1:80] toJson() is not available on UC Shared Clusters. Use toSafeJson() on DBR 13.3 LTS or above to get a subset of command context information.
+# ucx[to-json-in-shared-clusters:+1:6:+1:80] toJson() is not available on Unity Catalog clusters in Shared access mode. Use toSafeJson() on DBR 13.3 LTS or above to get a subset of command context information.
 print(dbutils.notebook.entry_point.getDbutils().notebook().getContext().toJson())
 dbutils.notebook.entry_point.getDbutils().notebook().getContext().toSafeJson()
 notebook = dbutils.notebook.entry_point.getDbutils().notebook()
-# ucx[to-json-in-shared-clusters:+1:0:+1:30] toJson() is not available on UC Shared Clusters. Use toSafeJson() on DBR 13.3 LTS or above to get a subset of command context information.
+# ucx[to-json-in-shared-clusters:+1:0:+1:30] toJson() is not available on Unity Catalog clusters in Shared access mode. Use toSafeJson() on DBR 13.3 LTS or above to get a subset of command context information.
 notebook.getContext().toJson()

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 # ucx[session-state] {"data_security_mode": "USER_ISOLATION"}
 spark.range(10).collect()
-# ucx[jvm-access-in-shared-clusters:+1:0:+1:18] Cannot access Spark Driver JVM on UC Shared Clusters
+# ucx[jvm-access-in-shared-clusters:+1:0:+1:18] Cannot access Spark Driver JVM on Unity Catalog clusters in Shared access mode
 spark._jspark._jvm.com.my.custom.Name()
 
-# ucx[jvm-access-in-shared-clusters:+1:0:+1:18] Cannot access Spark Driver JVM on UC Shared Clusters
+# ucx[jvm-access-in-shared-clusters:+1:0:+1:18] Cannot access Spark Driver JVM on Unity Catalog clusters in Shared access mode
 spark._jspark._jvm.com.my.custom.Name()

tests/unit/source_code/samples/functional/spark-connect/python-udfs_13_3.py

Lines changed: 5 additions & 5 deletions
@@ -8,7 +8,7 @@ def slen(s):
     return len(s)
 
 
-# ucx[python-udf-in-shared-clusters:+1:1:+1:37] Arrow UDFs require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[python-udf-in-shared-clusters:+1:1:+1:37] Arrow UDFs require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 @udf(returnType='int', useArrow=True)
 def arrow_slen(s):
     return len(s)
@@ -18,7 +18,7 @@ def arrow_slen(s):
 df.select(slen("name"), arrow_slen("name")).show()
 
 slen1 = udf(lambda s: len(s), returnType='int')
-# ucx[python-udf-in-shared-clusters:+1:14:+1:68] Arrow UDFs require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[python-udf-in-shared-clusters:+1:14:+1:68] Arrow UDFs require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 arrow_slen1 = udf(lambda s: len(s), returnType='int', useArrow=True)
 
 df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
@@ -33,7 +33,7 @@ def subtract_mean(pdf: pd.DataFrame) -> pd.DataFrame:
     return pdf.assign(v=v - v.mean())
 
 
-# ucx[python-udf-in-shared-clusters:+1:0:+1:73] applyInPandas require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[python-udf-in-shared-clusters:+1:0:+1:73] applyInPandas require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 df.groupby("id").applyInPandas(subtract_mean, schema="id long, v double").show()
 
 
@@ -43,11 +43,11 @@ def eval(self, start: int, end: int):
         yield (num, num * num)
 
 
-# ucx[python-udf-in-shared-clusters:+1:13:+1:69] udtf require DBR 14.3 LTS or above on UC Shared Clusters
+# ucx[python-udf-in-shared-clusters:+1:13:+1:69] udtf require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode
 square_num = udtf(SquareNumbers, returnType="num: int, squared: int")
 square_num(lit(1), lit(3)).show()
 
 from pyspark.sql.types import IntegerType
 
-# ucx[python-udf-in-shared-clusters:+1:0:+1:73] Cannot register Java UDF from Python code on UC Shared Clusters. Use a %scala cell to register the Scala UDF using spark.udf.register.
+# ucx[python-udf-in-shared-clusters:+1:0:+1:73] Cannot register Java UDF from Python code on Unity Catalog clusters in Shared access mode. Use a %scala cell to register the Scala UDF using spark.udf.register.
 spark.udf.registerJavaFunction("func", "org.example.func", IntegerType())

tests/unit/source_code/samples/functional/spark-connect/python-udfs_14_3.py

Lines changed: 1 addition & 1 deletion
@@ -35,5 +35,5 @@ def subtract_mean(pdf: pd.DataFrame) -> pd.DataFrame:
 
 from pyspark.sql.types import IntegerType
 
-# ucx[python-udf-in-shared-clusters:+1:0:+1:73] Cannot register Java UDF from Python code on UC Shared Clusters. Use a %scala cell to register the Scala UDF using spark.udf.register.
+# ucx[python-udf-in-shared-clusters:+1:0:+1:73] Cannot register Java UDF from Python code on Unity Catalog clusters in Shared access mode. Use a %scala cell to register the Scala UDF using spark.udf.register.
 spark.udf.registerJavaFunction("func", "org.example.func", IntegerType())

Lines changed: 5 additions & 5 deletions
@@ -1,12 +1,12 @@
 # ucx[session-state] {"data_security_mode": "USER_ISOLATION"}
 df = spark.createDataFrame([])
-# ucx[rdd-in-shared-clusters:+1:0:+1:27] RDD APIs are not supported on UC Shared Clusters. Use mapInArrow() or Pandas UDFs instead
+# ucx[rdd-in-shared-clusters:+1:0:+1:27] RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Use mapInArrow() or Pandas UDFs instead
 df.rdd.mapPartitions(myUdf)
 
-# ucx[rdd-in-shared-clusters:+2:7:+2:32] RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API
-# ucx[legacy-context-in-shared-clusters:+1:7:+1:21] sc is not supported on UC Shared Clusters. Rewrite it using spark
+# ucx[rdd-in-shared-clusters:+2:7:+2:32] RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API
+# ucx[legacy-context-in-shared-clusters:+1:7:+1:21] sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark
 rdd1 = sc.parallelize([1, 2, 3])
 
-# ucx[rdd-in-shared-clusters:+2:29:+2:42] RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API
-# ucx[legacy-context-in-shared-clusters:+1:29:+1:40] sc is not supported on UC Shared Clusters. Rewrite it using spark
+# ucx[rdd-in-shared-clusters:+2:29:+2:42] RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API
+# ucx[legacy-context-in-shared-clusters:+1:29:+1:40] sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark
 rdd2 = spark.createDataFrame(sc.emptyRDD(), schema)

Lines changed: 8 additions & 8 deletions
@@ -1,16 +1,16 @@
 # ucx[session-state] {"data_security_mode": "USER_ISOLATION"}
-# ucx[legacy-context-in-shared-clusters:+2:0:+2:14] sc is not supported on UC Shared Clusters. Rewrite it using spark
-# ucx[spark-logging-in-shared-clusters:+1:0:+1:22] Cannot set Spark log level directly from code on UC Shared Clusters. Remove the call and set the cluster spark conf 'spark.log.level' instead
+# ucx[legacy-context-in-shared-clusters:+2:0:+2:14] sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark
+# ucx[spark-logging-in-shared-clusters:+1:0:+1:22] Cannot set Spark log level directly from code on Unity Catalog clusters in Shared access mode. Remove the call and set the cluster spark conf 'spark.log.level' instead
 sc.setLogLevel("INFO")
 setLogLevel("WARN")
 
-# ucx[jvm-access-in-shared-clusters:+3:14:+3:21] Cannot access Spark Driver JVM on UC Shared Clusters
-# ucx[legacy-context-in-shared-clusters:+2:14:+2:21] sc is not supported on UC Shared Clusters. Rewrite it using spark
-# ucx[spark-logging-in-shared-clusters:+1:14:+1:38] Cannot access Spark Driver JVM logger on UC Shared Clusters. Use logging.getLogger() instead
+# ucx[jvm-access-in-shared-clusters:+3:14:+3:21] Cannot access Spark Driver JVM on Unity Catalog clusters in Shared access mode
+# ucx[legacy-context-in-shared-clusters:+2:14:+2:21] sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark
+# ucx[spark-logging-in-shared-clusters:+1:14:+1:38] Cannot access Spark Driver JVM logger on Unity Catalog clusters in Shared access mode. Use logging.getLogger() instead
 log4jLogger = sc._jvm.org.apache.log4j
 LOGGER = log4jLogger.LogManager.getLogger(__name__)
 
-# ucx[jvm-access-in-shared-clusters:+3:0:+3:7] Cannot access Spark Driver JVM on UC Shared Clusters
-# ucx[legacy-context-in-shared-clusters:+2:0:+2:7] sc is not supported on UC Shared Clusters. Rewrite it using spark
-# ucx[spark-logging-in-shared-clusters:+1:0:+1:24] Cannot access Spark Driver JVM logger on UC Shared Clusters. Use logging.getLogger() instead
+# ucx[jvm-access-in-shared-clusters:+3:0:+3:7] Cannot access Spark Driver JVM on Unity Catalog clusters in Shared access mode
+# ucx[legacy-context-in-shared-clusters:+2:0:+2:7] sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark
+# ucx[spark-logging-in-shared-clusters:+1:0:+1:24] Cannot access Spark Driver JVM logger on Unity Catalog clusters in Shared access mode. Use logging.getLogger() instead
 sc._jvm.org.apache.log4j.LogManager.getLogger(__name__).info("test")

tests/unit/source_code/test_lsp_plugin.py

Lines changed: 5 additions & 5 deletions
@@ -53,14 +53,14 @@ def test_pylsp_lint(workspace, config):
             'range': {'start': {'line': 0, 'character': 0}, 'end': {'line': 0, 'character': 11}},
             'code': 'legacy-context-in-shared-clusters',
             'source': 'databricks.labs.ucx',
-            'message': 'sc is not supported on UC Shared Clusters. Rewrite it using spark',
+            'message': 'sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark',
             'severity': 1,
             'tags': [],
         },
         {
             'range': {'end': {'character': 35, 'line': 1}, 'start': {'character': 0, 'line': 1}},
             'code': 'python-udf-in-shared-clusters',
-            'message': 'applyInPandas require DBR 14.3 LTS or above on UC Shared Clusters',
+            'message': 'applyInPandas require DBR 14.3 LTS or above on Unity Catalog clusters in Shared access mode',
             'severity': 1,
             'source': 'databricks.labs.ucx',
             'tags': [],
@@ -69,7 +69,7 @@ def test_pylsp_lint(workspace, config):
             'range': {'start': {'line': 0, 'character': 0}, 'end': {'line': 0, 'character': 13}},
             'code': 'rdd-in-shared-clusters',
             'source': 'databricks.labs.ucx',
-            'message': 'RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API',
+            'message': 'RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API',
             'severity': 1,
             'tags': [],
         },
@@ -93,15 +93,15 @@ def test_pylsp_lint_no_dbr_version(workspace, config):
             'range': {'start': {'line': 0, 'character': 0}, 'end': {'line': 0, 'character': 11}},
             'code': 'legacy-context-in-shared-clusters',
             'source': 'databricks.labs.ucx',
-            'message': 'sc is not supported on UC Shared Clusters. Rewrite it using spark',
+            'message': 'sc is not supported on Unity Catalog clusters in Shared access mode. Rewrite it using spark',
             'severity': 1,
             'tags': [],
         },
         {
             'range': {'start': {'line': 0, 'character': 0}, 'end': {'line': 0, 'character': 13}},
             'code': 'rdd-in-shared-clusters',
             'source': 'databricks.labs.ucx',
-            'message': 'RDD APIs are not supported on UC Shared Clusters. Rewrite it using DataFrame API',
+            'message': 'RDD APIs are not supported on Unity Catalog clusters in Shared access mode. Rewrite it using DataFrame API',
             'severity': 1,
             'tags': [],
         },
