Skip to content

Commit 9831552

Browse files
xinrong-meng authored and HyukjinKwon committed
[SPARK-52047][PYTHON] Raise PySparkValueError for unsupported plot kinds
### What changes were proposed in this pull request?

Raise PySparkValueError for unsupported plot kinds.

### Why are the changes needed?

Previously, unsupported plot kinds raised a raw KeyError, which was unclear. This change adds explicit validation and raises a descriptive PySparkValueError, improving user experience and debugging clarity.

### Does this PR introduce _any_ user-facing change?

Yes, when passing an invalid kind to DataFrame.plot, users now receive a clear PySparkValueError instead of a generic KeyError.

### How was this patch tested?

Unit tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#50837 from xinrong-meng/plot_test.

Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 1cec856 commit 9831552

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

python/pyspark/errors/error-conditions.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,11 @@
11551155
"`<backend>` does not support `<param>` set to <value>, it should be one of the values from <supported_values>"
11561156
]
11571157
},
1158+
"UNSUPPORTED_PLOT_KIND": {
1159+
"message": [
1160+
"`<plot_type>` is not supported, it should be one of the values from <supported_plot_types>"
1161+
]
1162+
},
11581163
"UNSUPPORTED_SIGNATURE": {
11591164
"message": [
11601165
"Unsupported signature: <signature>."

python/pyspark/sql/plot/plotly.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ def plot_pyspark(data: "DataFrame", kind: str, **kwargs: Any) -> "Figure":
4343
return plot_kde(data, **kwargs)
4444
if kind == "hist":
4545
return plot_histogram(data, **kwargs)
46+
if kind not in PySparkPlotAccessor.plot_data_map:
47+
raise PySparkValueError(
48+
errorClass="UNSUPPORTED_PLOT_KIND",
49+
messageParameters={
50+
"plot_type": kind,
51+
"supported_plot_types": ", ".join(
52+
sorted(
53+
list(PySparkPlotAccessor.plot_data_map.keys())
54+
+ ["pie", "box", "kde", "density", "hist"]
55+
)
56+
),
57+
},
58+
)
4659

4760
return plotly.plot(PySparkPlotAccessor.plot_data_map[kind](data), kind, **kwargs)
4861

python/pyspark/sql/tests/plot/test_frame_plot.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,30 @@ def test_backend(self):
4848
messageParameters={"backend": "matplotlib", "supported_backends": "plotly"},
4949
)
5050

51+
def test_unsupported_plot_kind(self):
52+
from pyspark.sql.plot.core import PySparkPlotAccessor
53+
54+
data = [Row(a=i, b=i + 1, c=i + 2, d=i + 3) for i in range(2000)]
55+
sdf = self.spark.createDataFrame(data)
56+
with self.assertRaises(PySparkValueError) as pe:
57+
sdf.plot(kind="bubble")
58+
59+
self.check_error(
60+
exception=pe.exception,
61+
errorClass="UNSUPPORTED_PLOT_KIND",
62+
messageParameters={
63+
"plot_type": "bubble",
64+
"supported_plot_types": ", ".join(
65+
sorted(
66+
list(PySparkPlotAccessor.plot_data_map.keys())
67+
+ ["pie", "box", "kde", "density", "hist"]
68+
)
69+
),
70+
},
71+
)
72+
5173
def test_topn_max_rows(self):
5274
with self.sql_conf({"spark.sql.pyspark.plotting.max_rows": "1000"}):
53-
self.spark.conf.set("spark.sql.pyspark.plotting.max_rows", "1000")
5475
sdf = self.spark.range(2500)
5576
pdf = PySparkTopNPlotBase().get_top_n(sdf)
5677
self.assertEqual(len(pdf), 1000)

0 commit comments

Comments
 (0)