Skip to content

Commit c955a5d

Browse files
janani-gurram, panrach, Copilot
authored
fix(histogram): add NULL handling for histogram (apache#35693)
Co-authored-by: Rachel Pan <r.pan@mail.utoronto.ca>
Co-authored-by: Rachel Pan <panrrachel@gmail.com>
Co-authored-by: Janani Gurram <68124448+JG-ctrl@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent e6a5616 commit c955a5d

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

superset/utils/pandas_postprocessing/histogram.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ def histogram(
4848
if groupby is None:
4949
groupby = []
5050

51+
# drop empty values from the target column
52+
df = df.dropna(subset=[column])
53+
if df.empty:
54+
return df
55+
5156
# convert to numeric, coercing errors to NaN
5257
df[column] = to_numeric(df[column], errors="coerce")
5358

tests/unit_tests/pandas_postprocessing/test_histogram.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,70 @@ def test_histogram_with_some_non_numeric_values():
140140
histogram(data_with_non_numeric, "a", ["group"], bins)
141141
except ValueError as e:
142142
assert str(e) == "Column 'group' contains non-numeric values" # noqa: PT017
143+
144+
145+
def test_histogram_with_groupby_and_some_null_values():
146+
data_with_groupby_and_some_nulls = DataFrame(
147+
{
148+
"group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
149+
"a": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
150+
"b": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
151+
}
152+
)
153+
154+
result = histogram(data_with_groupby_and_some_nulls, "a", ["group"], bins)
155+
assert result.shape == (2, bins + 1)
156+
assert result.columns.tolist() == [
157+
"group",
158+
"1.0 - 2.8",
159+
"2.8 - 4.6",
160+
"4.6 - 6.4",
161+
"6.4 - 8.2",
162+
"8.2 - 10.0",
163+
]
164+
assert result.values.tolist() == [["A", 2, 0, 1, 0, 2], ["B", 0, 2, 0, 2, 0]]
165+
166+
167+
def test_histogram_with_no_groupby_and_some_null_values():
168+
data_with_no_groupby_and_some_nulls = DataFrame(
169+
{
170+
"a": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
171+
"b": [1, 2, 3, 4, 5, None, 7, 8, 9, 10],
172+
}
173+
)
174+
175+
result = histogram(data_with_no_groupby_and_some_nulls, "a", [], bins)
176+
assert result.shape == (1, bins)
177+
assert result.columns.tolist() == [
178+
"1.0 - 2.8",
179+
"2.8 - 4.6",
180+
"4.6 - 6.4",
181+
"6.4 - 8.2",
182+
"8.2 - 10.0",
183+
]
184+
assert result.values.tolist() == [[2, 2, 1, 2, 2]]
185+
186+
187+
def test_histogram_with_groupby_and_all_null_values():
188+
data_with_groupby_and_all_nulls = DataFrame(
189+
{
190+
"group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
191+
"a": [None, None, None, None, None, None, None, None, None, None],
192+
"b": [None, None, None, None, None, None, None, None, None, None],
193+
}
194+
)
195+
196+
result = histogram(data_with_groupby_and_all_nulls, "a", ["group"], bins)
197+
assert result.empty
198+
199+
200+
def test_histogram_with_no_groupby_and_all_null_values():
201+
data_with_no_groupby_and_all_nulls = DataFrame(
202+
{
203+
"a": [None, None, None, None, None, None, None, None, None, None],
204+
"b": [None, None, None, None, None, None, None, None, None, None],
205+
}
206+
)
207+
208+
result = histogram(data_with_no_groupby_and_all_nulls, "a", [], bins)
209+
assert result.empty

0 commit comments

Comments
 (0)