
Commit 766e2ed

Run ruff format in CI (#837)
* Run ruff format in CI
* Add --check parameter
* Apply ruff format
1 parent 22c70ef commit 766e2ed

File tree: 7 files changed, +251 -153 lines changed


.github/workflows/build.yml

Lines changed: 3 additions & 1 deletion
@@ -38,7 +38,9 @@ jobs:
           pip install ruff
       # Update output format to enable automatic inline annotations.
       - name: Run Ruff
-        run: ruff check --output-format=github python/
+        run: |
+          ruff check --output-format=github python/
+          ruff format --check python/

   generate-license:
     runs-on: ubuntu-latest
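The Run Ruff step now performs both lint and format verification; `ruff format --check` exits non-zero and lists files that would be reformatted, without modifying anything. Below is a minimal sketch of running the same two checks locally before pushing; it is a hypothetical helper, not part of this commit, and assumes ruff is installed (pip install ruff).

# local_ruff_checks.py -- hypothetical helper mirroring the CI step above
import subprocess
import sys


def main(path: str = "python/") -> int:
    checks = [
        ["ruff", "check", path],              # lint; CI adds --output-format=github for inline annotations
        ["ruff", "format", "--check", path],  # fail if any file would be reformatted
    ]
    status = 0
    for cmd in checks:
        status = subprocess.run(cmd).returncode or status
    return status


if __name__ == "__main__":
    sys.exit(main())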

python/datafusion/functions.py

Lines changed: 7 additions & 2 deletions
@@ -1479,12 +1479,17 @@ def approx_percentile_cont(
     """Returns the value that is approximately at a given percentile of ``expr``."""
     if num_centroids is None:
         return Expr(
-            f.approx_percentile_cont(expression.expr, percentile.expr, distinct=distinct, num_centroids=None)
+            f.approx_percentile_cont(
+                expression.expr, percentile.expr, distinct=distinct, num_centroids=None
+            )
         )

     return Expr(
         f.approx_percentile_cont(
-            expression.expr, percentile.expr, distinct=distinct, num_centroids=num_centroids.expr
+            expression.expr,
+            percentile.expr,
+            distinct=distinct,
+            num_centroids=num_centroids.expr,
         )
     )
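The change above is purely a line-wrapping fix from ruff format; the wrapper's behavior is unchanged. For reference, this is how the function is exercised in the tests touched by this commit; a usage sketch that assumes the usual test imports (`SessionContext`, `column`, `lit`, `functions as f`) and a small hypothetical table:

from datafusion import SessionContext, column, lit
from datafusion import functions as f

ctx = SessionContext()
df = ctx.from_pydict({"b": [4, 4, 6]}, name="t")  # same values as the test fixture's b column

# Approximate 50th percentile of b; the aggregation tests expect 4 for this data.
median_ish = df.aggregate([], [f.approx_percentile_cont(column("b"), lit(0.5))])
print(median_ish.collect()[0].column(0))

# An optional third argument sets num_centroids, as in test_aggregate_100.
p95 = df.aggregate([], [f.approx_percentile_cont(column("b"), lit(0.95), lit(200))])
print(p95.collect()[0].column(0))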

python/datafusion/tests/test_aggregation.py

Lines changed: 62 additions & 43 deletions
@@ -39,56 +39,74 @@ def df():
     )
     return ctx.create_dataframe([[batch]])

+
 @pytest.fixture
 def df_aggregate_100():
     ctx = SessionContext()
     ctx.register_csv("aggregate_test_data", "./testing/data/csv/aggregate_test_100.csv")
     return ctx.table("aggregate_test_data")


-@pytest.mark.parametrize("agg_expr, calc_expected", [
-    (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))),
-    (f.corr(column("a"), column("b")), lambda a, b, c, d: np.array(np.corrcoef(a, b)[0][1])),
-    (f.count(column("a")), lambda a, b, c, d: pa.array([len(a)])),
-    # Sample (co)variance -> ddof=1
-    # Population (co)variance -> ddof=0
-    (f.covar(column("a"), column("b")), lambda a, b, c, d: np.array(np.cov(a, b, ddof=1)[0][1])),
-    (f.covar_pop(column("a"), column("c")), lambda a, b, c, d: np.array(np.cov(a, c, ddof=0)[0][1])),
-    (f.covar_samp(column("b"), column("c")), lambda a, b, c, d: np.array(np.cov(b, c, ddof=1)[0][1])),
-    # f.grouping(col_a), # No physical plan implemented yet
-    (f.max(column("a")), lambda a, b, c, d: np.array(np.max(a))),
-    (f.mean(column("b")), lambda a, b, c, d: np.array(np.mean(b))),
-    (f.median(column("b")), lambda a, b, c, d: np.array(np.median(b))),
-    (f.min(column("a")), lambda a, b, c, d: np.array(np.min(a))),
-    (f.sum(column("b")), lambda a, b, c, d: np.array(np.sum(b.to_pylist()))),
-    # Sample stdev -> ddof=1
-    # Population stdev -> ddof=0
-    (f.stddev(column("a")), lambda a, b, c, d: np.array(np.std(a, ddof=1))),
-    (f.stddev_pop(column("b")), lambda a, b, c, d: np.array(np.std(b, ddof=0))),
-    (f.stddev_samp(column("c")), lambda a, b, c, d: np.array(np.std(c, ddof=1))),
-    (f.var(column("a")), lambda a, b, c, d: np.array(np.var(a, ddof=1))),
-    (f.var_pop(column("b")), lambda a, b, c, d: np.array(np.var(b, ddof=0))),
-    (f.var_samp(column("c")), lambda a, b, c, d: np.array(np.var(c, ddof=1))),
-])
+@pytest.mark.parametrize(
+    "agg_expr, calc_expected",
+    [
+        (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))),
+        (
+            f.corr(column("a"), column("b")),
+            lambda a, b, c, d: np.array(np.corrcoef(a, b)[0][1]),
+        ),
+        (f.count(column("a")), lambda a, b, c, d: pa.array([len(a)])),
+        # Sample (co)variance -> ddof=1
+        # Population (co)variance -> ddof=0
+        (
+            f.covar(column("a"), column("b")),
+            lambda a, b, c, d: np.array(np.cov(a, b, ddof=1)[0][1]),
+        ),
+        (
+            f.covar_pop(column("a"), column("c")),
+            lambda a, b, c, d: np.array(np.cov(a, c, ddof=0)[0][1]),
+        ),
+        (
+            f.covar_samp(column("b"), column("c")),
+            lambda a, b, c, d: np.array(np.cov(b, c, ddof=1)[0][1]),
+        ),
+        # f.grouping(col_a), # No physical plan implemented yet
+        (f.max(column("a")), lambda a, b, c, d: np.array(np.max(a))),
+        (f.mean(column("b")), lambda a, b, c, d: np.array(np.mean(b))),
+        (f.median(column("b")), lambda a, b, c, d: np.array(np.median(b))),
+        (f.min(column("a")), lambda a, b, c, d: np.array(np.min(a))),
+        (f.sum(column("b")), lambda a, b, c, d: np.array(np.sum(b.to_pylist()))),
+        # Sample stdev -> ddof=1
+        # Population stdev -> ddof=0
+        (f.stddev(column("a")), lambda a, b, c, d: np.array(np.std(a, ddof=1))),
+        (f.stddev_pop(column("b")), lambda a, b, c, d: np.array(np.std(b, ddof=0))),
+        (f.stddev_samp(column("c")), lambda a, b, c, d: np.array(np.std(c, ddof=1))),
+        (f.var(column("a")), lambda a, b, c, d: np.array(np.var(a, ddof=1))),
+        (f.var_pop(column("b")), lambda a, b, c, d: np.array(np.var(b, ddof=0))),
+        (f.var_samp(column("c")), lambda a, b, c, d: np.array(np.var(c, ddof=1))),
+    ],
+)
 def test_aggregation_stats(df, agg_expr, calc_expected):
-
     agg_df = df.aggregate([], [agg_expr])
     result = agg_df.collect()[0]
     values_a, values_b, values_c, values_d = df.collect()[0]
     expected = calc_expected(values_a, values_b, values_c, values_d)
     np.testing.assert_array_almost_equal(result.column(0), expected)


-@pytest.mark.parametrize("agg_expr, expected", [
-    (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64())),
-    (f.approx_median(column("b")), pa.array([4])),
-    (f.approx_percentile_cont(column("b"), lit(0.5)), pa.array([4])),
-    (
-        f.approx_percentile_cont_with_weight(column("b"), lit(0.6), lit(0.5)),
-        pa.array([6], type=pa.float64())
-    ),
-    (f.array_agg(column("b")), pa.array([[4, 4, 6]])),
-])
+@pytest.mark.parametrize(
+    "agg_expr, expected",
+    [
+        (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64())),
+        (f.approx_median(column("b")), pa.array([4])),
+        (f.approx_percentile_cont(column("b"), lit(0.5)), pa.array([4])),
+        (
+            f.approx_percentile_cont_with_weight(column("b"), lit(0.6), lit(0.5)),
+            pa.array([6], type=pa.float64()),
+        ),
+        (f.array_agg(column("b")), pa.array([[4, 4, 6]])),
+    ],
+)
 def test_aggregation(df, agg_expr, expected):
     agg_df = df.aggregate([], [agg_expr])
     result = agg_df.collect()[0]
@@ -98,20 +116,21 @@ def test_aggregation(df, agg_expr, expected):
 def test_aggregate_100(df_aggregate_100):
     # https://github.com/apache/datafusion/blob/bddb6415a50746d2803dd908d19c3758952d74f9/datafusion/sqllogictest/test_files/aggregate.slt#L1490-L1498

-    result = df_aggregate_100.aggregate(
-        [
-            column("c1")
-        ],
-        [
-            f.approx_percentile_cont(column("c3"), lit(0.95), lit(200)).alias("c3")
-        ]
-    ).sort(column("c1").sort(ascending=True)).collect()
+    result = (
+        df_aggregate_100.aggregate(
+            [column("c1")],
+            [f.approx_percentile_cont(column("c3"), lit(0.95), lit(200)).alias("c3")],
+        )
+        .sort(column("c1").sort(ascending=True))
+        .collect()
+    )

     assert len(result) == 1
     result = result[0]
     assert result.column("c1") == pa.array(["a", "b", "c", "d", "e"])
     assert result.column("c3") == pa.array([73, 68, 122, 124, 115])

+
 def test_bit_add_or_xor(df):
     df = df.aggregate(
         [],
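The ddof comments in the parametrization above carry the statistical convention the expectations rely on: `f.stddev`, `f.var` and their `_samp` variants are sample estimators (NumPy ddof=1), while the `_pop` variants are population estimators (ddof=0). A NumPy-only sketch of that convention, using the same values as the fixture's b column:

import numpy as np

values = np.array([4.0, 4.0, 6.0])

sample_std = np.std(values, ddof=1)      # matches f.stddev / f.stddev_samp
population_std = np.std(values, ddof=0)  # matches f.stddev_pop

# The sample estimator divides by n - 1 rather than n, so it is larger whenever the values are not all equal.
assert sample_std > population_std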

python/datafusion/tests/test_dataframe.py

Lines changed: 50 additions & 40 deletions
@@ -279,57 +279,67 @@ def test_distinct():


 data_test_window_functions = [
-    ("row", f.window("row_number", [], order_by=[f.order_by(column("c"))]), [2, 1, 3]),
-    ("rank", f.window("rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2]),
-    ("dense_rank", f.window("dense_rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2] ),
-    ("percent_rank", f.window("percent_rank", [], order_by=[f.order_by(column("c"))]), [0.5, 0, 0.5]),
-    ("cume_dist", f.window("cume_dist", [], order_by=[f.order_by(column("b"))]), [0.3333333333333333, 0.6666666666666666, 1.0]),
-    ("ntile", f.window("ntile", [literal(2)], order_by=[f.order_by(column("c"))]), [1, 1, 2]),
-    ("next", f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]), [5, 6, None]),
-    ("previous", f.window("lag", [column("b")], order_by=[f.order_by(column("b"))]), [None, 4, 5]),
-    pytest.param(
-        "first_value",
-        f.window(
+    ("row", f.window("row_number", [], order_by=[f.order_by(column("c"))]), [2, 1, 3]),
+    ("rank", f.window("rank", [], order_by=[f.order_by(column("c"))]), [2, 1, 2]),
+    (
+        "dense_rank",
+        f.window("dense_rank", [], order_by=[f.order_by(column("c"))]),
+        [2, 1, 2],
+    ),
+    (
+        "percent_rank",
+        f.window("percent_rank", [], order_by=[f.order_by(column("c"))]),
+        [0.5, 0, 0.5],
+    ),
+    (
+        "cume_dist",
+        f.window("cume_dist", [], order_by=[f.order_by(column("b"))]),
+        [0.3333333333333333, 0.6666666666666666, 1.0],
+    ),
+    (
+        "ntile",
+        f.window("ntile", [literal(2)], order_by=[f.order_by(column("c"))]),
+        [1, 1, 2],
+    ),
+    (
+        "next",
+        f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]),
+        [5, 6, None],
+    ),
+    (
+        "previous",
+        f.window("lag", [column("b")], order_by=[f.order_by(column("b"))]),
+        [None, 4, 5],
+    ),
+    pytest.param(
         "first_value",
-        [column("a")],
-        order_by=[f.order_by(column("b"))]
+        f.window("first_value", [column("a")], order_by=[f.order_by(column("b"))]),
+        [1, 1, 1],
+    ),
+    pytest.param(
+        "last_value",
+        f.window("last_value", [column("b")], order_by=[f.order_by(column("b"))]),
+        [4, 5, 6],
     ),
-        [1, 1, 1],
-    ),
-    pytest.param(
-        "last_value",
-        f.window("last_value", [column("b")], order_by=[f.order_by(column("b"))]),
-        [4, 5, 6],
-    ),
-    pytest.param(
-        "2nd_value",
-        f.window(
-            "nth_value",
-            [column("b"), literal(2)],
-            order_by=[f.order_by(column("b"))],
+    pytest.param(
+        "2nd_value",
+        f.window(
+            "nth_value",
+            [column("b"), literal(2)],
+            order_by=[f.order_by(column("b"))],
+        ),
+        [None, 5, 5],
     ),
-        [None, 5, 5],
-    ),
 ]


 @pytest.mark.parametrize("name,expr,result", data_test_window_functions)
 def test_window_functions(df, name, expr, result):
-    df = df.select(
-        column("a"),
-        column("b"),
-        column("c"),
-        f.alias(expr, name)
-    )
+    df = df.select(column("a"), column("b"), column("c"), f.alias(expr, name))

     table = pa.Table.from_batches(df.collect())

-    expected = {
-        "a": [1, 2, 3],
-        "b": [4, 5, 6],
-        "c": [8, 5, 8],
-        name: result
-    }
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [8, 5, 8], name: result}

     assert table.sort_by("a").to_pydict() == expected
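The window-function cases above pair each expression with its expected column after sorting by a. A standalone usage sketch of the same API, assuming the top-level imports used elsewhere in the test module and a hypothetical table name:

import pyarrow as pa
from datafusion import SessionContext, column
from datafusion import functions as f

ctx = SessionContext()
# Same toy data as the df fixture: a=[1, 2, 3], b=[4, 5, 6], c=[8, 5, 8].
df = ctx.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6], "c": [8, 5, 8]}, name="t")

# "rank" ordered by c: the c=5 row ranks first, the two c=8 rows tie at rank 2.
rank_expr = f.window("rank", [], order_by=[f.order_by(column("c"))])
df = df.select(column("a"), column("b"), column("c"), f.alias(rank_expr, "rank"))

table = pa.Table.from_batches(df.collect())
# Sorted by a, the rank column is [2, 1, 2], matching the parametrized expectation.
print(table.sort_by("a").to_pydict())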

python/datafusion/tests/test_expr.py

Lines changed: 8 additions & 6 deletions
@@ -146,24 +146,26 @@ def test_expr_to_variant():
     from datafusion import SessionContext
     from datafusion.expr import Filter

-
     def traverse_logical_plan(plan):
         cur_node = plan.to_variant()
         if isinstance(cur_node, Filter):
             return cur_node.predicate().to_variant()
-        if hasattr(plan, 'inputs'):
+        if hasattr(plan, "inputs"):
             for input_plan in plan.inputs():
                 res = traverse_logical_plan(input_plan)
                 if res is not None:
                     return res

     ctx = SessionContext()
-    data = {'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']}
-    ctx.from_pydict(data, name='table1')
+    data = {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]}
+    ctx.from_pydict(data, name="table1")
     query = "SELECT * FROM table1 t1 WHERE t1.name IN ('dfa', 'ad', 'dfre', 'vsa')"
     logical_plan = ctx.sql(query).optimized_logical_plan()
     variant = traverse_logical_plan(logical_plan)
     assert variant is not None
-    assert variant.expr().to_variant().qualified_name() == 'table1.name'
-    assert str(variant.list()) == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]'
+    assert variant.expr().to_variant().qualified_name() == "table1.name"
+    assert (
+        str(variant.list())
+        == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]'
+    )
     assert not variant.negated()
