Skip to content

Commit 05ff09e

Browse files
committed
Enabled ruff rule ICN001 (unconventional-import-alias)
1 parent 17c184a commit 05ff09e

File tree

6 files changed

+101
-105
lines changed

6 files changed

+101
-105
lines changed

benchmarks/db-benchmark/groupby-datafusion.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import timeit
2121

2222
import datafusion as df
23-
import pyarrow
23+
import pyarrow as pa
2424
from datafusion import (
2525
RuntimeEnvBuilder,
2626
SessionConfig,
@@ -68,14 +68,14 @@ def execute(df) -> list:
6868
src_grp = os.path.join("data", data_name + ".csv")
6969
print("loading dataset %s" % src_grp, flush=True)
7070

71-
schema = pyarrow.schema(
71+
schema = pa.schema(
7272
[
73-
("id4", pyarrow.int32()),
74-
("id5", pyarrow.int32()),
75-
("id6", pyarrow.int32()),
76-
("v1", pyarrow.int32()),
77-
("v2", pyarrow.int32()),
78-
("v3", pyarrow.float64()),
73+
("id4", pa.int32()),
74+
("id5", pa.int32()),
75+
("id6", pa.int32()),
76+
("v1", pa.int32()),
77+
("v2", pa.int32()),
78+
("v3", pa.float64()),
7979
]
8080
)
8181

examples/python-udaf.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
import datafusion
19-
import pyarrow
19+
import pyarrow as pa
2020
import pyarrow.compute
2121
from datafusion import Accumulator, col, udaf
2222

@@ -27,47 +27,43 @@ class MyAccumulator(Accumulator):
2727
"""
2828

2929
def __init__(self) -> None:
30-
self._sum = pyarrow.scalar(0.0)
30+
self._sum = pa.scalar(0.0)
3131

32-
def update(self, values: pyarrow.Array) -> None:
32+
def update(self, values: pa.Array) -> None:
3333
# not nice since pyarrow scalars can't be summed yet. This breaks on `None`
34-
self._sum = pyarrow.scalar(
35-
self._sum.as_py() + pyarrow.compute.sum(values).as_py()
36-
)
34+
self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py())
3735

38-
def merge(self, states: pyarrow.Array) -> None:
36+
def merge(self, states: pa.Array) -> None:
3937
# not nice since pyarrow scalars can't be summed yet. This breaks on `None`
40-
self._sum = pyarrow.scalar(
41-
self._sum.as_py() + pyarrow.compute.sum(states).as_py()
42-
)
38+
self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states).as_py())
4339

44-
def state(self) -> pyarrow.Array:
45-
return pyarrow.array([self._sum.as_py()])
40+
def state(self) -> pa.Array:
41+
return pa.array([self._sum.as_py()])
4642

47-
def evaluate(self) -> pyarrow.Scalar:
43+
def evaluate(self) -> pa.Scalar:
4844
return self._sum
4945

5046

5147
# create a context
5248
ctx = datafusion.SessionContext()
5349

5450
# create a RecordBatch and a new DataFrame from it
55-
batch = pyarrow.RecordBatch.from_arrays(
56-
[pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
51+
batch = pa.RecordBatch.from_arrays(
52+
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
5753
names=["a", "b"],
5854
)
5955
df = ctx.create_dataframe([[batch]])
6056

6157
my_udaf = udaf(
6258
MyAccumulator,
63-
pyarrow.float64(),
64-
pyarrow.float64(),
65-
[pyarrow.float64()],
59+
pa.float64(),
60+
pa.float64(),
61+
[pa.float64()],
6662
"stable",
6763
)
6864

6965
df = df.aggregate([], [my_udaf(col("a"))])
7066

7167
result = df.collect()[0]
7268

73-
assert result.column(0) == pyarrow.array([6.0])
69+
assert result.column(0) == pa.array([6.0])

examples/python-udf.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,23 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import pyarrow
18+
import pyarrow as pa
1919
from datafusion import SessionContext, udf
2020
from datafusion import functions as f
2121

2222

23-
def is_null(array: pyarrow.Array) -> pyarrow.Array:
23+
def is_null(array: pa.Array) -> pa.Array:
2424
return array.is_null()
2525

2626

27-
is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), "stable")
27+
is_null_arr = udf(is_null, [pa.int64()], pa.bool_(), "stable")
2828

2929
# create a context
3030
ctx = SessionContext()
3131

3232
# create a RecordBatch and a new DataFrame from it
33-
batch = pyarrow.RecordBatch.from_arrays(
34-
[pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
33+
batch = pa.RecordBatch.from_arrays(
34+
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
3535
names=["a", "b"],
3636
)
3737
df = ctx.create_dataframe([[batch]])
@@ -40,4 +40,4 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array:
4040

4141
result = df.collect()[0]
4242

43-
assert result.column(0) == pyarrow.array([False] * 3)
43+
assert result.column(0) == pa.array([False] * 3)

examples/query-pyarrow-data.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@
1616
# under the License.
1717

1818
import datafusion
19-
import pyarrow
19+
import pyarrow as pa
2020
from datafusion import col
2121

2222
# create a context
2323
ctx = datafusion.SessionContext()
2424

2525
# create a RecordBatch and a new DataFrame from it
26-
batch = pyarrow.RecordBatch.from_arrays(
27-
[pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
26+
batch = pa.RecordBatch.from_arrays(
27+
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
2828
names=["a", "b"],
2929
)
3030
df = ctx.create_dataframe([[batch]])
@@ -38,5 +38,5 @@
3838
# execute and collect the first (and only) batch
3939
result = df.collect()[0]
4040

41-
assert result.column(0) == pyarrow.array([5, 7, 9])
42-
assert result.column(1) == pyarrow.array([-3, -3, -3])
41+
assert result.column(0) == pa.array([5, 7, 9])
42+
assert result.column(1) == pa.array([-3, -3, -3])

examples/tpch/convert_data_to_parquet.py

Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -25,95 +25,95 @@
2525
import os
2626

2727
import datafusion
28-
import pyarrow
28+
import pyarrow as pa
2929

3030
ctx = datafusion.SessionContext()
3131

3232
all_schemas = {}
3333

3434
all_schemas["customer"] = [
35-
("C_CUSTKEY", pyarrow.int64()),
36-
("C_NAME", pyarrow.string()),
37-
("C_ADDRESS", pyarrow.string()),
38-
("C_NATIONKEY", pyarrow.int64()),
39-
("C_PHONE", pyarrow.string()),
40-
("C_ACCTBAL", pyarrow.decimal128(15, 2)),
41-
("C_MKTSEGMENT", pyarrow.string()),
42-
("C_COMMENT", pyarrow.string()),
35+
("C_CUSTKEY", pa.int64()),
36+
("C_NAME", pa.string()),
37+
("C_ADDRESS", pa.string()),
38+
("C_NATIONKEY", pa.int64()),
39+
("C_PHONE", pa.string()),
40+
("C_ACCTBAL", pa.decimal128(15, 2)),
41+
("C_MKTSEGMENT", pa.string()),
42+
("C_COMMENT", pa.string()),
4343
]
4444

4545
all_schemas["lineitem"] = [
46-
("L_ORDERKEY", pyarrow.int64()),
47-
("L_PARTKEY", pyarrow.int64()),
48-
("L_SUPPKEY", pyarrow.int64()),
49-
("L_LINENUMBER", pyarrow.int32()),
50-
("L_QUANTITY", pyarrow.decimal128(15, 2)),
51-
("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)),
52-
("L_DISCOUNT", pyarrow.decimal128(15, 2)),
53-
("L_TAX", pyarrow.decimal128(15, 2)),
54-
("L_RETURNFLAG", pyarrow.string()),
55-
("L_LINESTATUS", pyarrow.string()),
56-
("L_SHIPDATE", pyarrow.date32()),
57-
("L_COMMITDATE", pyarrow.date32()),
58-
("L_RECEIPTDATE", pyarrow.date32()),
59-
("L_SHIPINSTRUCT", pyarrow.string()),
60-
("L_SHIPMODE", pyarrow.string()),
61-
("L_COMMENT", pyarrow.string()),
46+
("L_ORDERKEY", pa.int64()),
47+
("L_PARTKEY", pa.int64()),
48+
("L_SUPPKEY", pa.int64()),
49+
("L_LINENUMBER", pa.int32()),
50+
("L_QUANTITY", pa.decimal128(15, 2)),
51+
("L_EXTENDEDPRICE", pa.decimal128(15, 2)),
52+
("L_DISCOUNT", pa.decimal128(15, 2)),
53+
("L_TAX", pa.decimal128(15, 2)),
54+
("L_RETURNFLAG", pa.string()),
55+
("L_LINESTATUS", pa.string()),
56+
("L_SHIPDATE", pa.date32()),
57+
("L_COMMITDATE", pa.date32()),
58+
("L_RECEIPTDATE", pa.date32()),
59+
("L_SHIPINSTRUCT", pa.string()),
60+
("L_SHIPMODE", pa.string()),
61+
("L_COMMENT", pa.string()),
6262
]
6363

6464
all_schemas["nation"] = [
65-
("N_NATIONKEY", pyarrow.int64()),
66-
("N_NAME", pyarrow.string()),
67-
("N_REGIONKEY", pyarrow.int64()),
68-
("N_COMMENT", pyarrow.string()),
65+
("N_NATIONKEY", pa.int64()),
66+
("N_NAME", pa.string()),
67+
("N_REGIONKEY", pa.int64()),
68+
("N_COMMENT", pa.string()),
6969
]
7070

7171
all_schemas["orders"] = [
72-
("O_ORDERKEY", pyarrow.int64()),
73-
("O_CUSTKEY", pyarrow.int64()),
74-
("O_ORDERSTATUS", pyarrow.string()),
75-
("O_TOTALPRICE", pyarrow.decimal128(15, 2)),
76-
("O_ORDERDATE", pyarrow.date32()),
77-
("O_ORDERPRIORITY", pyarrow.string()),
78-
("O_CLERK", pyarrow.string()),
79-
("O_SHIPPRIORITY", pyarrow.int32()),
80-
("O_COMMENT", pyarrow.string()),
72+
("O_ORDERKEY", pa.int64()),
73+
("O_CUSTKEY", pa.int64()),
74+
("O_ORDERSTATUS", pa.string()),
75+
("O_TOTALPRICE", pa.decimal128(15, 2)),
76+
("O_ORDERDATE", pa.date32()),
77+
("O_ORDERPRIORITY", pa.string()),
78+
("O_CLERK", pa.string()),
79+
("O_SHIPPRIORITY", pa.int32()),
80+
("O_COMMENT", pa.string()),
8181
]
8282

8383
all_schemas["part"] = [
84-
("P_PARTKEY", pyarrow.int64()),
85-
("P_NAME", pyarrow.string()),
86-
("P_MFGR", pyarrow.string()),
87-
("P_BRAND", pyarrow.string()),
88-
("P_TYPE", pyarrow.string()),
89-
("P_SIZE", pyarrow.int32()),
90-
("P_CONTAINER", pyarrow.string()),
91-
("P_RETAILPRICE", pyarrow.decimal128(15, 2)),
92-
("P_COMMENT", pyarrow.string()),
84+
("P_PARTKEY", pa.int64()),
85+
("P_NAME", pa.string()),
86+
("P_MFGR", pa.string()),
87+
("P_BRAND", pa.string()),
88+
("P_TYPE", pa.string()),
89+
("P_SIZE", pa.int32()),
90+
("P_CONTAINER", pa.string()),
91+
("P_RETAILPRICE", pa.decimal128(15, 2)),
92+
("P_COMMENT", pa.string()),
9393
]
9494

9595
all_schemas["partsupp"] = [
96-
("PS_PARTKEY", pyarrow.int64()),
97-
("PS_SUPPKEY", pyarrow.int64()),
98-
("PS_AVAILQTY", pyarrow.int32()),
99-
("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)),
100-
("PS_COMMENT", pyarrow.string()),
96+
("PS_PARTKEY", pa.int64()),
97+
("PS_SUPPKEY", pa.int64()),
98+
("PS_AVAILQTY", pa.int32()),
99+
("PS_SUPPLYCOST", pa.decimal128(15, 2)),
100+
("PS_COMMENT", pa.string()),
101101
]
102102

103103
all_schemas["region"] = [
104-
("r_REGIONKEY", pyarrow.int64()),
105-
("r_NAME", pyarrow.string()),
106-
("r_COMMENT", pyarrow.string()),
104+
("r_REGIONKEY", pa.int64()),
105+
("r_NAME", pa.string()),
106+
("r_COMMENT", pa.string()),
107107
]
108108

109109
all_schemas["supplier"] = [
110-
("S_SUPPKEY", pyarrow.int64()),
111-
("S_NAME", pyarrow.string()),
112-
("S_ADDRESS", pyarrow.string()),
113-
("S_NATIONKEY", pyarrow.int32()),
114-
("S_PHONE", pyarrow.string()),
115-
("S_ACCTBAL", pyarrow.decimal128(15, 2)),
116-
("S_COMMENT", pyarrow.string()),
110+
("S_SUPPKEY", pa.int64()),
111+
("S_NAME", pa.string()),
112+
("S_ADDRESS", pa.string()),
113+
("S_NATIONKEY", pa.int32()),
114+
("S_PHONE", pa.string()),
115+
("S_ACCTBAL", pa.decimal128(15, 2)),
116+
("S_COMMENT", pa.string()),
117117
]
118118

119119
curr_dir = os.path.dirname(os.path.abspath(__file__))
@@ -125,12 +125,12 @@
125125
# in to handle the trailing | in the file
126126
output_cols = [r[0] for r in curr_schema]
127127

128-
curr_schema = [pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema]
128+
curr_schema = [pa.field(r[0], r[1], nullable=False) for r in curr_schema]
129129

130130
# Trailing | requires an extra field during processing
131-
curr_schema.append(("some_null", pyarrow.null()))
131+
curr_schema.append(("some_null", pa.null()))
132132

133-
schema = pyarrow.schema(curr_schema)
133+
schema = pa.schema(curr_schema)
134134

135135
source_file = os.path.abspath(
136136
os.path.join(curr_dir, f"../../benchmarks/tpch/data/{filename}.csv")

python/datafusion/catalog.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import datafusion._internal as df_internal
2525

2626
if TYPE_CHECKING:
27-
import pyarrow
27+
import pyarrow as pa
2828

2929

3030
class Catalog:
@@ -67,7 +67,7 @@ def __init__(self, table: df_internal.Table) -> None:
6767
self.table = table
6868

6969
@property
70-
def schema(self) -> pyarrow.Schema:
70+
def schema(self) -> pa.Schema:
7171
"""Returns the schema associated with this table."""
7272
return self.table.schema
7373

0 commit comments

Comments
 (0)