Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9c4a375
Run python tests on all currently supported python versions
timsaucer Mar 9, 2025
99dcb4f
Update ruff checks to select all
timsaucer Mar 9, 2025
5306d1e
Ruff auto fix
timsaucer Mar 9, 2025
d66a946
Applying ruff suggestions
timsaucer Mar 9, 2025
d87e794
noqa rules updates per ruff checks
timsaucer Mar 9, 2025
2216d21
Working through more ruff suggestions
timsaucer Mar 9, 2025
bd041f8
Working through more ruff suggestions
timsaucer Mar 9, 2025
7b5642e
update timestamps on tests
timsaucer Mar 9, 2025
ee2e488
More ruff updates
timsaucer Mar 9, 2025
f4e1754
More ruff updates
timsaucer Mar 9, 2025
8c9b6d8
Instead of importing udf static functions as variables, import
timsaucer Mar 9, 2025
f67a727
More ruff formatting suggestions
timsaucer Mar 9, 2025
5d2e384
more ruff formatting suggestions
timsaucer Mar 10, 2025
412eef0
More ruff formatting
timsaucer Mar 10, 2025
76fda6f
More ruff formatting
timsaucer Mar 10, 2025
591425f
Cut off lint errors for this PR
timsaucer Mar 10, 2025
bd18e40
Working through more ruff checks and disabling a bunch for now
timsaucer Mar 10, 2025
6102da5
Address CI difference from local ruff
timsaucer Mar 10, 2025
5f3f7c7
UDWF isn't a proper abstract base class right now since users can opt…
timsaucer Mar 10, 2025
96ad9ff
Update pre-commit to match the version of ruff used in CI
timsaucer Mar 10, 2025
9b6d627
To enable testing in python 3.9 we need numpy. Also going to the curr…
timsaucer Mar 10, 2025
099baaa
Update min required version of python to 3.9 in pyproject.toml. The o…
timsaucer Mar 10, 2025
a33bc21
Suppress UP035
timsaucer Mar 10, 2025
a52b330
ruff format
timsaucer Mar 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
- "3.13"
toolchain:
- "stable"

Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ repos:
- id: actionlint-docker
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.0
rev: v0.9.10
hooks:
# Run the linter.
- id: ruff
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/tpch/tpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ def bench(data_path, query_path):
end = time.time()
time_millis = (end - start) * 1000
total_time_millis += time_millis
print("setup,{}".format(round(time_millis, 1)))
results.write("setup,{}\n".format(round(time_millis, 1)))
print(f"setup,{round(time_millis, 1)}")
results.write(f"setup,{round(time_millis, 1)}\n")
results.flush()

# run queries
for query in range(1, 23):
with open("{}/q{}.sql".format(query_path, query)) as f:
with open(f"{query_path}/q{query}.sql") as f:
text = f.read()
tmp = text.split(";")
queries = []
Expand All @@ -83,14 +83,14 @@ def bench(data_path, query_path):
end = time.time()
time_millis = (end - start) * 1000
total_time_millis += time_millis
print("q{},{}".format(query, round(time_millis, 1)))
results.write("q{},{}\n".format(query, round(time_millis, 1)))
print(f"q{query},{round(time_millis, 1)}")
results.write(f"q{query},{round(time_millis, 1)}\n")
results.flush()
except Exception as e:
print("query", query, "failed", e)

print("total,{}".format(round(total_time_millis, 1)))
results.write("total,{}\n".format(round(total_time_millis, 1)))
print(f"total,{round(total_time_millis, 1)}")
results.write(f"total,{round(total_time_millis, 1)}\n")


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion dev/release/check-rat-report.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
exclude_globs_filename = sys.argv[1]
xml_filename = sys.argv[2]

globs = [line.strip() for line in open(exclude_globs_filename, "r")]
globs = [line.strip() for line in open(exclude_globs_filename)]

tree = ET.parse(xml_filename)
root = tree.getroot()
Expand Down
10 changes: 3 additions & 7 deletions dev/release/generate-changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,11 @@

def print_pulls(repo_name, title, pulls):
if len(pulls) > 0:
print("**{}:**".format(title))
print(f"**{title}:**")
print()
for pull, commit in pulls:
url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
print(
"- {} [#{}]({}) ({})".format(
pull.title, pull.number, url, commit.author.login
)
)
url = f"https://github.com/{repo_name}/pull/{pull.number}"
print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})")
print()


Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
autoapi_python_class_content = "both"


def autoapi_skip_member_fn(app, what, name, obj, skip, options):
def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001
skip_contents = [
# Re-exports
("class", "datafusion.DataFrame"),
Expand Down
2 changes: 1 addition & 1 deletion examples/python-udwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, alpha: float) -> None:
def supports_bounded_execution(self) -> bool:
return True

def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:
def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002
# Override the default range of current row since uses_window_frame is False
# So for the purpose of this test we just smooth from the previous row to
# current.
Expand Down
15 changes: 6 additions & 9 deletions examples/tpch/_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,25 @@
def df_selection(col_name, col_type):
if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type):
return F.round(col(col_name), lit(2)).alias(col_name)
elif col_type == pa.string() or col_type == pa.string_view():
if col_type == pa.string() or col_type == pa.string_view():
return F.trim(col(col_name)).alias(col_name)
else:
return col(col_name)
return col(col_name)


def load_schema(col_name, col_type):
if col_type == pa.int64() or col_type == pa.int32():
return col_name, pa.string()
elif isinstance(col_type, pa.Decimal128Type):
if isinstance(col_type, pa.Decimal128Type):
return col_name, pa.float64()
else:
return col_name, col_type
return col_name, col_type


def expected_selection(col_name, col_type):
if col_type == pa.int64() or col_type == pa.int32():
return F.trim(col(col_name)).cast(col_type).alias(col_name)
elif col_type == pa.string() or col_type == pa.string_view():
if col_type == pa.string() or col_type == pa.string_view():
return F.trim(col(col_name)).alias(col_name)
else:
return col(col_name)
return col(col_name)


def selections_and_schema(original_schema):
Expand Down
79 changes: 71 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ name = "datafusion"
description = "Build and run queries against data"
readme = "README.md"
license = { file = "LICENSE.txt" }
requires-python = ">=3.8"
requires-python = ">=3.9"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we split out this python version bump?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why the diff doesn't show, but that's already merged into main

keywords = ["datafusion", "dataframe", "rust", "query-engine"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
Expand All @@ -35,7 +35,6 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
Expand Down Expand Up @@ -66,7 +65,57 @@ features = ["substrait"]

# Enable docstring linting using the google style guide
[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"]
select = ["ALL" ]
ignore = [
"A001", # Allow using words like min as variable names
"A002", # Allow using words like filter as variable names
"ANN401", # Allow Any for wrapper classes
"COM812", # Recommended to ignore these rules when using with ruff-format
"FIX002", # Allow TODO lines - consider removing at some point
"FBT001", # Allow boolean positional args
"FBT002", # Allow boolean positional args
"ISC001", # Recommended to ignore these rules when using with ruff-format
"SLF001", # Allow accessing private members
"TD002",
"TD003", # Allow TODO lines
"UP007", # Disallowing Union is pedantic
# TODO: Enable all of the following, but this PR is getting too large already
"PT001",
"ANN204",
"B008",
"EM101",
"PLR0913",
"PLR1714",
"ANN201",
"C400",
"TRY003",
"B904",
"UP006",
"RUF012",
"FBT003",
"C416",
"SIM102",
"PGH003",
"PLR2004",
"PERF401",
"PD901",
"EM102",
"ERA001",
"SIM108",
"ICN001",
"ANN001",
"ANN202",
"PTH",
"N812",
"INP001",
"DTZ007",
"PLW2901",
"RET503",
"RUF015",
"A005",
"TC001",
"UP035",
]

[tool.ruff.lint.pydocstyle]
convention = "google"
Expand All @@ -76,16 +125,30 @@ max-doc-length = 88

# Disable docstring checking for these directories
[tool.ruff.lint.per-file-ignores]
"python/tests/*" = ["D"]
"examples/*" = ["D", "W505"]
"dev/*" = ["D"]
"benchmarks/*" = ["D", "F"]
"python/tests/*" = [
"ANN",
"ARG",
"BLE001",
"D",
"S101",
"SLF",
"PD",
"PLR2004",
"PT011",
"RUF015",
"S608",
"PLR0913",
"PT004",
]
"examples/*" = ["D", "W505", "E501", "T201", "S101"]
"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817"]
"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP"]
"docs/*" = ["D"]

[dependency-groups]
dev = [
"maturin>=1.8.1",
"numpy>1.24.4 ; python_full_version >= '3.10'",
"numpy>1.25.0",
"pytest>=7.4.4",
"ruff>=0.9.1",
"toml>=0.10.2",
Expand Down
50 changes: 23 additions & 27 deletions python/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,44 +48,47 @@
from .io import read_avro, read_csv, read_json, read_parquet
from .plan import ExecutionPlan, LogicalPlan
from .record_batch import RecordBatch, RecordBatchStream
from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF
from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf

__version__ = importlib_metadata.version(__name__)

__all__ = [
"Accumulator",
"AggregateUDF",
"Catalog",
"Config",
"DataFrame",
"SessionContext",
"SessionConfig",
"SQLOptions",
"RuntimeEnvBuilder",
"Expr",
"ScalarUDF",
"WindowFrame",
"column",
"col",
"literal",
"lit",
"DFSchema",
"Catalog",
"DataFrame",
"Database",
"Table",
"AggregateUDF",
"WindowUDF",
"LogicalPlan",
"ExecutionPlan",
"Expr",
"LogicalPlan",
"RecordBatch",
"RecordBatchStream",
"RuntimeEnvBuilder",
"SQLOptions",
"ScalarUDF",
"SessionConfig",
"SessionContext",
"Table",
"WindowFrame",
"WindowUDF",
"col",
"column",
"common",
"expr",
"functions",
"lit",
"literal",
"object_store",
"substrait",
"read_parquet",
"read_avro",
"read_csv",
"read_json",
"read_parquet",
"substrait",
"udaf",
"udf",
"udwf",
]


Expand Down Expand Up @@ -120,10 +123,3 @@ def str_lit(value):
def lit(value):
"""Create a literal expression."""
return Expr.literal(value)


udf = ScalarUDF.udf

udaf = AggregateUDF.udaf

udwf = WindowUDF.udwf
14 changes: 7 additions & 7 deletions python/datafusion/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from ._internal import common as common_internal

# TODO these should all have proper wrapper classes
# TODO: these should all have proper wrapper classes

DFSchema = common_internal.DFSchema
DataType = common_internal.DataType
Expand All @@ -38,15 +38,15 @@
"DFSchema",
"DataType",
"DataTypeMap",
"RexType",
"PythonType",
"SqlType",
"NullTreatment",
"SqlTable",
"PythonType",
"RexType",
"SqlFunction",
"SqlSchema",
"SqlView",
"SqlStatistics",
"SqlFunction",
"SqlTable",
"SqlType",
"SqlView",
]


Expand Down
4 changes: 1 addition & 3 deletions python/datafusion/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,6 @@ def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder:
class RuntimeConfig(RuntimeEnvBuilder):
"""See `RuntimeEnvBuilder`."""

pass


class SQLOptions:
"""Options to be used when performing SQL queries."""
Expand Down Expand Up @@ -498,7 +496,7 @@ def __init__(

self.ctx = SessionContextInternal(config, runtime)

def enable_url_table(self) -> "SessionContext":
def enable_url_table(self) -> SessionContext:
"""Control if local files can be queried as tables.

Returns:
Expand Down
Loading