Skip to content

Commit eb7000b

Browse files
committed
update deps, more tests
1 parent 64e46c1 commit eb7000b

File tree

4 files changed: +37 additions, -7 deletions

examples/tips.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,4 @@
5252
)
5353

5454
ray_results = ray_ctx.plan(df.execution_plan())
55-
df_ctx.create_dataframe([[ray_results]]).show()
55+
df_ctx.create_dataframe([ray_results]).show()

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ classifiers = [
2828
"Programming Language :: Python :: Implementation :: PyPy",
2929
]
3030
dependencies = [
31-
"datafusion>=42.0.0",
32-
"pyarrow>=11.0.0",
31+
"datafusion>=43.0.0",
32+
"pyarrow>=18.0.0",
3333
"typing-extensions;python_version<'3.13'",
3434
]
3535

requirements-in.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ isort
44
maturin
55
mypy
66
numpy
7-
pyarrow
7+
pyarrow>=18.0.0
88
pytest
99
ray==2.37.0
10-
datafusion>=42.0.0
10+
datafusion>=43.0.0
1111
toml
1212
importlib_metadata; python_version < "3.8"

tests/test_context.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
from datafusion_ray.context import DatafusionRayContext
19-
from datafusion import SessionContext
19+
from datafusion import SessionContext, SessionConfig, RuntimeConfig, col, lit, functions as F
2020

2121

2222
def test_basic_query_succeed():
@@ -27,7 +27,7 @@ def test_basic_query_succeed():
2727
record_batch = ctx.sql("SELECT * FROM tips")
2828
assert record_batch.num_rows == 244
2929

30-
def test_aggregate():
30+
def test_aggregate_csv():
3131
df_ctx = SessionContext()
3232
ctx = DatafusionRayContext(df_ctx)
3333
df_ctx.register_csv("tips", "examples/tips.csv", has_header=True)
@@ -39,6 +39,36 @@ def test_aggregate():
3939
num_rows += record_batch.num_rows
4040
assert num_rows == 4
4141

42+
def test_aggregate_parquet():
43+
runtime = RuntimeConfig()
44+
config = SessionConfig().set('datafusion.execution.parquet.schema_force_view_types', 'true')
45+
df_ctx = SessionContext(config, runtime)
46+
ctx = DatafusionRayContext(df_ctx)
47+
df_ctx.register_parquet("tips", "examples/tips.parquet")
48+
record_batches = ctx.sql("select sex, smoker, avg(tip/total_bill) as tip_pct from tips group by sex, smoker")
49+
assert isinstance(record_batches, list)
50+
# TODO: investigate why this query returns many empty record batches.
51+
num_rows = 0
52+
for record_batch in record_batches:
53+
num_rows += record_batch.num_rows
54+
assert num_rows == 4
55+
56+
def test_aggregate_parquet_dataframe():
57+
df_ctx = SessionContext()
58+
ray_ctx = DatafusionRayContext(df_ctx)
59+
df = df_ctx.read_parquet(f"examples/tips.parquet")
60+
df = (
61+
df.aggregate(
62+
[col("sex"), col("smoker"), col("day"), col("time")],
63+
[F.avg(col("tip") / col("total_bill")).alias("tip_pct")],
64+
)
65+
.filter(col("day") != lit("Dinner"))
66+
.aggregate([col("sex"), col("smoker")], [F.avg(col("tip_pct")).alias("avg_pct")])
67+
)
68+
ray_results = ray_ctx.plan(df.execution_plan())
69+
df_ctx.create_dataframe([ray_results]).show()
70+
71+
4272
def test_no_result_query():
4373
df_ctx = SessionContext()
4474
ctx = DatafusionRayContext(df_ctx)

0 commit comments

Comments
 (0)