@@ -4,6 +4,7 @@ PYTHONPATH=
# Run every recipe line through bash.
SHELL=/bin/bash
# Directory of the Python virtual environment, and the directory holding its executables.
VENV=.venv
VENV_BIN=$(VENV)/bin
# Partition count handed to scripts.prepare_data via --num-parts.
NUM_PARTITIONS=10
78
89.venv : # # Set up Python virtual environment and install dependencies
910 python3 -m venv $(VENV )
@@ -33,21 +34,20 @@ pre-commit: fmt ## Run all code quality checks
3334
3435ifndef SCALE_FACTOR
3536
36- data/tables/.generated :
# SCALE_FACTOR is not defined: both data targets are phony no-ops that only
# report that table generation is being skipped. Listing two targets on one
# rule gives each of them this same recipe independently.
.PHONY: data-tables data-tables-partitioned
data-tables data-tables-partitioned:
	@echo "SCALE_FACTOR not set, skipping data table generation"
42- @mkdir -p $@
43-
44- data/tables/partitioned/ :
45- @echo " SCALE_FACTOR not set, skipping data table generation"
46- @mkdir -p $@
4744
4845else
4946
50- data/tables/.generated : .venv # # Generate data tables
47+ .PHONY : data-tables
48+ data-tables : data/tables/scale-$(SCALE_FACTOR )
49+
50+ data/tables/scale-$(SCALE_FACTOR ) : .venv # # Generate data tables
5151 # use tpch-cli
5252 mkdir -p " data/tables/scale-$( SCALE_FACTOR) "
5353 $(VENV_BIN ) /tpchgen-cli --output-dir=" data/tables/scale-$( SCALE_FACTOR) " --format=tbl -s $(SCALE_FACTOR )
@@ -60,24 +60,23 @@ data/tables/.generated: .venv ## Generate data tables
6060 # mv tpch-dbgen/*.tbl data/tables/scale-$(SCALE_FACTOR)/
6161 # $(VENV_BIN)/python -m scripts.prepare_data --num-parts=1 --tpch_gen_folder="data/tables/scale-$(SCALE_FACTOR)"
6262 rm -rf data/tables/scale-$(SCALE_FACTOR)/*.tbl
63- touch $@
6463
65- data/tables/ : data/ tables/.generated
66- @true
# Phony entry point for the partitioned tables of the current
# SCALE_FACTOR / NUM_PARTITIONS combination.
.PHONY: data-tables-partitioned
data-tables-partitioned: data/tables/scale-$(SCALE_FACTOR)/$(NUM_PARTITIONS)

# Builds dbgen inside tpch-dbgen, then runs scripts.prepare_data with
# --num-parts set to NUM_PARTITIONS and the scale folder as --tpch_gen_folder.
# NOTE(review): the only prerequisite is .venv; this rule does not depend on
# the scale folder already being populated - confirm whether it should.
data/tables/scale-$(SCALE_FACTOR)/$(NUM_PARTITIONS): .venv ## Generate partitioned data tables (these are not yet runnable with current repo)
	$(MAKE) -C tpch-dbgen dbgen
	$(VENV_BIN)/python -m scripts.prepare_data --num-parts=$(NUM_PARTITIONS) --tpch_gen_folder="data/tables/scale-$(SCALE_FACTOR)"
7170
7271
7372endif
7473
# Runs the queries.polars module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-polars
run-polars: .venv data-tables ## Run Polars benchmarks
	$(VENV_BIN)/python -m queries.polars
7877
7978.PHONY : run-polars-no-env
80- run-polars-no-env : data/ tables/ # # Run Polars benchmarks
79+ run-polars-no-env : data- tables # # Run Polars benchmarks
8180 $(MAKE ) -C tpch-dbgen dbgen
8281 cd tpch-dbgen && ./dbgen -f -s $(SCALE_FACTOR ) && cd ..
8382 mkdir -p " data/tables/scale-$( SCALE_FACTOR) "
@@ -91,23 +90,23 @@ run-polars-gpu-no-env: run-polars-no-env data/tables/ ## Run Polars CPU and GPU
9190 RUN_POLARS_GPU=true CUDA_MODULE_LOADING=EAGER python -m queries.polars
9291
# Runs the queries.duckdb module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-duckdb
run-duckdb: .venv data-tables ## Run DuckDB benchmarks
	$(VENV_BIN)/python -m queries.duckdb
9695
# Runs the queries.pandas module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-pandas
run-pandas: .venv data-tables ## Run pandas benchmarks
	$(VENV_BIN)/python -m queries.pandas
10099
# Runs the queries.pyspark module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-pyspark
run-pyspark: .venv data-tables ## Run PySpark benchmarks
	$(VENV_BIN)/python -m queries.pyspark
104103
# Runs the queries.dask module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-dask
run-dask: .venv data-tables ## Run Dask benchmarks
	$(VENV_BIN)/python -m queries.dask
108107
# Runs the queries.modin module with the virtualenv's Python once .venv and
# the data-tables target have been brought up to date.
.PHONY: run-modin
run-modin: .venv data-tables ## Run Modin benchmarks
	$(VENV_BIN)/python -m queries.modin
112111
113112.PHONY : run-all
0 commit comments