Skip to content

Commit 629193c

Browse files
authored
Improve data generation in Makefile (#165)
1 parent 1c2d356 commit 629193c

File tree

1 file changed

+20
-21
lines changed

1 file changed

+20
-21
lines changed

Makefile

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ PYTHONPATH=
44
SHELL=/bin/bash
55
VENV=.venv
66
VENV_BIN=$(VENV)/bin
7+
NUM_PARTITIONS=10
78

89
.venv: ## Set up Python virtual environment and install dependencies
910
python3 -m venv $(VENV)
@@ -33,21 +34,20 @@ pre-commit: fmt ## Run all code quality checks
3334

3435
ifndef SCALE_FACTOR
3536

36-
data/tables/.generated:
37+
.PHONY: data-tables
38+
data-tables:
3739
@echo "SCALE_FACTOR not set, skipping data table generation"
38-
@touch $@
3940

40-
data/tables/:
41+
.PHONY: data-tables-partitioned
42+
data-tables-partitioned:
4143
@echo "SCALE_FACTOR not set, skipping data table generation"
42-
@mkdir -p $@
43-
44-
data/tables/partitioned/:
45-
@echo "SCALE_FACTOR not set, skipping data table generation"
46-
@mkdir -p $@
4744

4845
else
4946

50-
data/tables/.generated: .venv ## Generate data tables
47+
.PHONY: data-tables
48+
data-tables: data/tables/scale-$(SCALE_FACTOR)
49+
50+
data/tables/scale-$(SCALE_FACTOR): .venv ## Generate data tables
5151
# use tpchgen-cli
5252
mkdir -p "data/tables/scale-$(SCALE_FACTOR)"
5353
$(VENV_BIN)/tpchgen-cli --output-dir="data/tables/scale-$(SCALE_FACTOR)" --format=tbl -s $(SCALE_FACTOR)
@@ -60,24 +60,23 @@ data/tables/.generated: .venv ## Generate data tables
6060
# mv tpch-dbgen/*.tbl data/tables/scale-$(SCALE_FACTOR)/
6161
# $(VENV_BIN)/python -m scripts.prepare_data --num-parts=1 --tpch_gen_folder="data/tables/scale-$(SCALE_FACTOR)"
6262
rm -rf data/tables/scale-$(SCALE_FACTOR)/*.tbl
63-
touch $@
6463

65-
data/tables/: data/tables/.generated
66-
@true
64+
.PHONY: data-tables-partitioned
65+
data-tables-partitioned: data/tables/scale-$(SCALE_FACTOR)/${NUM_PARTITIONS}
6766

68-
data/tables/partitioned/: .venv ## Generate partitioned data tables (these are not yet runnable with current repo)
67+
data/tables/scale-$(SCALE_FACTOR)/${NUM_PARTITIONS}: .venv ## Generate partitioned data tables (these are not yet runnable with current repo)
6968
$(MAKE) -C tpch-dbgen dbgen
70-
$(VENV_BIN)/python -m scripts.prepare_data --num-parts=10 --tpch_gen_folder="data/tables/scale-$(SCALE_FACTOR)"
69+
$(VENV_BIN)/python -m scripts.prepare_data --num-parts=${NUM_PARTITIONS} --tpch_gen_folder="data/tables/scale-$(SCALE_FACTOR)"
7170

7271

7372
endif
7473

7574
.PHONY: run-polars
76-
run-polars: .venv data/tables/.generated ## Run Polars benchmarks
75+
run-polars: .venv data-tables ## Run Polars benchmarks
7776
$(VENV_BIN)/python -m queries.polars
7877

7978
.PHONY: run-polars-no-env
80-
run-polars-no-env: data/tables/ ## Run Polars benchmarks
79+
run-polars-no-env: data-tables ## Run Polars benchmarks
8180
$(MAKE) -C tpch-dbgen dbgen
8281
cd tpch-dbgen && ./dbgen -f -s $(SCALE_FACTOR) && cd ..
8382
mkdir -p "data/tables/scale-$(SCALE_FACTOR)"
@@ -91,23 +90,23 @@ run-polars-gpu-no-env: run-polars-no-env data/tables/ ## Run Polars CPU and GPU
9190
RUN_POLARS_GPU=true CUDA_MODULE_LOADING=EAGER python -m queries.polars
9291

9392
.PHONY: run-duckdb
94-
run-duckdb: .venv data/tables/.generated ## Run DuckDB benchmarks
93+
run-duckdb: .venv data-tables ## Run DuckDB benchmarks
9594
$(VENV_BIN)/python -m queries.duckdb
9695

9796
.PHONY: run-pandas
98-
run-pandas: .venv data/tables/.generated ## Run pandas benchmarks
97+
run-pandas: .venv data-tables ## Run pandas benchmarks
9998
$(VENV_BIN)/python -m queries.pandas
10099

101100
.PHONY: run-pyspark
102-
run-pyspark: .venv data/tables/.generated ## Run PySpark benchmarks
101+
run-pyspark: .venv data-tables ## Run PySpark benchmarks
103102
$(VENV_BIN)/python -m queries.pyspark
104103

105104
.PHONY: run-dask
106-
run-dask: .venv data/tables/.generated ## Run Dask benchmarks
105+
run-dask: .venv data-tables ## Run Dask benchmarks
107106
$(VENV_BIN)/python -m queries.dask
108107

109108
.PHONY: run-modin
110-
run-modin: .venv data/tables/.generated ## Run Modin benchmarks
109+
run-modin: .venv data-tables ## Run Modin benchmarks
111110
$(VENV_BIN)/python -m queries.modin
112111

113112
.PHONY: run-all

0 commit comments

Comments
 (0)