Skip to content

Commit 6d28c0c

Browse files
committed
chore: extract comparison tool from fuzzer
1 parent ce2a5b2 commit 6d28c0c

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

dev/benchmarks/tpcbench.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,12 @@ def main(benchmark: str, data_path: str, query_path: str, iterations: int, outpu
111111
# coming across for running DDL stmt
112112
if len(df.columns) > 0:
113113
output_path = f"{write_path}/q{query}"
114-
dedup_columns(df).coalesce(1).write.mode("overwrite").parquet(output_path)
114+
# sort by all columns to have predictable output dataset for comparison
115+
df_sorted = df.orderBy(*df.columns)
116+
# rename same column names for output
117+
# output doesn't allow non unique column names
118+
# a, a, b, b => a, a_1, b, b_1
119+
dedup_columns(df_sorted).coalesce(1).write.mode("overwrite").parquet(output_path)
115120
print(f"Query {query} results written to {output_path}")
116121
else:
117122
print(f"Skipping write: DataFrame has no schema for {output_path}")

0 commit comments

Comments
 (0)