
Commit 0182ef9

Some vx-bench fixes

Signed-off-by: Adam Gutglick <[email protected]>
1 parent eab8dcb commit 0182ef9

File tree: 5 files changed, +19 −85 lines

bench-orchestrator/README.md

Lines changed: 6 additions & 34 deletions
@@ -65,7 +65,6 @@ vx-bench compare [options]
 - `--base, -b`: Base reference (`engine:format@run`)
 - `--target, -t`: Target reference (`engine:format@run`)
 - `--threshold`: Significance threshold (default: 0.10 = 10%)
-- `--markdown, -m`: Output as GitHub-compatible markdown
 
 ### `list` - List Benchmark Runs
 
@@ -129,7 +128,7 @@ git checkout feature/my-optimization
 vx-bench run tpch -e datafusion -f parquet,vortex -l feature
 
 # Compare the runs
-vx-bench compare --runs baseline,feature --markdown
+vx-bench compare --runs baseline,feature
 ```
 
 ### 2. Quick Regression Check
@@ -172,25 +171,9 @@ vx-bench run tpch \
 vx-bench compare \
   --base "datafusion:parquet@format-analysis" \
   --target "datafusion:vortex@format-analysis" \
-  --markdown
 ```
 
-### 5. CI/CD Integration
-
-Generate markdown output for pull request comments:
-
-```bash
-# Run benchmarks
-vx-bench run tpch -e datafusion -f parquet,vortex -l "pr-$PR_NUMBER"
-
-# Generate comparison for PR comment
-vx-bench compare \
-  --base "datafusion:parquet@main" \
-  --target "datafusion:parquet@pr-$PR_NUMBER" \
-  --markdown > benchmark-report.md
-```
-
-### 6. Memory Usage Analysis
+### 5. Memory Usage Analysis
 
 Track memory usage alongside performance:
 
@@ -204,7 +187,7 @@ vx-bench run tpch \
 vx-bench show memory-profiling
 ```
 
-### 7. Scale Factor Testing
+### 6. Scale Factor Testing
 
 Test performance at different data scales:
 
@@ -219,7 +202,7 @@ vx-bench run tpch -s 10 -l sf10
 vx-bench compare --runs sf1,sf10
 ```
 
-### 8. Excluding Problematic Queries
+### 7. Excluding Problematic Queries
 
 Skip queries that are known to fail or take too long:
 
@@ -228,7 +211,7 @@ Skip queries that are known to fail or take too long:
 vx-bench run tpch --exclude-queries 15,21 -l partial-run
 ```
 
-### 9. Historical Analysis
+### 8. Historical Analysis
 
 Find runs from the past week and compare trends:
 
@@ -240,7 +223,7 @@ vx-bench list --since "7 days" --benchmark tpch
 vx-bench compare --runs <run-id-1>,<run-id-2>
 ```
 
-### 10. Cleanup Old Results
+### 9. Cleanup Old Results
 
 Keep your results directory manageable:
 
@@ -291,17 +274,6 @@ Default output uses rich formatting with color-coded ratios:
 - Red (with down arrow): Regression (>10% slower)
 - Yellow: Neutral (within 10%)
 
-### Markdown Output
-
-Use `--markdown` for GitHub-compatible tables suitable for PR comments:
-
-```markdown
-| Query | base | target | Ratio |
-|-------|------|--------|-------|
-| q1 | 1.2s | 0.9s | 0.750x |
-| q2 | 2.5s | 2.6s | 1.040x |
-```
-
 ## Data Storage
 
 Results are stored in `<workspace>/target/vortex-bench/runs/`. Each run creates a directory containing:
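
Note: the `runs/` layout above pairs with the `metadata.json` that `store.py` writes when a run context exits (see that file's diff below). A minimal sketch for inspecting stored runs from Python — the paths and the `metadata.json` filename come from this commit, while treating `partial` as a top-level JSON key is an assumption based on the metadata model:

```python
# Sketch: list stored benchmark runs and their status.
# runs/ path and metadata.json come from this commit; reading
# "partial" as a top-level JSON key is an assumption.
import json
from pathlib import Path

runs_root = Path("target/vortex-bench/runs")
for run_dir in sorted(p for p in runs_root.iterdir() if p.is_dir()):
    meta_path = run_dir / "metadata.json"
    if not meta_path.exists():
        continue  # run crashed before metadata was written
    meta = json.loads(meta_path.read_text())
    status = "partial" if meta.get("partial") else "complete"
    print(f"{run_dir.name}: {status}")
```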

bench-orchestrator/bench_orchestrator/cli.py

Lines changed: 10 additions & 16 deletions
@@ -76,7 +76,7 @@ def run(
     exclude_list = parse_queries(exclude_queries)
 
     # Build options dict
-    options = {}
+    options: dict[str, str] = {}
     if scale_factor:
         options["scale_factor"] = scale_factor
 
@@ -170,7 +170,6 @@ def compare(
         typer.Option("--runs", "-r", help="Two runs to compare (comma-separated)"),
     ] = None,
     threshold: Annotated[float, typer.Option("--threshold", help="Significance threshold (default 10%)")] = 0.10,
-    markdown: Annotated[bool, typer.Option("--markdown", "-m", help="Output as markdown")] = False,
 ) -> None:
     """Compare benchmark results."""
     store = ResultStore()
@@ -244,18 +243,13 @@ def compare(
 
     reporter = BenchmarkReporter(comparison, stats, threshold)
 
-    if markdown:
-        console.print(reporter.summary())
-        console.print()
-        console.print(reporter.to_markdown(base_label, target_label))
-    else:
-        table = reporter.to_rich_table(
-            title="Benchmark Comparison",
-            base_label=base_label,
-            target_label=target_label,
-        )
-        console.print(table)
-        reporter.print_summary()
+    table = reporter.to_rich_table(
+        title="Benchmark Comparison",
+        base_label=base_label,
+        target_label=target_label,
+    )
+    console.print(table)
+    reporter.print_summary()
 
 
 @app.command("list")
@@ -288,7 +282,7 @@ def list_runs(
         return
 
     table = Table(title="Benchmark Runs")
-    table.add_column("Run ID", style="cyan")
+    table.add_column("Run ID", style="cyan", no_wrap=True)
    table.add_column("Label", style="green")
    table.add_column("Benchmark")
    table.add_column("Engines")
@@ -298,7 +292,7 @@ def list_runs(
     for run in runs:
         status = "[yellow]partial[/yellow]" if run.partial else "[green]complete[/green]"
         table.add_row(
-            run.run_id[:30] + "..." if len(run.run_id) > 30 else run.run_id,
+            run.run_id,
             run.label or "-",
             run.benchmark,
             ", ".join(run.engines),

bench-orchestrator/bench_orchestrator/comparison/reporter.py

Lines changed: 0 additions & 32 deletions
@@ -96,38 +96,6 @@ def to_rich_table(
 
         return table
 
-    def to_markdown(
-        self,
-        base_label: str = "base",
-        target_label: str = "target",
-    ) -> str:
-        """Generate markdown table (GitHub-compatible)."""
-        lines = []
-
-        # Header
-        lines.append(f"| Query | {base_label} | {target_label} | Ratio |")
-        lines.append("|-------|---------|--------|-------|")
-
-        for _, row in self.df.iterrows():
-            name = str(row.get("name", ""))
-            if "/" in name:
-                name = name.split("/")[0]
-
-            base_val = row.get("value_base", float("nan"))
-            target_val = row.get("value_target", float("nan"))
-            ratio = row.get("ratio", float("nan"))
-
-            ratio_str = f"{ratio:.3f}x" if not pd.isna(ratio) else "N/A"
-            if not pd.isna(ratio):
-                if ratio < (1.0 - self.threshold):
-                    ratio_str += " \U0001f680"  # Rocket
-                elif ratio > (1.0 + self.threshold):
-                    ratio_str += " \U0001f6a8"  # Alarm
-
-            lines.append(f"| {name} | {_format_time_ns(base_val)} | {_format_time_ns(target_val)} | {ratio_str} |")
-
-        return "\n".join(lines)
-
     def summary(self) -> str:
         """Generate summary statistics."""
         lines = ["## Summary", ""]

bench-orchestrator/bench_orchestrator/storage/store.py

Lines changed: 2 additions & 3 deletions
@@ -81,8 +81,8 @@ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         # Mark as partial if there was an exception
         if exc_type is not None:
             self.metadata.partial = True
-        else:
-            self.metadata.completed_at = datetime.now()
+
+        self.metadata.completed_at = datetime.now()
 
         # Write metadata
         with open(self.run_dir / "metadata.json", "w") as f:
@@ -146,7 +146,6 @@ def create_run(self, config: RunConfig, build_config: BuildConfig) -> Iterator[R
             env_triple=env_triple,
             rustflags=build_config.rustflags,
             profile=build_config.profile,
-            partial=True,  # Will be set to False on successful completion
         )
 
         ctx = RunContext(run_dir, metadata)
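
Net effect of the two `store.py` hunks: `partial` now starts false and is only flipped when the context manager exits with an exception, while `completed_at` is stamped unconditionally. A minimal sketch of the revised exit path, with a simplified stand-in for the real metadata model:

```python
# Sketch of the revised RunContext exit semantics; Metadata is a
# simplified stand-in for the real metadata model.
from dataclasses import dataclass
from datetime import datetime


@dataclass
class Metadata:
    partial: bool = False  # no longer initialized to True in create_run
    completed_at: datetime | None = None


class RunContext:
    def __init__(self, metadata: Metadata) -> None:
        self.metadata = metadata

    def __enter__(self) -> "RunContext":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # Mark as partial if there was an exception
        if exc_type is not None:
            self.metadata.partial = True
        # Always record an end time, even for failed runs
        self.metadata.completed_at = datetime.now()
```

With this shape, a run that raises still records when it ended but is listed as partial.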

vortex-bench/src/statpopgen/statpopgen_benchmark.rs

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ impl Benchmark for StatPopGenBenchmark {
         Ok(())
     }
 
+    #[allow(clippy::cast_possible_truncation)]
     fn expected_row_counts(&self) -> Option<Vec<usize>> {
         let n_rows = self.n_rows as usize;
         match self.scale_factor {
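
The new `#[allow]` presumably covers the `self.n_rows as usize` cast in `expected_row_counts`: clippy's `cast_possible_truncation` fires when the source type can be wider than the target on some platforms (e.g. a `u64` count cast to `usize` on a 32-bit target), and the attribute acknowledges that the truncation is acceptable here.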
