-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_all_benchmarks.sh
More file actions
executable file
·99 lines (83 loc) · 2.27 KB
/
run_all_benchmarks.sh
File metadata and controls
executable file
·99 lines (83 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
# Run SQL + multimodal QA benchmarks in smoke/full mode.
#
# Usage: bash run_all_benchmarks.sh [smoke|full] [model]
#   $1 - benchmark mode: "smoke" (default) or "full"
#   $2 - model name forwarded to the experiment scripts (default: deepseek)
#
# Every path used below is relative to the current working directory.

# Fail fast: abort on command errors, unset variables and failed pipelines.
set -euo pipefail

# Prefer the virtualenv interpreter when it is executable; otherwise fall
# back to whatever "python3" resolves to on PATH.
PYTHON_BIN="./venv/bin/python"
if [[ ! -x "$PYTHON_BIN" ]]; then
  PYTHON_BIN="python3"
fi

MODE=${1:-smoke}
MODEL=${2:-deepseek}

# Reject unknown modes. The usage text is a diagnostic, so it goes to stderr
# and never pollutes captured stdout.
case "$MODE" in
  smoke | full) ;;
  *)
    echo "Usage: bash run_all_benchmarks.sh [smoke|full] [model]" >&2
    exit 1
    ;;
esac

echo "============================================================"
echo "Unified Benchmark Runner"
echo "Mode: $MODE"
echo "Model: $MODEL"
echo "============================================================"

# Directory that the unified benchmark output files are written into.
mkdir -p experiments/results
#######################################
# Run one unified multimodal benchmark, or skip it when its data directory
# does not exist.
# Globals:
#   PYTHON_BIN (read) - interpreter command used to launch the experiment
#   MODE (read)       - forwarded as --mode
#   MODEL (read)      - forwarded as --model
# Arguments:
#   $1 - benchmark name, forwarded as --benchmark
#   $2 - split name, forwarded as --split
#   $3 - output path, forwarded as --output
#   $4 - directory that must exist for the benchmark to be run
#   $5 - step number shown in the progress message
#   $6 - total number of steps shown in the progress message
#   $7 - human-readable label shown in the progress message
# Outputs:
#   A blank line and a progress line on stdout, followed by whatever the
#   experiment command prints.
# Returns:
#   0 when the benchmark is skipped; otherwise the exit status of the
#   experiment command.
#######################################
run_unified_optional() {
  local benchmark="$1"
  local split="$2"
  local output="$3"
  local required_dir="$4"
  local step="$5"
  local total="$6"
  local label="$7"

  # A missing data directory is not an error: report it and carry on.
  if [[ ! -d "$required_dir" ]]; then
    printf '\n%s/%s Skipping %s (missing directory: %s)\n' \
      "$step" "$total" "$label" "$required_dir"
    return 0
  fi

  printf '\n%s/%s Running %s...\n' "$step" "$total" "$label"

  # PYTHON_BIN is quoted so it is always treated as a single command word,
  # regardless of whitespace/glob characters in the path or the current IFS.
  "$PYTHON_BIN" experiments/unified_multimodal_experiment.py \
    --benchmark "$benchmark" \
    --data-root data/multimodal \
    --split "$split" \
    --mode "$MODE" \
    --auto-resolve-without-user \
    --model "$MODEL" \
    --output "$output"
}
# --- Steps 1-2: text-to-SQL benchmarks -------------------------------------
# PYTHON_BIN is quoted so the interpreter path is always a single command
# word, even if it contains whitespace or glob characters.
echo ""
echo "1/6 Running Spider text-to-SQL..."
"$PYTHON_BIN" experiments/full_experiment.py --dataset spider --model "$MODEL" --mode "$MODE"

echo ""
echo "2/6 Running BIRD text-to-SQL..."
"$PYTHON_BIN" experiments/full_experiment.py --dataset bird --model "$MODEL" --mode "$MODE"

# --- Steps 3-6: unified multimodal QA benchmarks ---------------------------
# Each call is skipped (not failed) when its data directory is missing.
# Argument order: benchmark, split, output file, required data directory,
# step number, total steps, display label.
run_unified_optional \
  "chartqa" \
  "test" \
  "experiments/results/unified_chartqa_${MODE}.json" \
  "data/multimodal/chartqa" \
  "3" \
  "6" \
  "ChartQA unified QA"

run_unified_optional \
  "infographicsvqa" \
  "val" \
  "experiments/results/unified_infographicsvqa_${MODE}.json" \
  "data/multimodal/infographicsvqa" \
  "4" \
  "6" \
  "InfographicsVQA unified QA"

run_unified_optional \
  "caesura_artwork" \
  "test" \
  "experiments/results/unified_caesura_artwork_${MODE}.json" \
  "data/multimodal/caesura/artwork" \
  "5" \
  "6" \
  "CAESURA artwork unified QA"

run_unified_optional \
  "caesura_rotowire" \
  "test" \
  "experiments/results/unified_caesura_rotowire_${MODE}.json" \
  "data/multimodal/caesura/rotowire" \
  "6" \
  "6" \
  "CAESURA rotowire unified QA"

echo ""
echo "Done. Outputs are under experiments/results/"