Skip to content

Commit fbe39cb

Browse files
committed
Expand integration test runner and add validate.sh for thorough validations
1 parent 44d2a21 commit fbe39cb

File tree

2 files changed

+137
-16
lines changed

2 files changed

+137
-16
lines changed

integration-tests/run

Lines changed: 110 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,130 @@
#!/usr/bin/env bash
#
# Integration test runner.
#
# Usage: integration-tests/run [--overwrite] [target_dir]
#
# Must be started from the judge's root directory: "$PWD" is handed to the
# judge as its own location and "$PWD/run" is the executable that is tested.
#
# Every config.json found below target_dir (default: integration-tests)
# denotes one test. The judge is run on the test's submission and its
# normalised output is compared against the result.json stored next to the
# config.json. Each test is classified as:
#   EXACT MATCH          output identical to result.json
#   DESCRIPTION CHANGED  identical once top-level .description keys are dropped
#   MINOR CHANGE         differs, but accepted/failed counts are unchanged
#   MAJOR CHANGE         accepted/failed counts differ
#
# With --overwrite, result.json is replaced by the fresh output.
#
# Exit status: 1 if any test is a minor or major change (or the target does
# not exist), 0 otherwise.
#
# This script needs bash: it relies on process substitution, which plain
# POSIX sh (e.g. dash) does not provide.
set -e

# Default values
target_dir="integration-tests"
overwrite=0

# Parse arguments: --overwrite toggles result rewriting, anything else is
# taken as the directory to scan (the last one given wins).
while [ "$#" -gt 0 ]; do
  case "$1" in
    --overwrite)
      overwrite=1
      ;;
    *)
      target_dir="$1"
      ;;
  esac
  shift
done

if [ ! -e "$target_dir" ]; then
  echo "Error: '$target_dir' does not exist." >&2
  exit 1
fi

judge="$PWD"
exact_match=0
description_changed=0
minor_change=0
major_change=0

# Remove the scratch directory of the test in progress if the script is
# aborted (set -e, signal) before the regular per-test cleanup ran.
workdir=""
cleanup() {
  if [ -n "$workdir" ] && [ -d "$workdir" ]; then
    rm -rf -- "$workdir"
  fi
}
trap cleanup EXIT

# Find all config.json files in target directory. The list is read on file
# descriptor 3 so commands inside the loop cannot swallow it through stdin,
# and it is sorted so runs are reproducible.
while IFS= read -r config <&3; do
  test_dir="$(dirname -- "$config")"
  # Absolute location of the test, valid for relative and absolute targets.
  test_abs="$(cd -- "$test_dir" > /dev/null && pwd)"

  # Extract config values
  filename="$(jq -r '.evaluation.filename // .filename' "$config")"
  allow_compilation_warnings="$(jq -r '.allow_compilation_warnings == true' "$config")"
  lang="$(jq -r '.natural_language // "en"' "$config")"

  # Determine submission source: submission.java takes precedence over
  # solution/<filename>.
  if [ -f "$test_abs/submission.java" ]; then
    source="$test_abs/submission.java"
  elif [ -f "$test_abs/solution/$filename" ]; then
    source="$test_abs/solution/$filename"
  else
    echo "Warning: No submission.java or $filename found in $test_dir. Skipping." >&2
    continue
  fi

  result_file="$test_abs/result.json"

  printf 'Testing %s ...\t' "$test_dir"

  workdir="$(mktemp -d)"
  cd "$workdir"

  # Copy workdir contents if they exist
  if [ -d "$test_abs/workdir" ]; then
    find "$test_abs/workdir" -mindepth 1 -maxdepth 1 -exec cp -r \{\} . \;
  fi

  # Run the judge. The request is assembled by jq so that paths containing
  # quotes or backslashes are escaped correctly. allow_compilation_warnings
  # is deliberately passed as a string ("true"/"false"), as before.
  # The judge's stderr is discarded on purpose; only its stdout is compared.
  # The jq post-filter strips stack-trace lines from message descriptions.
  output=$(jq -n \
      --arg resources "$test_abs/evaluation/" \
      --arg judge "$judge" \
      --arg lang "$lang" \
      --arg workdir "$workdir" \
      --arg allow "$allow_compilation_warnings" \
      --arg filename "$filename" \
      --arg source "$source" \
      '{ resources: $resources
       , judge: $judge
       , natural_language: $lang
       , workdir: $workdir
       , allow_compilation_warnings: $allow
       , filename: $filename
       , time_limit: 30
       , memory_limit: 1000000000
       , source: $source
       }' \
    | (timeout -k 10s 60s "$judge/run" 2> /dev/null) \
    | jq --sort-keys 'if(.command == "append-message")
        then .message.description |= gsub("\n at [^\n]+\\([^)]+\\)"; "")
        else .
      end'
  )

  cd "$judge"
  rm -rf -- "$workdir"
  workdir=""

  # Count accepted and failed in output. grep -c exits 1 when the count is
  # zero, hence the "|| true" to keep set -e from aborting.
  accepted_output="$(printf '%s\n' "$output" | grep -c '"accepted": true' || true)"
  failed_output="$(printf '%s\n' "$output" | grep -c '"accepted": false' || true)"
  printf '%s accepted, %s failed\t' "$accepted_output" "$failed_output"

  if [ -f "$result_file" ]; then
    # Count accepted and failed in result
    accepted_result="$(grep -c '"accepted": true' "$result_file" || true)"
    failed_result="$(grep -c '"accepted": false' "$result_file" || true)"

    # First check if files are exact using diff, then check if they differ
    # except '.description', then check they differ in accepted or failed
    if diff "$result_file" <(printf '%s\n' "$output") > /dev/null; then
      echo "[EXACT MATCH]"
      exact_match=$((exact_match + 1))
    elif diff <(jq "del(.description)" "$result_file") \
              <(printf '%s\n' "$output" | jq "del(.description)") > /dev/null; then
      echo "[DESCRIPTION CHANGED]"
      description_changed=$((description_changed + 1))
    elif [ "$accepted_output" -eq "$accepted_result" ] \
        && [ "$failed_output" -eq "$failed_result" ]; then
      echo "[MINOR CHANGE] (accepted/failed unchanged)"
      minor_change=$((minor_change + 1))
    else
      echo "[MAJOR CHANGE] (accepted/failed changed, was $accepted_result/$failed_result)"
      major_change=$((major_change + 1))
    fi
  elif [ "$overwrite" -ne 1 ]; then
    # Nothing to compare against; still terminate the status line.
    echo "[NO RESULT FILE]"
  fi

  # Check or overwrite results
  if [ "$overwrite" -eq 1 ]; then
    printf '%s\n' "$output" > "$result_file"
    echo "[OVERWRITE]"
  fi
done 3< <(find "$target_dir" -name "config.json" | LC_ALL=C sort)

# Report results
echo
echo "=== RESULTS ==="
echo "Exact matches: $exact_match"
echo "Description changed: $description_changed"
echo "Minor changes: $minor_change"
echo "Major changes: $major_change"

# Exit with error on minor or major changes
if [ "$minor_change" -gt 0 ] || [ "$major_change" -gt 0 ]; then
  echo "FAIL. Run with --overwrite to update the results."
  exit 1
fi

validate.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
#!/bin/sh
#
# Validate the judge against the tests stored in another directory tree.
#
# Usage: ./validate.sh <path-to-repo> [--overwrite]
#
#   <path-to-repo>  directory that is scanned for tests by the integration
#                   test runner
#   --overwrite     forwarded to the runner; only recognised directly after
#                   the path
#
# Must be started from the directory that contains integration-tests/run,
# because the runner is invoked through a path relative to the current
# directory. The exit status is that of the runner, or 1 on a usage error.
set -e

if [ -z "${1:-}" ]; then
  echo "Usage: $0 <path-to-repo> [--overwrite]" >&2
  exit 1
fi

repo_path="$1"
shift

# Check if repo path exists
if [ ! -d "$repo_path" ]; then
  echo "Error: Directory '$repo_path' does not exist." >&2
  exit 1
fi

# Build the runner's argument list explicitly instead of expanding a
# possibly empty variable unquoted.
if [ "${1:-}" = "--overwrite" ]; then
  set -- "$repo_path" --overwrite
else
  set -- "$repo_path"
fi

# Run the integration test runner with the repo path
./integration-tests/run "$@"

0 commit comments

Comments
 (0)