#!/bin/sh
#
# Runs the judge against every integration test found below a target
# directory and compares the judge output with the stored result.json.
#
# Usage: <this script> [--overwrite] [TARGET_DIR]
#
#   TARGET_DIR   directory that is searched for config.json files
#                (default: integration-tests). Each directory containing a
#                config.json is treated as one test.
#   --overwrite  write the fresh judge output to result.json of every test.
#
# Must be started from the judge directory: the judge executable is expected
# at "$PWD/run". Requires jq, timeout and mktemp.
#
# Exit status: 1 when at least one test shows a minor or major change (or the
# judge output could not be processed), 2 on a usage error, 0 otherwise.
#
# The script is written for POSIX sh (no process substitution, no `echo -e`),
# so all functions share global variables instead of using `local`.
set -eu

# Prints a one-line usage summary to stdout.
usage() {
  printf 'Usage: %s [--overwrite] [TARGET_DIR]\n' "${0##*/}"
}

# Prints the number of stdin lines containing the fixed string $1.
# grep -c exits with 1 when nothing matches (while still printing 0), which
# must not abort the script under `set -e`.
count_lines() {
  grep -c -F -- "$1" || true
}

# Prints path $1 unchanged when it is absolute, otherwise prefixed with the
# base directory $2.
absolute_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s/%s\n' "$2" "$1" ;;
  esac
}

# Removes the temporary directories; installed as EXIT trap by main.
cleanup() {
  [ -z "${workdir:-}" ] || rm -rf -- "$workdir"
  [ -z "${scratch:-}" ] || rm -rf -- "$scratch"
}

# Succeeds when the JSON files $1 and $2 are identical once the top-level
# .description key has been removed from every value. A file jq cannot
# process never counts as equal. Uses the global scratch directory.
same_without_description() {
  jq 'del(.description)' "$1" > "$scratch/expected.stripped" || return 1
  jq 'del(.description)' "$2" > "$scratch/actual.stripped" || return 1
  cmp -s "$scratch/expected.stripped" "$scratch/actual.stripped"
}

# Runs the judge for the test described by the config.json at $1, prints a
# one-line verdict and updates the global change counters.
# Reads the globals judge, scratch and overwrite.
run_test() {
  config="$1"
  test_dir="$(dirname "$config")"
  # Resolve against the judge directory so that both relative and absolute
  # target directories work.
  test_path="$(absolute_path "$test_dir" "$judge")"

  # Extract config values
  filename="$(jq -r '.evaluation.filename // .filename' "$config")"
  allow_compilation_warnings="$(jq -r '.allow_compilation_warnings == true' "$config")"
  lang="$(jq -r '.natural_language // "en"' "$config")"

  # Prefer an explicit submission, fall back to the solution file named in
  # the config.
  if [ -f "$test_path/submission.java" ]; then
    source_file="$test_path/submission.java"
  elif [ -f "$test_path/solution/$filename" ]; then
    source_file="$test_path/solution/$filename"
  else
    echo "Warning: No submission.java or $filename found in $test_dir. Skipping." >&2
    return 0
  fi

  result_file="$test_path/result.json"

  printf 'Testing %s ...\t' "$test_dir"

  workdir="$(mktemp -d)"

  # Copy workdir contents (including dotfiles) if they exist
  if [ -d "$test_path/workdir" ]; then
    cp -R "$test_path/workdir/." "$workdir"
  fi

  # Run the judge from inside the work directory. The request is built with
  # jq so that special characters in paths are escaped correctly;
  # allow_compilation_warnings is deliberately passed as a string, as before.
  # Only the status of the final jq decides success: a timeout or a non-zero
  # judge exit simply shows up as changed output. The filter strips Java
  # stack-trace lines from message descriptions.
  # NOTE(review): the whitespace between "\n" and "at" in the regex must
  # match the original script exactly - confirm against the repository.
  output="$(
    cd "$workdir" &&
      jq -n \
        --arg resources "$test_path/evaluation/" \
        --arg judge "$judge" \
        --arg natural_language "$lang" \
        --arg workdir "$workdir" \
        --arg allow_compilation_warnings "$allow_compilation_warnings" \
        --arg filename "$filename" \
        --arg source "$source_file" \
        '{ resources: $resources
         , judge: $judge
         , natural_language: $natural_language
         , workdir: $workdir
         , allow_compilation_warnings: $allow_compilation_warnings
         , filename: $filename
         , time_limit: 30
         , memory_limit: 1000000000
         , source: $source
         }' \
      | timeout -k 10s 60s "$judge/run" 2> /dev/null \
      | jq --sort-keys 'if(.command == "append-message")
                        then .message.description |= gsub("\n at [^\n]+\\([^)]+\\)"; "")
                        else .
                        end'
  )" || {
    printf '\nError: could not process the judge output for %s\n' "$test_dir" >&2
    exit 1
  }

  rm -rf -- "$workdir"
  workdir=""

  # Count accepted and failed in output
  accepted_output="$(printf '%s\n' "$output" | count_lines '"accepted": true')"
  failed_output="$(printf '%s\n' "$output" | count_lines '"accepted": false')"
  printf '%s accepted, %s failed\t' "$accepted_output" "$failed_output"

  if [ -f "$result_file" ]; then
    # Count accepted and failed in result
    accepted_result="$(count_lines '"accepted": true' < "$result_file")"
    failed_result="$(count_lines '"accepted": false' < "$result_file")"

    printf '%s\n' "$output" > "$scratch/actual.json"

    # First check for a byte-identical result, then for a result that only
    # differs in '.description', then whether the accepted/failed counts
    # are unchanged.
    if cmp -s "$result_file" "$scratch/actual.json"; then
      echo "[EXACT MATCH]"
      exact_match=$((exact_match + 1))
    elif same_without_description "$result_file" "$scratch/actual.json"; then
      echo "[DESCRIPTION CHANGED]"
      description_changed=$((description_changed + 1))
    elif [ "$accepted_output" -eq "$accepted_result" ] && [ "$failed_output" -eq "$failed_result" ]; then
      echo "[MINOR CHANGE] (accepted/failed unchanged)"
      minor_change=$((minor_change + 1))
    else
      echo "[MAJOR CHANGE] (accepted/failed changed, was $accepted_result/$failed_result)"
      major_change=$((major_change + 1))
    fi
  else
    # Terminate the status line; there is nothing to compare against.
    echo "[NO RESULT FILE]"
  fi

  # Overwrite results if requested
  if [ "$overwrite" -eq 1 ]; then
    printf '%s\n' "$output" > "$result_file"
    echo "[OVERWRITE]"
  fi
}

# Entry point: parses the arguments, runs every test and reports a summary.
main() {
  # Default values
  target_dir="integration-tests"
  overwrite=0

  # Parse arguments
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --overwrite)
        overwrite=1
        ;;
      -h | --help)
        usage
        return 0
        ;;
      --)
        # Everything after "--" is a directory, even if it starts with "-".
        if [ "$#" -gt 1 ]; then
          shift
          target_dir="$1"
        fi
        ;;
      -*)
        printf 'Unknown option: %s\n' "$1" >&2
        usage >&2
        return 2
        ;;
      *)
        target_dir="$1"
        ;;
    esac
    shift
  done

  judge="$PWD"
  exact_match=0
  description_changed=0
  minor_change=0
  major_change=0

  workdir=""
  scratch="$(mktemp -d)"
  trap cleanup EXIT
  # Turn signals into a regular exit so that the EXIT trap also runs in
  # shells that do not fire it on signals.
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Find all config.json files in target directory
  config_list="$scratch/configs"
  if ! find "$target_dir" -type f -name "config.json" > "$config_list"; then
    printf 'Warning: could not scan %s completely.\n' "$target_dir" >&2
  fi
  if [ ! -s "$config_list" ]; then
    printf 'Warning: no config.json found below %s.\n' "$target_dir" >&2
  fi

  # The list is read on fd 3 so that commands inside the loop cannot consume
  # it through their stdin.
  while IFS= read -r config_path <&3; do
    run_test "$config_path"
  done 3< "$config_list"

  # Report results
  echo
  echo "=== RESULTS ==="
  echo "Exact matches: $exact_match"
  echo "Description changed: $description_changed"
  echo "Minor changes: $minor_change"
  echo "Major changes: $major_change"

  # Exit with error on minor or major changes
  if [ "$minor_change" -gt 0 ] || [ "$major_change" -gt 0 ]; then
    echo "FAIL. Run with --overwrite to update the results."
    exit 1
  fi
}

main "$@"