Skip to content

Commit 67a2098

Browse files
Merge pull request #1122 from terrastackai/test/lsf_script_update
Improve integration test script
2 parents 33d178b + 1915f42 commit 67a2098

File tree

1 file changed

+232
-41
lines changed

1 file changed

+232
-41
lines changed

scripts/run_lsf_integrationtest.sh

File mode changed: 100644 → 100755
Lines changed: 232 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,235 @@
11
#!/bin/bash
2-
# run_lsf_integrationtest.sh
3-
# Run each test (either file or individual test function) in separate LSF jobs with coverage + logs.
4-
#
5-
# Environment variables:
6-
# TEST_DIR - Base directory for tests (default: current directory or first argument)
7-
# TEST_FILE - Test file to run (default: integrationtests/test_base_set.py)
8-
# VENV_PATH - Path to Python virtual environment (default: .venv)
9-
10-
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail
11-
12-
# NOTE(review): TEST_DIR and TEST_FILE both fall back to the first positional
# argument ($1). When $1 is given and TEST_DIR is not set in the environment,
# the same value ends up in both variables.
TEST_DIR="${TEST_DIR:-${1:-$(pwd)}}"
13-
TEST_FILE="${1:-${TEST_FILE:-integrationtests/test_base_set.py}}"
14-
VENV_PATH="${VENV_PATH:-.venv}"
15-
# Per-test stdout/stderr files are written under $TEST_DIR/logs.
LOG_DIR="$TEST_DIR/logs"
16-
# NOTE(review): COV_DIR is created below but is not referenced again in the
# visible part of this script.
COV_DIR="$TEST_DIR/.coverage_jobs"
17-
mkdir -p "$LOG_DIR" "$COV_DIR"
18-
19-
20-
# Collect pytest node ids for TEST_FILE from inside TEST_DIR. Collection errors
# are discarded (2>/dev/null) and an empty result is tolerated (|| true).
# NOTE(review): the grep filter is hard-coded to
# integrationtests/test_base_set.py, so node ids of any other TEST_FILE are
# dropped.
all_tests=$(cd "$TEST_DIR" && \
21-
pytest --collect-only -q "$TEST_FILE" 2>/dev/null | \
22-
grep -E '^integrationtests/test_base_set\.py::' || true)
23-
24-
# Show the collected node ids before any job is submitted.
echo "$all_tests"
25-
26-
for test in $all_tests; do
27-
# Normalize name (pytest nodeid may include "::class::test_func")
28-
test_name=$(echo "$test" | tr '/:' '_')
29-
out="$LOG_DIR/${test_name}.out"
30-
err="$LOG_DIR/${test_name}.err"
31-
32-
33-
echo "Submitting job for $test"
34-
hash=$(echo -n "$test" | sha1sum | cut -c1-10)
35-
job_name="tt_${hash}"
36-
bsub -gpu num=1 -R "rusage[ngpus=1,cpu=4,mem=32GB]" \
37-
-J "terratorch_${job_name}" \
38-
-oo "$out" -eo "$err" \
39-
"cd $TEST_DIR && \
40-
source $VENV_PATH/bin/activate && \
41-
pytest -s -v $test"
2+
3+
#######################################
# Parse the command line.
#
# Usage: <branch_name> [target_dir] [venv_base_path] [--no-cleanup]
#
# The first positional argument is the branch name. Every later positional
# argument that starts with "/" or "." is taken as the venv base directory;
# any other positional argument is taken as the target directory.
#
# Globals written:
#   BRANCH_NAME   - branch to test (empty when no positional argument given)
#   VENV_BASE_DIR - directory that will hold the venv (empty when not given)
#   TARGET_DIR    - checkout directory; defaults to "terratorch.<branch>"
#   SKIP_CLEANUP  - "true" when --no-cleanup was passed, otherwise "false"
# Arguments: the script's command-line arguments ("$@")
#######################################
parse_args() {
  BRANCH_NAME=""
  VENV_BASE_DIR=""
  TARGET_DIR=""
  SKIP_CLEANUP=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --no-cleanup)
        SKIP_CLEANUP=true
        ;;
      *)
        if [[ -z "$BRANCH_NAME" ]]; then
          BRANCH_NAME=$1
        elif [[ "$1" == /* || "$1" == .* ]]; then
          # Only record the venv location here. TARGET_DIR must not be reset
          # at this point, otherwise an explicit target_dir given earlier on
          # the command line would be silently replaced by the default.
          VENV_BASE_DIR=$1
        else
          TARGET_DIR=$1
        fi
        ;;
    esac
    shift
  done

  # Set default TARGET_DIR if not specified
  if [[ -z "$TARGET_DIR" ]]; then
    TARGET_DIR="terratorch.$BRANCH_NAME"
  fi
}

parse_args "$@"
33+
34+
# Agnostic Python Discovery
# Try the candidates in order of preference and keep the first one that
# resolves to an executable. `command -v` is the shell builtin for this lookup
# and, unlike `which`, does not depend on an external program being installed.
# PYTHON_BIN is left empty when none of the candidates is found.
PYTHON_BIN=""
for py_candidate in \
  /dccstor/terratorch/python3.12.3/bin/python3.12 \
  python3.10 \
  python3; do
  if PYTHON_BIN=$(command -v "$py_candidate" 2>/dev/null); then
    break
  fi
done
unset py_candidate

# Path relative to the repository root
TEST_FILE_PATH="integrationtests/test_base_set.py"
39+
40+
# 1. Validation ---
# A branch name is mandatory: without one, print the usage text and stop
# with exit status 1.
if [[ -z "${BRANCH_NAME}" ]]; then
  printf '%s\n' \
    "Usage: $0 <branch_name> [target_dir] [venv_base_path] [--no-cleanup]" \
    "  --no-cleanup: Skip running the cleanup test"
  exit 1
fi
46+
47+
# 2. Path Setup ---
# Remember where the script was started, then resolve the checkout directory
# to an absolute path (creating it first if necessary).
BASE_PATH=$(pwd)
mkdir -p "$TARGET_DIR"
FULL_PATH=$(cd "$TARGET_DIR" && pwd)

# The venv lives inside the checkout unless a separate base directory was
# requested, in which case it is placed there as venv_<branch>.
VENV_PATH="$FULL_PATH/.venv"
if [[ -n "$VENV_BASE_DIR" ]]; then
  mkdir -p "$VENV_BASE_DIR"
  VENV_ROOT=$(cd "$VENV_BASE_DIR" && pwd)
  VENV_PATH="$VENV_ROOT/venv_$BRANCH_NAME"
fi
59+
60+
# 3. Clone & Checkout ---
# Clone only when the target directory is not a git checkout yet. A failed
# clone is fatal: the directory already exists (it was created during path
# setup), so continuing would run the git commands below against whatever
# repository happens to enclose it.
if [ ! -d "$FULL_PATH/.git" ]; then
  echo "Cloning Terratorch into $FULL_PATH ---"
  git clone git@github.com:terrastackai/terratorch.git "$FULL_PATH" || {
    echo "Error: git clone into $FULL_PATH failed." >&2
    exit 1
  }
fi

cd "$FULL_PATH" || exit 1

# A failed fetch is reported but not fatal: a branch that already exists
# locally can still be checked out.
git fetch origin \
  || echo "Warning: 'git fetch origin' failed; continuing with the refs already present locally." >&2

# Use the local branch when it exists, otherwise create it from origin.
# NOTE(review): an existing local branch is checked out as-is and is not
# fast-forwarded to origin/$BRANCH_NAME - confirm that this is intended.
git checkout "$BRANCH_NAME" \
  || git checkout -b "$BRANCH_NAME" "origin/$BRANCH_NAME" \
  || {
    echo "Error: could not check out branch '$BRANCH_NAME' in $FULL_PATH." >&2
    exit 1
  }
69+
70+
# 4. Environment Setup ---
# Create the virtual environment on first use and install the project in
# editable mode with its "test" extra; on later runs only activate it.
# Every step that the following ones rely on is checked, so that a failed
# venv creation or activation can never lead to `pip install` running against
# the ambient Python environment.
if [ ! -d "$VENV_PATH" ]; then
  if [ -z "$PYTHON_BIN" ]; then
    echo "Error: no Python interpreter found; cannot create $VENV_PATH" >&2
    exit 1
  fi

  echo "Setting up Virtual Environment using $PYTHON_BIN..."
  "$PYTHON_BIN" -m venv "$VENV_PATH" || {
    echo "Error: failed to create virtual environment at $VENV_PATH" >&2
    exit 1
  }
  source "$VENV_PATH/bin/activate" || {
    echo "Error: failed to activate virtual environment at $VENV_PATH" >&2
    exit 1
  }

  # An outdated pip is tolerable, so a failed self-upgrade is not fatal.
  pip install --upgrade pip
  pip install -e ".[test]" || {
    echo "Error: 'pip install -e .[test]' failed. Remove $VENV_PATH before retrying, otherwise the incomplete environment will be reused." >&2
    exit 1
  }
else
  source "$VENV_PATH/bin/activate" || {
    echo "Error: failed to activate virtual environment at $VENV_PATH" >&2
    exit 1
  }
fi
80+
81+
# 5. Extract Tests from root-level integrationtests folder ---
# The test file is addressed relative to the current directory; stop with a
# directory listing when it is missing.
if [[ ! -f "$TEST_FILE_PATH" ]]; then
  printf '%s\n' \
    "Error: Test file not found at $FULL_PATH/$TEST_FILE_PATH" \
    "Current directory content:"
  ls -F
  exit 1
fi

# Collect the names of the test functions. Only lines that begin with
# "def test_" in the first column are picked up; the "def " text, everything
# from the opening parenthesis onwards and any blanks are then removed.
TEST_LIST=$(
  grep -E '^def test_' "$TEST_FILE_PATH" \
    | sed 's/def //g' \
    | cut -d'(' -f1 \
    | tr -d ' '
)
91+
92+
# 6. Submit Individual Jobs ---
# Each run writes its job logs to a fresh, timestamped directory inside the
# checkout.
LOG_DIR="$FULL_PATH/lsf_logs_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$LOG_DIR"

# Count words rather than lines: an empty TEST_LIST still produces one (empty)
# line, which would be reported as one test. This also matches how the other
# test lists in this script are counted.
echo "Found $(echo "$TEST_LIST" | wc -w) tests. Submitting individual jobs..."
97+
98+
# Pull test_models_fit out of the collected names; every other name is
# appended (space-separated) to OTHER_TESTS.
MODELS_FIT_TEST=""
OTHER_TESTS=""

for TEST_NAME in $TEST_LIST; do
  case "$TEST_NAME" in
    test_models_fit)
      MODELS_FIT_TEST="$TEST_NAME"
      ;;
    *)
      OTHER_TESTS+=" $TEST_NAME"
      ;;
  esac
done
43109

44-
echo "All jobs submitted. Monitor with: bjobs -u \$USER"
110+
# Tests that need the checkpoints written by test_models_fit and therefore
# have to wait for it.
DEPENDENT_TESTS="test_latest_terratorch_version_buildings_predict test_latest_terratorch_version_floods_predict test_latest_terratorch_version_burnscars_predict"

# Sort the remaining tests into three buckets:
#   CLEANUP_TEST          - the test named test_cleanup
#   DEPENDENT_TEST_LIST   - names that appear as a whole word in DEPENDENT_TESTS
#   INDEPENDENT_TEST_LIST - everything else
DEPENDENT_TEST_LIST=""
INDEPENDENT_TEST_LIST=""
CLEANUP_TEST=""

for TEST_NAME in $OTHER_TESTS; do
  case "$TEST_NAME" in
    test_cleanup)
      CLEANUP_TEST="$TEST_NAME"
      ;;
    *)
      if grep -qw "$TEST_NAME" <<<"$DEPENDENT_TESTS"; then
        DEPENDENT_TEST_LIST+=" $TEST_NAME"
      else
        INDEPENDENT_TEST_LIST+=" $TEST_NAME"
      fi
      ;;
  esac
done
127+
128+
# Track dependent job IDs for cleanup dependency
DEPENDENT_JOB_IDS=""

#######################################
# Submit one test as its own LSF job (1 GPU, 8 CPUs, 128GB memory).
# The job activates the venv, changes into the checkout and runs pytest on
# TEST_FILE_PATH restricted with -k to the given name.
# NOTE(review): pytest -k is a substring/expression match, so a name that is
# a prefix of another test name selects both - confirm that no such pair
# exists in the suite.
# NOTE(review): VENV_PATH and FULL_PATH are embedded unquoted in the job
# command, so paths containing whitespace will not work.
# Globals:   USER, LOG_DIR, VENV_PATH, FULL_PATH, TEST_FILE_PATH (read)
# Arguments: $1 - test name
#            $2 - optional LSF dependency expression passed to `bsub -w`
# Outputs:   whatever bsub prints on stdout
# Returns:   bsub's exit status
#######################################
submit_job() {
  local test_name=$1
  local dependency=${2:-}
  local -a bsub_args=(-gpu "num=1" -R "rusage[ngpus=1, cpu=8, mem=128GB]")

  if [[ -n "$dependency" ]]; then
    bsub_args+=(-w "$dependency")
  fi
  bsub_args+=(
    -J "tt_${USER}_${test_name}"
    -o "$LOG_DIR/${test_name}.log"
    -e "$LOG_DIR/${test_name}.err"
  )

  bsub "${bsub_args[@]}" \
    "/bin/bash -c 'set -e; source $VENV_PATH/bin/activate && cd $FULL_PATH && pytest $TEST_FILE_PATH -k $test_name; exit \$?'"
}

#######################################
# Submit every test in INDEPENDENT_TEST_LIST without any dependency.
# Globals: INDEPENDENT_TEST_LIST (read)
#######################################
submit_independent_tests() {
  local test_name
  for test_name in $INDEPENDENT_TEST_LIST; do
    submit_job "$test_name"
  done
}

# Submit test_models_fit first (if it exists)
if [ -n "$MODELS_FIT_TEST" ]; then
  echo "Submitting test_models_fit (required prerequisite for dependent tests)..." >&2

  # The job ID is taken from bsub's "Job <ID> ..." confirmation line.
  MODELS_FIT_JOB_ID=$(submit_job "$MODELS_FIT_TEST" | grep -oP 'Job <\K[0-9]+')

  if [ -n "$MODELS_FIT_JOB_ID" ]; then
    echo "test_models_fit submitted with Job ID: $MODELS_FIT_JOB_ID" >&2
  else
    # Without a job ID no valid "done(...)" dependency can be built.
    echo "Error: no job ID was returned for test_models_fit; dependent tests and cleanup will not be submitted." >&2
  fi

  # Submit dependent tests with dependency on test_models_fit SUCCESS
  if [ -n "$DEPENDENT_TEST_LIST" ] && [ -n "$MODELS_FIT_JOB_ID" ]; then
    echo "Submitting $(echo $DEPENDENT_TEST_LIST | wc -w) dependent test(s) (will wait for test_models_fit):$DEPENDENT_TEST_LIST" >&2
    for TEST_NAME in $DEPENDENT_TEST_LIST; do
      JOB_ID=$(submit_job "$TEST_NAME" "done($MODELS_FIT_JOB_ID)" | grep -oP 'Job <\K[0-9]+')
      if [ -n "$JOB_ID" ]; then
        DEPENDENT_JOB_IDS="$DEPENDENT_JOB_IDS $JOB_ID"
      else
        # Keep empty IDs out of the list so the cleanup dependency stays valid.
        echo "Warning: no job ID was returned for $TEST_NAME; cleanup will not wait for it." >&2
      fi
    done
  fi

  # Submit cleanup test last - waits for all dependent tests to complete (success or failure)
  # NOTE(review): if test_models_fit fails, the "done(...)" condition of the
  # dependent jobs can never be met; whether those jobs (and therefore the
  # cleanup job) ever end depends on the cluster configuration - confirm.
  if [ -n "$CLEANUP_TEST" ] && [ -n "$DEPENDENT_JOB_IDS" ] && [ "$SKIP_CLEANUP" = false ]; then
    echo "Submitting cleanup test (will wait for all dependent tests to complete)..." >&2

    # Build dependency condition: wait for all dependent jobs to end (regardless of exit status)
    CLEANUP_DEPENDENCY=""
    for JID in $DEPENDENT_JOB_IDS; do
      if [ -z "$CLEANUP_DEPENDENCY" ]; then
        CLEANUP_DEPENDENCY="ended($JID)"
      else
        CLEANUP_DEPENDENCY="$CLEANUP_DEPENDENCY && ended($JID)"
      fi
    done

    submit_job "$CLEANUP_TEST" "$CLEANUP_DEPENDENCY"

    echo "Cleanup test will run after all dependent tests complete" >&2
  elif [ "$SKIP_CLEANUP" = true ]; then
    echo "Skipping cleanup test (--no-cleanup flag set)" >&2
  elif [ -n "$CLEANUP_TEST" ]; then
    echo "Warning: cleanup test not submitted (no dependent test jobs were queued for it to wait on)" >&2
  fi

  # Submit independent tests immediately (no dependency)
  if [ -n "$INDEPENDENT_TEST_LIST" ]; then
    echo "Submitting $(echo $INDEPENDENT_TEST_LIST | wc -w) independent test(s) (run immediately):$INDEPENDENT_TEST_LIST" >&2
    submit_independent_tests
  fi
else
  echo "Error: test_models_fit not found in test suite." >&2
  echo "Error: test_models_fit is a required prerequisite that creates checkpoints for dependent tests." >&2

  # Report what will be skipped
  if [ -n "$DEPENDENT_TEST_LIST" ]; then
    echo "Warning: Skipping $(echo $DEPENDENT_TEST_LIST | wc -w) dependent test(s) (require test_models_fit):$DEPENDENT_TEST_LIST" >&2
  fi

  # Submit only independent tests
  if [ -n "$INDEPENDENT_TEST_LIST" ]; then
    echo "Info: Submitting $(echo $INDEPENDENT_TEST_LIST | wc -w) independent test(s):$INDEPENDENT_TEST_LIST" >&2
    submit_independent_tests
  else
    echo "Error: No independent tests found. Cannot proceed without test_models_fit." >&2
    exit 1
  fi
fi
226+
227+
#######################################
# Print the closing summary: where the logs are, how to monitor the jobs,
# and what happens to the cleanup test.
# The "will run last" note is printed only when at least one dependent job ID
# was recorded, because the cleanup job is submitted only in that case.
# Globals: LOG_DIR, USER, SKIP_CLEANUP, CLEANUP_TEST, DEPENDENT_JOB_IDS (read)
# Outputs: summary text on stdout
#######################################
print_summary() {
  echo "---"
  echo "All tests submitted. Check logs in: $LOG_DIR"
  echo "Monitor with: bjobs -J 'tt_${USER}_*'"
  echo "Note: Dependent tests will only run if test_models_fit passes (exit code 0)"
  if [ "$SKIP_CLEANUP" = true ]; then
    echo "Note: Cleanup test skipped (--no-cleanup flag set)"
  elif [ -n "$CLEANUP_TEST" ]; then
    # Ignore blanks so that a list holding only separators counts as empty.
    if [ -n "${DEPENDENT_JOB_IDS// /}" ]; then
      echo "Note: test_cleanup will run last, after all dependent tests complete (regardless of success/failure)"
    else
      echo "Note: test_cleanup was not submitted (no dependent test jobs were queued for it to wait on)"
    fi
  fi
}

print_summary

0 commit comments

Comments
 (0)