|
#!/bin/bash
# run_lsf_integrationtest.sh
#
# Clone/checkout a terratorch branch, create (or reuse) a Python virtualenv,
# and submit each test function in integrationtests/test_base_set.py as its
# own LSF job.
#
# Job ordering:
#   * test_models_fit is submitted first (it creates the checkpoints).
#   * Tests in DEPENDENT_TESTS wait for test_models_fit to SUCCEED (done()).
#   * test_cleanup runs after all dependent jobs END, pass or fail (ended()).
#   * All remaining tests are submitted immediately with no dependency.
#
# Usage: run_lsf_integrationtest.sh <branch_name> [target_dir] [venv_base_path] [--no-cleanup]
#   A positional argument starting with '/' or '.' is treated as the venv
#   base path; any other extra positional argument is the target directory.

set -euo pipefail

die() { printf 'Error: %s\n' "$*" >&2; exit 1; }

# --- Parse arguments ---
BRANCH_NAME=""
VENV_BASE_DIR=""
TARGET_DIR=""
SKIP_CLEANUP=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --no-cleanup)
      SKIP_CLEANUP=true
      shift
      ;;
    *)
      if [[ -z "$BRANCH_NAME" ]]; then
        BRANCH_NAME=$1
      elif [[ "$1" == /* || "$1" == .* ]]; then
        # Path-like argument: base directory under which the venv is created.
        VENV_BASE_DIR=$1
        TARGET_DIR="terratorch.$BRANCH_NAME"
      else
        TARGET_DIR=$1
      fi
      shift
      ;;
  esac
done

# --- 1. Validation ---
if [[ -z "$BRANCH_NAME" ]]; then
  echo "Usage: $0 <branch_name> [target_dir] [venv_base_path] [--no-cleanup]"
  echo "  --no-cleanup: Skip running the cleanup test"
  exit 1
fi

# Default checkout directory is derived from the branch name.
TARGET_DIR="${TARGET_DIR:-terratorch.$BRANCH_NAME}"

# Python discovery: prefer the cluster-wide 3.12 install, then fall back.
PYTHON_BIN=$(command -v /dccstor/terratorch/python3.12.3/bin/python3.12 \
  || command -v python3.10 \
  || command -v python3) \
  || die "no suitable python interpreter found"

# Test file, relative to the repository root.
TEST_FILE_PATH="integrationtests/test_base_set.py"

# --- 2. Path setup ---
mkdir -p "$TARGET_DIR"
FULL_PATH=$(cd "$TARGET_DIR" && pwd)

if [[ -n "$VENV_BASE_DIR" ]]; then
  mkdir -p "$VENV_BASE_DIR"
  VENV_ROOT=$(cd "$VENV_BASE_DIR" && pwd)
  VENV_PATH="$VENV_ROOT/venv_$BRANCH_NAME"
else
  VENV_PATH="$FULL_PATH/.venv"
fi

# --- 3. Clone & checkout ---
if [[ ! -d "$FULL_PATH/.git" ]]; then
  echo "Cloning Terratorch into $FULL_PATH ---"
  git clone git@github.com:terrastackai/terratorch.git "$FULL_PATH"
fi

cd "$FULL_PATH" || die "cannot cd to $FULL_PATH"
git fetch origin
# Check out the branch, creating a local tracking branch if it is new here.
git checkout "$BRANCH_NAME" || git checkout -b "$BRANCH_NAME" "origin/$BRANCH_NAME"

# --- 4. Environment setup ---
if [[ ! -d "$VENV_PATH" ]]; then
  echo "Setting up Virtual Environment using $PYTHON_BIN..."
  "$PYTHON_BIN" -m venv "$VENV_PATH"
  # shellcheck disable=SC1091
  source "$VENV_PATH/bin/activate"
  pip install --upgrade pip
  pip install -e ".[test]"
else
  # shellcheck disable=SC1091
  source "$VENV_PATH/bin/activate"
fi

# --- 5. Extract tests from the root-level integrationtests folder ---
if [[ ! -f "$TEST_FILE_PATH" ]]; then
  echo "Error: Test file not found at $FULL_PATH/$TEST_FILE_PATH" >&2
  echo "Current directory content:" >&2
  ls -F >&2
  exit 1
fi

# Pull test function names. NOTE(review): '^def test_' only finds
# module-level defs; tests inside classes would be missed — confirm the
# suite keeps all tests at top level.
TEST_LIST=$(grep -E '^def test_' "$TEST_FILE_PATH" | sed 's/def //g' | cut -d'(' -f1 | tr -d ' ')
[[ -n "$TEST_LIST" ]] || die "no tests found in $TEST_FILE_PATH"

# --- 6. Submit individual jobs ---
LOG_DIR="$FULL_PATH/lsf_logs_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$LOG_DIR"

echo "Found $(echo "$TEST_LIST" | wc -l) tests. Submitting individual jobs..."

#######################################
# Submit one pytest invocation as an LSF GPU job.
# Globals:   LOG_DIR, VENV_PATH, FULL_PATH, TEST_FILE_PATH (read)
# Arguments: $1 - test function name; $@ - extra bsub options (e.g. -w deps)
# Outputs:   the numeric LSF job id on stdout
# Returns:   non-zero if bsub fails or its output has no job id
#######################################
submit_test() {
  local test_name=$1
  shift
  bsub -gpu "num=1" -R "rusage[ngpus=1, cpu=8, mem=128GB]" \
    "$@" \
    -J "tt_${USER}_${test_name}" \
    -o "$LOG_DIR/${test_name}.log" \
    -e "$LOG_DIR/${test_name}.err" \
    "/bin/bash -c 'set -e; source $VENV_PATH/bin/activate && cd $FULL_PATH && pytest $TEST_FILE_PATH -k $test_name; exit \$?'" \
    | grep -oP 'Job <\K[0-9]+'
}

# Separate test_models_fit (the checkpoint producer) from everything else.
MODELS_FIT_TEST=""
OTHER_TESTS=""
for TEST_NAME in $TEST_LIST; do
  if [[ "$TEST_NAME" == "test_models_fit" ]]; then
    MODELS_FIT_TEST="$TEST_NAME"
  else
    OTHER_TESTS="$OTHER_TESTS $TEST_NAME"
  fi
done

# Tests that consume the checkpoints produced by test_models_fit.
DEPENDENT_TESTS="test_latest_terratorch_version_buildings_predict test_latest_terratorch_version_floods_predict test_latest_terratorch_version_burnscars_predict"

# Categorize remaining tests: dependent, independent, or the cleanup test.
DEPENDENT_TEST_LIST=""
INDEPENDENT_TEST_LIST=""
CLEANUP_TEST=""
for TEST_NAME in $OTHER_TESTS; do
  if [[ "$TEST_NAME" == "test_cleanup" ]]; then
    CLEANUP_TEST="$TEST_NAME"
  elif echo "$DEPENDENT_TESTS" | grep -qw "$TEST_NAME"; then
    DEPENDENT_TEST_LIST="$DEPENDENT_TEST_LIST $TEST_NAME"
  else
    INDEPENDENT_TEST_LIST="$INDEPENDENT_TEST_LIST $TEST_NAME"
  fi
done

# Dependent job ids, collected so the cleanup job can wait on all of them.
DEPENDENT_JOB_IDS=""

if [[ -n "$MODELS_FIT_TEST" ]]; then
  echo "Submitting test_models_fit (required prerequisite for dependent tests)..." >&2
  MODELS_FIT_JOB_ID=$(submit_test "$MODELS_FIT_TEST")
  echo "test_models_fit submitted with Job ID: $MODELS_FIT_JOB_ID" >&2

  # Dependent tests start only if test_models_fit SUCCEEDS (done()).
  if [[ -n "$DEPENDENT_TEST_LIST" ]]; then
    echo "Submitting $(echo $DEPENDENT_TEST_LIST | wc -w) dependent test(s) (will wait for test_models_fit):$DEPENDENT_TEST_LIST" >&2
    for TEST_NAME in $DEPENDENT_TEST_LIST; do
      JOB_ID=$(submit_test "$TEST_NAME" -w "done($MODELS_FIT_JOB_ID)")
      DEPENDENT_JOB_IDS="$DEPENDENT_JOB_IDS $JOB_ID"
    done
  fi

  # Cleanup runs after every dependent job ENDS, pass or fail (ended()).
  if [[ -n "$CLEANUP_TEST" && -n "$DEPENDENT_JOB_IDS" && "$SKIP_CLEANUP" == false ]]; then
    echo "Submitting cleanup test (will wait for all dependent tests to complete)..." >&2
    CLEANUP_DEPENDENCY=""
    for JID in $DEPENDENT_JOB_IDS; do
      if [[ -z "$CLEANUP_DEPENDENCY" ]]; then
        CLEANUP_DEPENDENCY="ended($JID)"
      else
        CLEANUP_DEPENDENCY="$CLEANUP_DEPENDENCY && ended($JID)"
      fi
    done
    submit_test "$CLEANUP_TEST" -w "$CLEANUP_DEPENDENCY" >/dev/null
    echo "Cleanup test will run after all dependent tests complete" >&2
  elif [[ "$SKIP_CLEANUP" == true ]]; then
    echo "Skipping cleanup test (--no-cleanup flag set)" >&2
  fi

  # Independent tests have no dependencies and start immediately.
  if [[ -n "$INDEPENDENT_TEST_LIST" ]]; then
    echo "Submitting $(echo $INDEPENDENT_TEST_LIST | wc -w) independent test(s) (run immediately):$INDEPENDENT_TEST_LIST" >&2
    for TEST_NAME in $INDEPENDENT_TEST_LIST; do
      submit_test "$TEST_NAME" >/dev/null
    done
  fi
else
  echo "Error: test_models_fit not found in test suite." >&2
  echo "Error: test_models_fit is a required prerequisite that creates checkpoints for dependent tests." >&2

  # Report what will be skipped.
  if [[ -n "$DEPENDENT_TEST_LIST" ]]; then
    echo "Warning: Skipping $(echo $DEPENDENT_TEST_LIST | wc -w) dependent test(s) (require test_models_fit):$DEPENDENT_TEST_LIST" >&2
  fi

  # Submit only the independent tests; nothing else can run safely.
  if [[ -n "$INDEPENDENT_TEST_LIST" ]]; then
    echo "Info: Submitting $(echo $INDEPENDENT_TEST_LIST | wc -w) independent test(s):$INDEPENDENT_TEST_LIST" >&2
    for TEST_NAME in $INDEPENDENT_TEST_LIST; do
      submit_test "$TEST_NAME" >/dev/null
    done
  else
    die "No independent tests found. Cannot proceed without test_models_fit."
  fi
fi

echo "---"
echo "All tests submitted. Check logs in: $LOG_DIR"
echo "Monitor with: bjobs -J 'tt_${USER}_*'"
echo "Note: Dependent tests will only run if test_models_fit passes (exit code 0)"
if [[ "$SKIP_CLEANUP" == true ]]; then
  echo "Note: Cleanup test skipped (--no-cleanup flag set)"
elif [[ -n "$CLEANUP_TEST" ]]; then
  echo "Note: test_cleanup will run last, after all dependent tests complete (regardless of success/failure)"
fi
0 commit comments