-
Notifications
You must be signed in to change notification settings - Fork 126
207 lines (185 loc) · 7.69 KB
/
unit-tests-recipes.yml
File metadata and controls
207 lines (185 loc) · 7.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
name: 'BioNeMo Recipes CI'

# Triggers: pushes to branches matching the patterns below, merge-queue
# check requests, and a daily schedule.
on:
  push:
    branches:
      - 'pull-request/[0-9]+'
      - 'dependabot/**'
  merge_group:
    types:
      - checks_requested
  schedule:
    # Runs at 9 AM UTC daily (2 AM MST)
    - cron: '0 9 * * *'

# Every `run:` step uses bash with command tracing (-x), exit-on-error (-e),
# unset-variable errors (-u), and pipeline failure propagation, unless the
# step sets its own `shell:`.
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# One active run per workflow and PR number (falling back to the git ref);
# a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Works out which recipe/model directories need testing and which container
  # image each one runs in. The result is exposed as the `dirs` output: a JSON
  # array of {dir, name, image} objects consumed as the unit-tests matrix.
  changed-dirs:
    runs-on: ubuntu-latest
    outputs:
      any_changed: ${{ steps.changed-files.outputs.any_changed }}
      all_changed_files: ${{ steps.changed-files.outputs.all_changed_files }}
      dirs: ${{ steps.set-dirs.outputs.dirs }}
    steps:
      # Only runs on `pull-request/*` refs; its `pr-info` output is read below
      # to look up PR labels.
      - id: get-pr-info
        if: ${{ startsWith(github.ref_name, 'pull-request/') }}
        uses: nv-gha-runners/get-pr-info@main
      # Full history is required so `git merge-base` can find the fork point.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Get merge-base commit
        id: merge-base
        run: |
          # Get the merge-base between current branch and main
          MERGE_BASE=$(git merge-base HEAD origin/main)
          echo "merge-base=$MERGE_BASE" >> "$GITHUB_OUTPUT"
          echo "Merge-base commit: $MERGE_BASE"
      - name: Get changed files
        id: changed-files
        uses: step-security/changed-files@v46
        with:
          json: true
          matrix: true
          base_sha: ${{ steps.merge-base.outputs.merge-base }}
          dir_names: true
          dir_names_max_depth: 3
          files: |
            bionemo-recipes/models/**
            bionemo-recipes/recipes/**
      - id: set-dirs
        name: Determine which directories to run
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_INFO: ${{ steps.get-pr-info.outputs.pr-info }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # Get all recipe and model directories
          ALL_DIRS=$(ls -d bionemo-recipes/models/*/ bionemo-recipes/recipes/*/ 2>/dev/null | jq -R -s -c 'split("\n")[:-1] | map(rtrimstr("/"))')

          # Helper to check for a PR label. Succeeds only when PR_INFO is
          # non-empty, not "null", and contains a label whose name equals $1.
          # The label is passed with --arg so it is never parsed as jq code.
          has_label() {
            [[ "$PR_INFO" != "null" && "$PR_INFO" != "" ]] && \
              echo "$PR_INFO" | jq -e --arg label "$1" '.labels[]? | select(.name == $label)' > /dev/null 2>&1
          }

          # Determine which directories to run
          if [[ "$EVENT_NAME" == "schedule" ]]; then
            echo "Scheduled run - running all directories"
            DIRS="$ALL_DIRS"
          elif has_label "ciflow:skip"; then
            echo "Found 'ciflow:skip' label - skipping all recipe tests"
            DIRS="[]"
          elif has_label "ciflow:all-recipes"; then
            echo "Found 'ciflow:all-recipes' label - running all directories"
            DIRS="$ALL_DIRS"
          else
            # Filter directories to only those that have changed files
            DIRS=$(echo "$ALL_DIRS" | jq -c --argjson changed "$CHANGED_FILES" '
              map(select(. as $dir | $changed | index($dir) != null))
            ')
          fi

          # Assign Docker images to the selected directories
          # Currently, AMPLIFY is the only folder that needs a custom base image, since we have to support both TE and
          # xformers-based models for golden value testing. The rest of the models use the default pytorch image.
          # This uses a squashed version of the pytorch:26.02-py3 image, generated with `docker-squash
          # nvcr.io/nvidia/pytorch:26.02-py3 -t svcbionemo023/bionemo-framework:pytorch26.02-py3-squashed --output
          # type=registry,compression=zstd,force-compression=true,oci-mediatypes=true,compression-level=15` and pushed
          # to the dockerhub registry. Our github actions are able to cache image pulls from dockerhub but not nvcr, so
          # hopefully this cuts down slightly on CI time at the expense of having a slightly indirect image location.
          DIRS_WITH_IMAGES=$(echo "$DIRS" | jq -c '
            map({
              dir: .,
              name: (. | sub("^bionemo-recipes/"; "")),
              image: (
                if . == "bionemo-recipes/models/amplify" then
                  "svcbionemo023/bionemo-framework:amplify-model-devcontainer-082025"
                else
                  # "nvcr.io/nvidia/pytorch:26.02-py3"
                  "svcbionemo023/bionemo-framework:pytorch26.02-py3-squashed"
                end
              )
            })
          ')
          echo "dirs=$DIRS_WITH_IMAGES" >> "$GITHUB_OUTPUT"
      - name: Show output
        # Values are passed through the environment instead of being expanded
        # inline with ${{ }}: branch names and changed paths are not trusted,
        # and a quote or $(...) in either would otherwise be executed as shell.
        env:
          REF_NAME: ${{ github.ref_name }}
          MERGE_BASE: ${{ steps.merge-base.outputs.merge-base }}
          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
          CHANGED_FILES_OUTPUTS: ${{ toJSON(steps.changed-files.outputs) }}
          SET_DIRS_OUTPUTS: ${{ toJSON(steps.set-dirs.outputs) }}
        run: |
          echo "=== Changed Files Analysis ==="
          echo "Current branch: $REF_NAME"
          echo "Merge-base commit: $MERGE_BASE"
          echo "Changed files compared to merge-base:"
          echo "$CHANGED_FILES" | jq -r '.[]' | sed 's/^/ - /'
          echo "Total changed files: $(echo "$CHANGED_FILES" | jq '. | length')"
          echo "$CHANGED_FILES_OUTPUTS"
          echo "$SET_DIRS_OUTPUTS"
        shell: bash

  # Runs pytest once per selected directory, inside that directory's image.
  # Skipped entirely when changed-dirs selected nothing (`dirs` == '[]').
  unit-tests:
    needs: changed-dirs
    runs-on: linux-amd64-gpu-l4-latest-1
    if: ${{ needs.changed-dirs.outputs.dirs != '[]' }}
    name: "unit-tests (${{ matrix.recipe.name }})"
    container:
      image: ${{ matrix.recipe.image }}
      options: --shm-size=16G
    env:
      CI: true
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      HF_HOME: /cache/huggingface
    strategy:
      matrix:
        recipe: ${{ fromJson(needs.changed-dirs.outputs.dirs) }}
      # Let the remaining directories finish even if one of them fails.
      fail-fast: false
    steps:
      - name: Show GPU info
        run: nvidia-smi
      - name: Setup proxy cache
        uses: nv-gha-runners/setup-proxy-cache@main
      # Only the directory under test is checked out.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          sparse-checkout: "${{ matrix.recipe.dir }}"
          sparse-checkout-cone-mode: false
      - name: Cache Hugging Face models
        uses: actions/cache@v4
        with:
          path: /cache/huggingface
          key: ${{ runner.os }}-huggingface-${{ matrix.recipe.name }}-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-huggingface-${{ matrix.recipe.name }}-
            ${{ runner.os }}-huggingface-
      - name: Install dependencies
        working-directory: ${{ matrix.recipe.dir }}
        # The directory name is passed via env rather than expanded inline so
        # it is never interpreted as shell source.
        env:
          RECIPE_DIR: ${{ matrix.recipe.dir }}
        run: |
          # Install precedence: a custom .ci_build.sh, then an editable
          # package install, then requirements.txt; otherwise fail the job.
          # PIP_CONSTRAINT is set to empty for the pip invocations.
          if [ -f .ci_build.sh ]; then
            bash .ci_build.sh
          elif [ -f pyproject.toml ] || [ -f setup.py ]; then
            PIP_CONSTRAINT= pip install -e .
            echo "Installed $RECIPE_DIR as editable package"
          elif [ -f requirements.txt ]; then
            PIP_CONSTRAINT= pip install -r requirements.txt
            echo "Installed $RECIPE_DIR from requirements.txt"
          else
            echo "No pyproject.toml, setup.py, or requirements.txt found in $RECIPE_DIR"
            exit 1
          fi
      - name: Run tests
        working-directory: ${{ matrix.recipe.dir }}
        run: |
          # Optional per-directory environment setup for the test run.
          if [ -f .ci_test_env.sh ]; then
            source .ci_test_env.sh
          fi
          pytest -v .

  verify-recipe-tests:
    # This job checks the status of the unit-tests matrix and fails if any matrix job failed or was cancelled.
    # It also fails if changed-dirs did not succeed: in that case unit-tests is skipped, and a skipped matrix
    # must not be mistaken for "nothing to test".
    # Use this job as the required check for PRs.
    needs: [changed-dirs, unit-tests]
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Check unit-tests matrix status
        env:
          CHANGED_DIRS_RESULT: ${{ needs.changed-dirs.result }}
          UNIT_TESTS_RESULT: ${{ needs.unit-tests.result }}
        run: |
          if [[ "$CHANGED_DIRS_RESULT" != "success" ]]; then
            echo "The changed-dirs job did not succeed (result: $CHANGED_DIRS_RESULT); unit tests could not be selected!"
            exit 1
          fi
          if [[ "$UNIT_TESTS_RESULT" == "failure" || "$UNIT_TESTS_RESULT" == "cancelled" ]]; then
            echo "Some unit-tests matrix jobs have failed or been cancelled!"
            exit 1
          else
            echo "All unit-tests matrix jobs have completed successfully or were skipped!"
            exit 0
          fi