Skip to content

Commit ea2103b

Browse files
committed
update workflow
1 parent 7c30e6f commit ea2103b

File tree

1 file changed

+59
-7
lines changed

1 file changed

+59
-7
lines changed

.github/workflows/enroot-tests.yml

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ on:
4747
type: boolean
4848
default: false
4949
testbed_file:
50-
description: 'Path to testbed file (e.g. tests/enroot/testbeds/mi325.yaml) - defaults to secrets.TESTBED_FILE'
50+
description: 'Path to testbed file (overrides secret-based testbed). If not provided, uses SINGLE_NODE_TESTBED_FILE or MULTI_NODE_TESTBED_FILE secrets (which should contain YAML content).'
5151
required: false
5252
type: string
5353
default: ''
@@ -106,6 +106,44 @@ jobs:
106106
python3 -m pip install --upgrade pip
107107
pip install -r tests/enroot/requirements.txt
108108
109+
- name: Create testbed file from secret
  # Materializes the testbed YAML held in repository secrets as files under
  # tests/enroot/testbed/ so that later steps can reference them by path.
  #
  # The condition is a bare expression in a folded, stripped scalar (`>-`).
  # Writing it as `if: |` followed by `${{ ... }}` leaves a trailing newline
  # after the expression; the runner then treats the value as a string
  # template rather than a single expression, and the resulting non-empty
  # string is always truthy, so the step would never be skipped.
  # NOTE(review): other steps in this workflow that use the same
  # `if: |` + `${{ ... }}` shape need the same change to honor their
  # conditions.
  if: >-
    github.event_name == 'push' ||
    (github.event_name == 'workflow_dispatch' && (
    (matrix.test_name == 'test_single_node_pytorch' && inputs.run_single_node_test == true) ||
    (matrix.test_name == 'test_multi_node_distributed_pytorch' && inputs.run_multi_node_test == true) ||
    (matrix.test_name == 'test_multi_node_rccl' && inputs.run_rccl_test == true)
    ))
  working-directory: tests/enroot
  env:
    # Secrets are passed through the environment (not interpolated into the
    # script text) so their content is never parsed by the shell.
    SINGLE_NODE_TESTBED: ${{ secrets.SINGLE_NODE_TESTBED_FILE }}
    MULTI_NODE_TESTBED: ${{ secrets.MULTI_NODE_TESTBED_FILE }}
  run: |
    # Create testbed files from secrets (secrets contain YAML content)

    # The files hold secret material: keep the directory and files private
    # to the runner user. The umask only affects this step's shell.
    umask 077
    mkdir -p testbed

    # write_testbed CONTENT OUTPUT_PATH SECRET_NAME
    # Writes CONTENT to OUTPUT_PATH when it is non-empty; otherwise emits a
    # warning naming the missing secret and leaves the file absent.
    write_testbed() {
      if [ -n "$1" ]; then
        printf '%s\n' "$1" > "$2"
        echo "Created $2 from secret"
      else
        echo "[WARNING] $3 secret is not set"
      fi
    }

    # Write single-node testbed if secret exists
    write_testbed "$SINGLE_NODE_TESTBED" testbed/single_node_tb.yml SINGLE_NODE_TESTBED_FILE

    # Write multi-node testbed if secret exists
    write_testbed "$MULTI_NODE_TESTBED" testbed/multi_node_tb.yml MULTI_NODE_TESTBED_FILE

    # List created testbed files for debugging (names and sizes only; the
    # file contents are never printed)
    echo "Testbed files created:"
    ls -la testbed/ || echo "No testbed directory"
146+
109147
- name: Run enroot tests
110148
if: |
111149
${{
@@ -123,11 +161,11 @@ jobs:
123161
124162
# Determine testbed file and docker image based on test type and event
125163
if [ "${{ github.event_name }}" = "push" ]; then
126-
# For push events: use test-type-specific secrets and default images from batch scripts
164+
# For push events: use test-type-specific testbed files and default images from batch scripts
127165
if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
128-
TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
166+
TESTBED_FILE="testbed/single_node_tb.yml"
129167
else
130-
TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
168+
TESTBED_FILE="testbed/multi_node_tb.yml"
131169
fi
132170
DOCKER_IMAGE=""
133171
NO_INSTALL="false"
@@ -138,9 +176,9 @@ jobs:
138176
TESTBED_FILE="${{ inputs.testbed_file }}"
139177
else
140178
if [ "$TEST_NAME" = "test_single_node_pytorch" ]; then
141-
TESTBED_FILE="${{ secrets.SINGLE_NODE_TESTBED_FILE }}"
179+
TESTBED_FILE="testbed/single_node_tb.yml"
142180
else
143-
TESTBED_FILE="${{ secrets.MULTI_NODE_TESTBED_FILE }}"
181+
TESTBED_FILE="testbed/multi_node_tb.yml"
144182
fi
145183
fi
146184
NO_INSTALL="${{ inputs.no_install }}"
@@ -156,6 +194,17 @@ jobs:
156194
fi
157195
fi
158196
197+
# Validate testbed file exists
198+
if [ ! -f "$TESTBED_FILE" ]; then
199+
echo "[ERROR] Testbed file not found: $TESTBED_FILE"
200+
echo "Please ensure the appropriate secret is set:"
201+
echo " - SINGLE_NODE_TESTBED_FILE for single-node tests"
202+
echo " - MULTI_NODE_TESTBED_FILE for multi-node tests"
203+
echo "Or provide a custom testbed_file input via workflow_dispatch."
204+
exit 1
205+
fi
206+
echo "Using testbed file: $TESTBED_FILE"
207+
159208
# Run RCCL test differently (pytest directly)
160209
if [ "$TEST_NAME" = "test_multi_node_rccl" ]; then
161210
# For RCCL test: extract version tag from docker image if provided
@@ -168,10 +217,13 @@ jobs:
168217
echo "Using RCCL Docker image version: $DOCKER_IMAGE_VERSION"
169218
fi
170219
220+
# Convert testbed file to absolute path before changing directory
221+
TESTBED_FILE_ABS="$(pwd)/$TESTBED_FILE"
222+
171223
# Set PYTHONPATH and cd to testsuites directory for pytest
172224
export PYTHONPATH=$(pwd):$PYTHONPATH
173225
cd testsuites
174-
python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE" -k test_multi_node_rccl --no-install --no-uninstall
226+
python3 -m pytest test_enroot.py --testbed "$TESTBED_FILE_ABS" -k test_multi_node_rccl --no-install --no-uninstall
175227
else
176228
# For other tests: use run_test.py
177229
python3 run_test.py "$TEST_NAME" "$DOCKER_IMAGE" "$NO_INSTALL" "$NO_UNINSTALL" "$TESTBED_FILE"

0 commit comments

Comments
 (0)