Skip to content

Commit f6adbff

Browse files
committed
ci: Introduce python 3.12 e2e large job flavor
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
1 parent 1532531 commit f6adbff

File tree

3 files changed

+459
-218
lines changed

3 files changed

+459
-218
lines changed

.github/actions/run-e2e/action.yml

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
# Composite action metadata: declares the action, its single input, and opens
# the list of steps that make up the e2e run.
name: "Run e2e tests"
description: "Runs e2e tests"
inputs:
  python-version:
    description: 'Python version to use. Must be in the form of "3.xx".'
    required: true
runs:
  using: composite
  steps:
11+
# Runs step-security/harden-runner with the egress policy set to `audit`.
- name: 'Harden Runner'
  # Pinned by commit SHA. The original annotation labels this SHA as v2.10.1;
  # NOTE(review): confirm the SHA really corresponds to that tag.
  uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0
  with:
    egress-policy: 'audit'
16+
17+
# Prints OS details, makes sure the temp directory exists, and installs the
# compiler toolchain plus the requested Python interpreter and headers.
- name: Install Packages
  shell: bash
  env:
    # Hand the input over via the environment instead of splicing it into
    # the script text, so the shell treats it purely as data.
    PYTHON_VERSION: ${{ inputs.python-version }}
  run: |
    cat /etc/os-release
    # TMPDIR may be unset; fall back to /tmp so mkdir is never given an
    # empty path (which would fail the step).
    mkdir -p "${TMPDIR:-/tmp}"
    sudo dnf install -y gcc gcc-c++ make git "python${PYTHON_VERSION}" "python${PYTHON_VERSION}-devel"
23+
24+
# Clones instructlab/instructlab into ./instructlab, which later steps use as
# their working directory.
- name: "Checkout instructlab/instructlab"
  uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
  with:
    path: instructlab
    repository: instructlab/instructlab
    # fetch-depth 0 requests the full history; background in
    # https://github.com/actions/checkout/issues/249
    fetch-depth: 0
31+
32+
# Classifies the `pr_or_branch` workflow input: an all-digit value is treated
# as a PR number, anything else as a branch name.
# Outputs: `is_pr` ("true"/"false") and `pr_or_branch` (the resolved value).
- name: Determine if pr_or_branch is a PR number
  id: check_pr
  shell: bash
  env:
    # User-supplied input is passed through the environment rather than being
    # interpolated into the script, which prevents shell injection and word
    # splitting. Defaults to 'main' if not set.
    PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || 'main' }}
  run: |
    if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
      echo "is_pr=true" >> "$GITHUB_OUTPUT"
    else
      echo "is_pr=false" >> "$GITHUB_OUTPUT"
    fi
    echo "pr_or_branch=$PR_OR_BRANCH" >> "$GITHUB_OUTPUT"
43+
44+
# Probes for the `gh` binary on PATH and records the result in the
# `gh_cli_installed` output ("true" or "false").
- name: "Check if gh cli is installed"
  id: gh_cli
  shell: bash
  run: |
    gh_present=false
    if command -v gh > /dev/null 2>&1; then
      gh_present=true
    fi
    echo "gh_cli_installed=${gh_present}" >> "$GITHUB_OUTPUT"
53+
54+
# Installs the GitHub CLI from its RPM repository; only runs when the probe
# step reported that `gh` is missing.
- name: "Install gh CLI"
  if: steps.gh_cli.outputs.gh_cli_installed == 'false'
  shell: bash
  run: |
    # config-manager plugin is needed for --add-repo
    sudo dnf install -y 'dnf-command(config-manager)'
    sudo dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo
    sudo dnf install -y gh --repo gh-cli
61+
62+
# Smoke test: fails the job early if `gh` cannot be executed.
- name: "test gh CLI"
  shell: bash
  run: gh --version
66+
67+
# Points `gh` at the repository the workflow is running in, from inside the
# ./training checkout.
- name: set default repo
  working-directory: ./training
  shell: bash
  env:
    # The `secrets` context is not available inside composite actions;
    # `github.token` exposes the same automatically provisioned token.
    GH_TOKEN: ${{ github.token }}
  run: |
    gh repo set-default "${{ github.server_url }}/${{ github.repository }}"
74+
75+
# Announces on the PR that the workflow has started, with a link to the run.
# Only runs when the input resolved to a PR number.
- name: Add comment to PR
  if: steps.check_pr.outputs.is_pr == 'true'
  working-directory: ./training
  shell: bash
  env:
    # The `secrets` context is not available inside composite actions;
    # `github.token` exposes the same automatically provisioned token.
    GH_TOKEN: ${{ github.token }}
  run: |
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "${{ github.workflow }} workflow launched on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})"
83+
84+
# Checks out the PR head in ./training. Only runs when the input resolved to
# a PR number (all digits, as validated by the check_pr step).
- name: Fetch and checkout PR
  if: steps.check_pr.outputs.is_pr == 'true'
  working-directory: ./training
  shell: bash
  env:
    # The `secrets` context is not available inside composite actions;
    # `github.token` exposes the same automatically provisioned token.
    GH_TOKEN: ${{ github.token }}
  run: |
    gh pr checkout "${{ steps.check_pr.outputs.pr_or_branch }}"
92+
93+
# Checks out the requested branch in ./training. Only runs when the input
# did not resolve to a PR number.
- name: Checkout branch
  if: steps.check_pr.outputs.is_pr == 'false'
  working-directory: ./training
  shell: bash
  env:
    # In this branch of the logic the value is an arbitrary user-provided
    # string, so it is passed via the environment and quoted instead of being
    # interpolated into the script text.
    TARGET_BRANCH: ${{ steps.check_pr.outputs.pr_or_branch }}
  run: |
    git checkout "${TARGET_BRANCH}"
99+
100+
# Runs the install script shipped in the instructlab checkout.
- name: "Install ilab"
  shell: bash
  working-directory: ./instructlab
  run: ./scripts/install-ilab-with-cuda.sh
105+
106+
# Installs the ./training checkout (plain, then with the `cuda` extra) into
# the virtualenv that lives under ../instructlab/venv.
- name: Update instructlab-training library
  working-directory: ./training
  shell: bash
  run: |
    . ../instructlab/venv/bin/activate
    pip install .
    # Quoted so the shell cannot treat `.[cuda]` as a glob pattern.
    pip install '.[cuda]'
113+
114+
# Reports disk usage ahead of the test run; `always()` keeps it running even
# if an earlier step failed.
- name: "Check disk before tests"
  if: always()
  shell: bash
  run: df -h
119+
120+
# Runs the e2e script from the instructlab checkout, then collects the rank-0
# training metrics logs and renames them to phase-<N>-training-log.jsonl in
# the working directory so the upload steps can find them.
- name: "Run e2e test"
  shell: bash
  working-directory: ./instructlab
  # NOTE(review): GitHub documents that the `secrets` context is not available
  # inside composite actions; these values probably need to be passed in as
  # action inputs by the calling workflow — confirm and rewire.
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  run: |
    source venv/bin/activate

    # set preserve to true so we can retain the logs
    ./scripts/e2e-ci.sh -lp

    # The training code names its metrics file
    # f"/training_params_and_metrics_global{os.environ['RANK']}.jsonl" and writes it
    # into a directory created by `mktemp -d`, so searching /tmp/ for the rank-0
    # file name locates it.
    # NOTE(review): `mktemp -d` honours TMPDIR; if TMPDIR points outside /tmp
    # this search may come back empty — verify against the runner setup.
    metrics_logs=$(find /tmp/ -name "training_params_and_metrics_global0.jsonl")
    phase=1
    for metrics_log in $metrics_logs; do
      mv "${metrics_log}" phase-${phase}-training-log.jsonl
      phase=$((phase + 1))
    done
141+
142+
# Reports disk usage after the test run; `always()` keeps it running even if
# the tests failed.
- name: "Check disk after tests"
  if: always()
  shell: bash
  run: df -h
147+
148+
# Publishes the phase 1 training log as a one-day artifact, replacing any
# artifact of the same name.
# NOTE(review): unlike the other third-party actions in this file, this one
# is referenced by tag rather than by commit SHA — consider pinning.
- name: "Upload training logs Phase 1"
  uses: actions/upload-artifact@v4
  with:
    path: ./instructlab/phase-1-training-log.jsonl
    name: phase-1-training-log.jsonl
    overwrite: true
    retention-days: 1
155+
156+
# Publishes the phase 2 training log as a one-day artifact, replacing any
# artifact of the same name.
# NOTE(review): unlike the other third-party actions in this file, this one
# is referenced by tag rather than by commit SHA — consider pinning.
- name: "Upload training logs Phase 2"
  uses: actions/upload-artifact@v4
  with:
    path: ./instructlab/phase-2-training-log.jsonl
    name: phase-2-training-log.jsonl
    overwrite: true
    retention-days: 1
163+
164+
# Reports a failed run back to the PR with a link to the run. Only runs for
# PR-triggered runs in which an earlier step failed.
- name: Add comment to PR if the workflow failed
  if: failure() && steps.check_pr.outputs.is_pr == 'true'
  working-directory: ./training
  shell: bash
  env:
    # The `secrets` context is not available inside composite actions;
    # `github.token` exposes the same automatically provisioned token.
    GH_TOKEN: ${{ github.token }}
  run: |
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "e2e workflow failed on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}), please investigate."
172+
173+
# Reports a successful run back to the PR with a link to the run. Only runs
# for PR-triggered runs in which every earlier step succeeded.
- name: Add comment to PR if the workflow succeeded
  if: success() && steps.check_pr.outputs.is_pr == 'true'
  working-directory: ./training
  shell: bash
  env:
    # The `secrets` context is not available inside composite actions;
    # `github.token` exposes the same automatically provisioned token.
    GH_TOKEN: ${{ github.token }}
  run: |
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "e2e workflow succeeded on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}), congrats!"
181+
182+
# Posts a failure message to Slack for branch (non-PR) runs.
# NOTE(review): GitHub documents that the `secrets` context is not available
# inside composite actions; the Slack token probably needs to be passed in as
# an action input — confirm. The job name in the message is hard-coded.
- name: "Post job results to Slack if the workflow failed"
  id: slack-report-failure
  if: failure() && steps.check_pr.outputs.is_pr == 'false'
  uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52 # v2.1.0
  with:
    method: chat.postMessage
    token: ${{ secrets.SON_OF_JEEVES_TOKEN }}
    payload: |
      # Slack channel id, channel name, or user id to post message.
      # See also: https://api.slack.com/methods/chat.postMessage#channels
      # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
      channel: 'e2e-ci-results'
      text: "*e2e-nvidia-l40s-x4* job in *${{ github.repository }}* running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed *with failures* :meow_sad-rain: | ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
195+
196+
# Posts a success message to Slack for branch (non-PR) runs.
# NOTE(review): GitHub documents that the `secrets` context is not available
# inside composite actions; the Slack token probably needs to be passed in as
# an action input — confirm. The job name in the message is hard-coded.
- name: "Post job results to Slack if the workflow succeeded"
  id: slack-report-success
  if: success() && steps.check_pr.outputs.is_pr == 'false'
  uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52 # v2.1.0
  with:
    method: chat.postMessage
    token: ${{ secrets.SON_OF_JEEVES_TOKEN }}
    payload: |
      # Slack channel id, channel name, or user id to post message.
      # See also: https://api.slack.com/methods/chat.postMessage#channels
      # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
      channel: 'e2e-ci-results'
      text: "*e2e-nvidia-l40s-x4* job in *${{ github.repository }}* running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed *successfully* :meow_party: | ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
209+
210+
# Sends a failure notification to Discord for branch (non-PR) runs.
# NOTE(review): GitHub documents that the `secrets` context is not available
# inside composite actions; the webhook probably needs to be passed in as an
# action input — confirm. The title is hard-coded.
- name: "Send Discord notification for failure"
  if: failure() && steps.check_pr.outputs.is_pr == 'false'
  uses: sarisia/actions-status-discord@5ddd3b114a98457dd80a39b2f00b6a998cd69008 # v1.15.3
  with:
    title: "e2e-nvidia-l40s-x4"
    status: ${{ job.status }}
    webhook: ${{ secrets.SON_OF_JEEVES_DISCORD_WEBHOOK }}
    color: 0xCB2431 # Red color for failure
    description: |
      Job in **${{ github.repository }}** running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed **with failures** ❌
      Click [here](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) to view details.
221+
222+
# Sends a success notification to Discord for branch (non-PR) runs.
# NOTE(review): GitHub documents that the `secrets` context is not available
# inside composite actions; the webhook probably needs to be passed in as an
# action input — confirm. The title is hard-coded.
- name: "Send Discord notification for success"
  if: success() && steps.check_pr.outputs.is_pr == 'false'
  uses: sarisia/actions-status-discord@5ddd3b114a98457dd80a39b2f00b6a998cd69008 # v1.15.3
  with:
    title: "e2e-nvidia-l40s-x4"
    status: ${{ job.status }}
    webhook: ${{ secrets.SON_OF_JEEVES_DISCORD_WEBHOOK }}
    color: 0x28A745 # Green color for success
    description: |
      Job in **${{ github.repository }}** running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed **successfully** ✅
      Click [here](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) to view details.

0 commit comments

Comments
 (0)