Skip to content

Commit 3a1ae78

Browse files
committed
ci: Introduce python 3.12 e2e large job flavor
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
1 parent 1532531 commit 3a1ae78

File tree

3 files changed

+489
-218
lines changed

3 files changed

+489
-218
lines changed

.github/actions/run-e2e/action.yml

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
# Composite action metadata: display name and one-line summary.
name: 'Run e2e tests'
description: 'Runs e2e tests'

# Values the calling workflow has to pass in; every input is mandatory.
inputs:
  # Appended to "python" to form package and interpreter names in the steps.
  python-version:
    description: 'Python version to use. Must be in the form of "3.xx".'
    required: true
  # Exported as GH_TOKEN for the gh CLI steps.
  gh-token:
    description: 'GitHub token to use for authentication.'
    required: true
  # Exported as HF_TOKEN for the e2e test step.
  hf-token:
    description: 'Hugging Face token to use for authentication.'
    required: true
  # Exported as OPENAI_API_KEY for the e2e test step.
  openai-api-key:
    description: 'OpenAI API key to use for authentication.'
    required: true
  # Token handed to the Slack notification steps.
  son-of-jeeves-token:
    description: 'Son of Jeeves token (Slack).'
    required: true
  # Webhook handed to the Discord notification steps.
  son-of-jeeves-discord-webhook:
    description: 'Son of Jeeves webhook (Discord).'
    required: true
28+
runs:
29+
using: "composite"
30+
steps:
31+
- name: "Harden Runner"
  # Pinned by commit SHA; the tag noted by the original author is v2.10.1.
  # NOTE(review): confirm that this SHA really corresponds to that tag.
  uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0
  with:
    # Egress policy is set to "audit".
    egress-policy: audit
36+
37+
- name: Install Packages
  shell: bash
  run: |
    # Print the OS identification into the job log.
    cat /etc/os-release
    # TMPDIR is not set by this action; presumably the caller exports it.
    mkdir -p "${TMPDIR}"
    # Build toolchain plus the interpreter (and headers) for the requested
    # Python version.
    sudo dnf install -y \
      gcc gcc-c++ make git \
      python${{ inputs.python-version }} \
      python${{ inputs.python-version }}-devel
43+
44+
- name: Checkout instructlab/instructlab
  uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
  with:
    # Clone the instructlab repository into ./instructlab.
    repository: 'instructlab/instructlab'
    path: 'instructlab'
    # Full history instead of a shallow clone; see
    # https://github.com/actions/checkout/issues/249
    fetch-depth: 0
51+
52+
- name: Determine if pr_or_branch is a PR number
  id: check_pr
  shell: bash
  env:
    # The user-supplied value is handed over through the environment instead
    # of being interpolated into the script text, so it can never be parsed
    # as shell syntax. Defaults to 'main' if not set.
    PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || 'main' }}
  run: |
    # A value made only of digits is treated as a PR number; anything else
    # is treated as a branch name.
    if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
      echo "is_pr=true" >> "$GITHUB_OUTPUT"
    else
      echo "is_pr=false" >> "$GITHUB_OUTPUT"
    fi
    # Expose the resolved value to later steps.
    echo "pr_or_branch=$PR_OR_BRANCH" >> "$GITHUB_OUTPUT"
63+
64+
- name: Check if gh cli is installed
  id: gh_cli
  shell: bash
  run: |
    # Report through the gh_cli_installed output whether `gh` is on PATH.
    gh_cli_installed=false
    if command -v gh &> /dev/null ; then
      gh_cli_installed=true
    fi
    echo "gh_cli_installed=${gh_cli_installed}" >> "$GITHUB_OUTPUT"
73+
74+
- name: Install gh CLI
  # Runs only when the detection step reported that gh is missing.
  if: steps.gh_cli.outputs.gh_cli_installed == 'false'
  shell: bash
  run: |
    # config-manager plugin is needed for --add-repo below.
    sudo dnf install 'dnf-command(config-manager)' -y
    # Register the gh-cli RPM repository, then install gh from it.
    sudo dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo
    sudo dnf install gh --repo gh-cli -y
81+
82+
- name: test gh CLI
  shell: bash
  # Fails the step if gh cannot be executed; also logs its version.
  run: gh --version
86+
87+
- name: set default repo
  shell: bash
  working-directory: ./training
  env:
    GH_TOKEN: ${{ inputs.gh-token }}
  run: |
    # Point gh at the repository this workflow is running for.
    gh repo set-default ${{ github.server_url }}/${{ github.repository }}
94+
95+
- name: Add comment to PR
  # Only when the requested target was recognised as a PR number.
  if: steps.check_pr.outputs.is_pr == 'true'
  shell: bash
  working-directory: ./training
  env:
    GH_TOKEN: ${{ inputs.gh-token }}
  run: |
    # Announce on the PR that the workflow started, with a link to this run.
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "${{ github.workflow }} workflow launched on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})"
103+
104+
- name: Fetch and checkout PR
  # PR path: the check_pr step only sets is_pr=true for all-digit values.
  if: steps.check_pr.outputs.is_pr == 'true'
  shell: bash
  working-directory: ./training
  env:
    GH_TOKEN: ${{ inputs.gh-token }}
  run: |
    # Let gh fetch the PR head and switch the ./training checkout to it.
    gh pr checkout ${{ steps.check_pr.outputs.pr_or_branch }}
112+
113+
- name: Checkout branch
  # Branch path: taken when the requested target is not a PR number.
  if: steps.check_pr.outputs.is_pr == 'false'
  working-directory: ./training
  shell: bash
  env:
    # The branch name is free-form text, so it is passed via the environment
    # and quoted below; it is never spliced into the script source.
    TARGET_BRANCH: ${{ steps.check_pr.outputs.pr_or_branch }}
  run: |
    git checkout "${TARGET_BRANCH}"
119+
120+
- name: Install ilab
  shell: bash
  working-directory: ./instructlab
  env:
    # Interpreter name for the install script, built from the
    # python-version input.
    PYTHON: python${{ inputs.python-version }}
  run: |
    ./scripts/install-ilab-with-cuda.sh
125+
126+
- name: Update instructlab-training library
  working-directory: ./training
  shell: bash
  run: |
    # Reuse the virtualenv that lives in the instructlab checkout.
    . ../instructlab/venv/bin/activate
    # Install the checked-out ./training sources, then the cuda extra.
    pip install .
    # Quoted so the shell cannot treat ".[cuda]" as a glob pattern and
    # expand it to a matching dot-file (".c", ".u", ".d", ".a").
    pip install '.[cuda]'
133+
134+
- name: Check disk before tests
  # Runs regardless of whether earlier steps succeeded.
  if: always()
  shell: bash
  # Log filesystem usage in human-readable units.
  run: df -h
139+
140+
- name: Run e2e test
  working-directory: ./instructlab
  env:
    HF_TOKEN: ${{ inputs.hf-token }}
    OPENAI_API_KEY: ${{ inputs.openai-api-key }}
  shell: bash
  run: |
    . venv/bin/activate

    # set preserve to true so we can retain the logs
    ./scripts/e2e-ci.sh -lp

    # we know that the file will be named something like f"/training_params_and_metrics_global{os.environ['RANK']}.jsonl" in python
    # and we know that it will be written into a directory created by `mktemp -d`.
    # Given this information, we can use the following command to find the file.
    #
    # `find` returns matches in an unspecified order, so the list is sorted by
    # modification time (oldest first) to make the phase numbering stable.
    # NOTE(review): presumably the older log belongs to the earlier training
    # phase; confirm against the e2e script.
    log_files=$(
      find /tmp/ -name "training_params_and_metrics_global0.jsonl" -printf '%T@ %p\n' \
        | sort -n \
        | cut -d' ' -f2-
    )

    # Read one path per line so paths are never split on whitespace.
    phase_num=1
    while IFS= read -r log_file; do
      # A here-string yields a single empty line when nothing was found.
      [[ -n "${log_file}" ]] || continue
      mv "${log_file}" "phase-${phase_num}-training-log.jsonl"
      # Plain arithmetic expansion: its exit status cannot trip `set -e`.
      phase_num=$((phase_num + 1))
    done <<< "${log_files}"
161+
162+
- name: Check disk after tests
  # Runs regardless of whether earlier steps succeeded.
  if: always()
  shell: bash
  # Log filesystem usage in human-readable units.
  run: df -h
167+
168+
- name: Upload training logs Phase 1
  # Pinned to a commit SHA like the other third-party actions in this file,
  # instead of the mutable `v4` tag.
  uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
  with:
    # Log file renamed by the e2e test step.
    name: phase-1-training-log.jsonl
    path: ./instructlab/phase-1-training-log.jsonl
    retention-days: 1
    # Replace an existing artifact with the same name.
    overwrite: true
175+
176+
- name: Upload training logs Phase 2
  # Pinned to a commit SHA like the other third-party actions in this file,
  # instead of the mutable `v4` tag.
  uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
  with:
    # Log file renamed by the e2e test step.
    name: phase-2-training-log.jsonl
    path: ./instructlab/phase-2-training-log.jsonl
    retention-days: 1
    # Replace an existing artifact with the same name.
    overwrite: true
183+
184+
- name: Add comment to PR if the workflow failed
  # A previous step failed and the target is a PR.
  if: failure() && steps.check_pr.outputs.is_pr == 'true'
  shell: bash
  working-directory: ./training
  env:
    GH_TOKEN: ${{ inputs.gh-token }}
  run: |
    # Report the failure on the PR with a link to this run.
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "e2e workflow failed on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}), please investigate."
192+
193+
- name: Add comment to PR if the workflow succeeded
  # All previous steps succeeded and the target is a PR.
  if: success() && steps.check_pr.outputs.is_pr == 'true'
  shell: bash
  working-directory: ./training
  env:
    GH_TOKEN: ${{ inputs.gh-token }}
  run: |
    # Report the success on the PR with a link to this run.
    gh pr comment "${{ steps.check_pr.outputs.pr_or_branch }}" -b "e2e workflow succeeded on this PR: [View run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}), congrats!"
201+
202+
- name: Post job results to Slack if the workflow failed
  id: slack-report-failure
  # A previous step failed and the target is a branch, not a PR.
  if: failure() && steps.check_pr.outputs.is_pr == 'false'
  uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52  # v2.1.0
  with:
    method: chat.postMessage
    token: ${{ inputs.son-of-jeeves-token }}
    # The payload is itself YAML text handed to the action as one string.
    payload: |
      # Slack channel id, channel name, or user id to post message.
      # See also: https://api.slack.com/methods/chat.postMessage#channels
      # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
      channel: 'e2e-ci-results'
      text: "*e2e-nvidia-l40s-x4* job in *${{ github.repository }}* running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed *with failures* :meow_sad-rain: | ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
215+
216+
- name: Post job results to Slack if the workflow succeeded
  id: slack-report-success
  # All previous steps succeeded and the target is a branch, not a PR.
  if: success() && steps.check_pr.outputs.is_pr == 'false'
  uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52  # v2.1.0
  with:
    method: chat.postMessage
    token: ${{ inputs.son-of-jeeves-token }}
    # The payload is itself YAML text handed to the action as one string.
    payload: |
      # Slack channel id, channel name, or user id to post message.
      # See also: https://api.slack.com/methods/chat.postMessage#channels
      # You can pass in multiple channels to post to by providing a comma-delimited list of channel IDs.
      channel: 'e2e-ci-results'
      text: "*e2e-nvidia-l40s-x4* job in *${{ github.repository }}* running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed *successfully* :meow_party: | ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
229+
230+
- name: Send Discord notification for failure
  # A previous step failed and the target is a branch, not a PR.
  if: failure() && steps.check_pr.outputs.is_pr == 'false'
  uses: sarisia/actions-status-discord@5ddd3b114a98457dd80a39b2f00b6a998cd69008  # v1.15.3
  with:
    webhook: ${{ inputs.son-of-jeeves-discord-webhook }}
    title: "e2e-nvidia-l40s-x4"
    status: ${{ job.status }}
    color: 0xCB2431 # Red color for failure
    # Message body: names the repository and branch, links to this run.
    description: |
      Job in **${{ github.repository }}** running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed **with failures** ❌
      Click [here](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) to view details.
241+
242+
- name: Send Discord notification for success
  # All previous steps succeeded and the target is a branch, not a PR.
  if: success() && steps.check_pr.outputs.is_pr == 'false'
  uses: sarisia/actions-status-discord@5ddd3b114a98457dd80a39b2f00b6a998cd69008  # v1.15.3
  with:
    webhook: ${{ inputs.son-of-jeeves-discord-webhook }}
    title: "e2e-nvidia-l40s-x4"
    status: ${{ job.status }}
    color: 0x28A745 # Green color for success
    # Message body: names the repository and branch, links to this run.
    description: |
      Job in **${{ github.repository }}** running on branch `${{ steps.check_pr.outputs.pr_or_branch }}` completed **successfully** ✅
      Click [here](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) to view details.

0 commit comments

Comments
 (0)