Skip to content

Commit 59dd169

Browse files
committed
merge conflicts
2 parents d12083a + efd2883 commit 59dd169

File tree

13 files changed

+321
-153
lines changed

13 files changed

+321
-153
lines changed

.github/workflows/doca.yml

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
# Builds an image with the "doca" inventory group enabled on top of the current
# fat images (one per matrix entry), to check the DOCA role still builds.
name: Test DOCA extra build
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
      - 'ansible/roles/doca/**'
      # Path filters match the whole file path, so the extension is required
      # for changes to this workflow file to trigger a run.
      - '.github/workflows/doca.yml'
  pull_request:
    paths:
      - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
      - 'ansible/roles/doca/**'
      - '.github/workflows/doca.yml'

jobs:
  doca:
    name: doca-build
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
      cancel-in-progress: true
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false # allow other matrix jobs to continue even if one fails
      matrix: # build RL8, RL9
        build:
          - image_name: openhpc-doca-RL8
            source_image_name_key: RL8 # key into environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
            inventory_groups: doca
          - image_name: openhpc-doca-RL9
            source_image_name_key: RL9
            inventory_groups: doca
    env:
      ANSIBLE_FORCE_COLOR: True
      OS_CLOUD: openstack
      CI_CLOUD: ${{ vars.CI_CLOUD }} # default from repo settings
      ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}

    steps:
      - uses: actions/checkout@v2

      - name: Load current fat images into GITHUB_ENV
        # see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-of-a-multiline-string
        run: |
          {
            echo 'FAT_IMAGES<<EOF'
            cat environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
            echo EOF
          } >> "$GITHUB_ENV"

      - name: Record settings
        run: |
          echo CI_CLOUD: ${{ env.CI_CLOUD }}
          echo FAT_IMAGES: ${FAT_IMAGES}

      - name: Setup ssh
        # The secret name is derived from CI_CLOUD, e.g. <CI_CLOUD>_SSH_KEY.
        run: |
          set -x
          mkdir ~/.ssh
          echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa
          chmod 0600 ~/.ssh/id_rsa
        shell: bash

      - name: Add bastion's ssh key to known_hosts
        run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
        shell: bash

      - name: Install ansible etc
        run: dev/setup-env.sh

      - name: Write clouds.yaml
        # The secret name is derived from CI_CLOUD, e.g. <CI_CLOUD>_CLOUDS_YAML.
        run: |
          mkdir -p ~/.config/openstack/
          echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
        shell: bash

      - name: Setup environment
        # NOTE(review): each `run` step gets its own shell, so this activation
        # presumably does not carry over; later steps source these again.
        run: |
          . venv/bin/activate
          . environments/.stackhpc/activate

      - name: Build fat image with packer
        id: packer_build
        # source_image_name is looked up in the FAT_IMAGES JSON loaded above,
        # using the matrix entry's source_image_name_key.
        run: |
          set -x
          . venv/bin/activate
          . environments/.stackhpc/activate
          cd packer/
          packer init .

          PACKER_LOG=1 packer build \
          -on-error=${{ vars.PACKER_ON_ERROR }} \
          -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
          -var "source_image_name=${{ fromJSON(env.FAT_IMAGES)['cluster_image'][matrix.build.source_image_name_key] }}" \
          -var "image_name=${{ matrix.build.image_name }}" \
          -var "inventory_groups=${{ matrix.build.inventory_groups }}" \
          openstack.pkr.hcl

      - name: Get created image names from manifest
        id: manifest
        # Polls until the image from the last manifest entry can be shown, then
        # exposes its ID and name as step outputs and as text files.
        run: |
          . venv/bin/activate
          IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json)
          while ! openstack image show -f value -c name $IMAGE_ID; do
            sleep 5
          done
          IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID)
          echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT"
          echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT"
          echo $IMAGE_ID > image-id.txt
          echo $IMAGE_NAME > image-name.txt

      - name: Make image usable for further builds
        run: |
          . venv/bin/activate
          openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}"

      - name: Delete image for automatically-run workflows
        # Only manually-dispatched runs keep the built image.
        run: |
          . venv/bin/activate
          openstack image delete "${{ steps.manifest.outputs.image-id }}"
        if: ${{ github.event_name != 'workflow_dispatch' }}

      - name: Upload manifest artifact
        uses: actions/upload-artifact@v4
        with:
          name: image-details-${{ matrix.build.image_name }}
          path: |
            ./image-id.txt
            ./image-name.txt
          overwrite: true

.github/workflows/fatimage.yml

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,23 @@ jobs:
1515
openstack:
1616
name: openstack-imagebuild
1717
concurrency:
18-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build
18+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
1919
cancel-in-progress: true
2020
runs-on: ubuntu-22.04
2121
strategy:
2222
fail-fast: false # allow other matrix jobs to continue even if one fails
2323
matrix: # build RL8, RL9
24-
os_version:
25-
- RL8
26-
- RL9
2724
build:
28-
- openstack.openhpc
25+
- image_name: openhpc-RL8
26+
source_image_name: rocky-latest-RL8
27+
inventory_groups: control,compute,login
28+
- image_name: openhpc-RL9
29+
source_image_name: rocky-latest-RL9
30+
inventory_groups: control,compute,login
2931
env:
3032
ANSIBLE_FORCE_COLOR: True
3133
OS_CLOUD: openstack
3234
CI_CLOUD: ${{ github.event.inputs.ci_cloud }}
33-
SOURCE_IMAGES_MAP: |
34-
{
35-
"RL8": {
36-
"openstack.openhpc": "rocky-latest-RL8"
37-
},
38-
"RL9": {
39-
"openstack.openhpc": "rocky-latest-RL9"
40-
}
41-
}
4235
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
4336
LEAFCLOUD_PULP_PASSWORD: ${{ secrets.LEAFCLOUD_PULP_PASSWORD }}
4437

@@ -86,13 +79,11 @@ jobs:
8679
8780
PACKER_LOG=1 packer build \
8881
-on-error=${{ vars.PACKER_ON_ERROR }} \
89-
-only=${{ matrix.build }} \
9082
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
91-
-var "source_image_name=${{ env.SOURCE_IMAGE }}" \
83+
-var "source_image_name=${{ matrix.build.source_image_name }}" \
84+
-var "image_name=${{ matrix.build.image_name }}" \
85+
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
9286
openstack.pkr.hcl
93-
env:
94-
PKR_VAR_os_version: ${{ matrix.os_version }}
95-
SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }}
9687
9788
- name: Get created image names from manifest
9889
id: manifest
@@ -103,13 +94,20 @@ jobs:
10394
sleep 5
10495
done
10596
IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID)
97+
echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT"
98+
echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT"
10699
echo $IMAGE_ID > image-id.txt
107100
echo $IMAGE_NAME > image-name.txt
108101
102+
- name: Make image usable for further builds
103+
run: |
104+
. venv/bin/activate
105+
openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}"
106+
109107
- name: Upload manifest artifact
110108
uses: actions/upload-artifact@v4
111109
with:
112-
name: image-details-${{ matrix.build }}-${{ matrix.os_version }}
110+
name: image-details-${{ matrix.build.image_name }}
113111
path: |
114112
./image-id.txt
115113
./image-name.txt

.github/workflows/nightlybuild.yml

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,29 @@ on:
1111
- SMS
1212
- ARCUS
1313
schedule:
14-
- cron: '0 0 * * *' # Run at midnight
14+
- cron: '0 0 * * *' # Run at midnight on default branch
1515

1616
jobs:
1717
openstack:
1818
name: openstack-imagebuild
1919
concurrency:
20-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build
20+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
2121
cancel-in-progress: true
2222
runs-on: ubuntu-22.04
2323
strategy:
2424
fail-fast: false # allow other matrix jobs to continue even if one fails
2525
matrix: # build RL8, RL9
26-
os_version:
27-
- RL8
28-
- RL9
2926
build:
30-
- openstack.rocky-latest
27+
- image_name: rocky-latest-RL8
28+
source_image_name: Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2
29+
inventory_groups: update
30+
- image_name: rocky-latest-RL9
31+
source_image_name: Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2
32+
inventory_groups: update
3133
env:
3234
ANSIBLE_FORCE_COLOR: True
3335
OS_CLOUD: openstack
3436
CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }}
35-
SOURCE_IMAGES_MAP: |
36-
{
37-
"RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2",
38-
"RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2"
39-
}
4037
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
4138
LEAFCLOUD_PULP_PASSWORD: ${{ secrets.LEAFCLOUD_PULP_PASSWORD }}
4239

@@ -84,15 +81,12 @@ jobs:
8481
8582
PACKER_LOG=1 packer build \
8683
-on-error=${{ vars.PACKER_ON_ERROR }} \
87-
-only=${{ matrix.build }} \
8884
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
89-
-var "source_image_name=${{ env.SOURCE_IMAGE }}" \
85+
-var "source_image_name=${{ matrix.build.source_image_name }}" \
86+
-var "image_name=${{ matrix.build.image_name }}" \
87+
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
9088
openstack.pkr.hcl
9189
92-
env:
93-
PKR_VAR_os_version: ${{ matrix.os_version }}
94-
SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }}
95-
9690
- name: Get created image names from manifest
9791
id: manifest
9892
run: |
@@ -126,7 +120,7 @@ jobs:
126120
name: upload-nightly-targets
127121
needs: openstack
128122
concurrency:
129-
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }}
123+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }}-${{ matrix.target_cloud }}
130124
cancel-in-progress: true
131125
runs-on: ubuntu-22.04
132126
strategy:
@@ -136,18 +130,15 @@ jobs:
136130
- LEAFCLOUD
137131
- SMS
138132
- ARCUS
139-
os_version:
140-
- RL8
141-
- RL9
142-
image:
143-
- rocky-latest
133+
build:
134+
- image_name: rocky-latest-RL8
135+
- image_name: rocky-latest-RL9
144136
exclude:
145137
- target_cloud: LEAFCLOUD
146138
env:
147139
OS_CLOUD: openstack
148140
SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }}
149141
TARGET_CLOUD: ${{ matrix.target_cloud }}
150-
IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}"
151142
steps:
152143
- uses: actions/checkout@v2
153144

@@ -162,42 +153,37 @@ jobs:
162153
. venv/bin/activate
163154
pip install -U pip
164155
pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt)
165-
shell: bash
166156
167157
- name: Write clouds.yaml
168158
run: |
169159
mkdir -p ~/.config/openstack/
170160
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml
171161
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml
172-
shell: bash
173162
174163
- name: Download source image
175164
run: |
176165
. venv/bin/activate
177166
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml
178-
openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }}
179-
shell: bash
167+
openstack image save --file ${{ matrix.build.image_name }} ${{ matrix.build.image_name }}
180168
181169
- name: Upload to target cloud
182170
run: |
183171
. venv/bin/activate
184172
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml
185173
186-
openstack image create "${{ env.IMAGE_NAME }}" \
187-
--file "${{ env.IMAGE_NAME }}" \
174+
openstack image create "${{ matrix.build.image_name }}" \
175+
--file "${{ matrix.build.image_name }}" \
188176
--disk-format qcow2 \
189-
shell: bash
190177
191178
- name: Delete old latest image from target cloud
192179
run: |
193180
. venv/bin/activate
194181
export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml
195182
196-
IMAGE_COUNT=$(openstack image list --name ${{ env.IMAGE_NAME }} -f value -c ID | wc -l)
183+
IMAGE_COUNT=$(openstack image list --name ${{ matrix.build.image_name }} -f value -c ID | wc -l)
197184
if [ "$IMAGE_COUNT" -gt 1 ]; then
198-
OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1)
185+
OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ matrix.build.image_name }}" -f value -c ID | head -n 1)
199186
openstack image delete "$OLD_IMAGE_ID"
200187
else
201188
echo "Only one image exists, skipping deletion."
202189
fi
203-
shell: bash

ansible/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,5 @@ roles/*
6868
!roles/dnf_repos/**
6969
!roles/pulp_site/
7070
!roles/pulp_site/**
71+
!roles/doca/
72+
!roles/doca/**

ansible/cleanup.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,10 @@
6161
os: "{{ ansible_distribution }} {{ ansible_distribution_version }}"
6262
kernel: "{{ ansible_kernel }}"
6363
ofed: "{{ ansible_facts.packages['mlnx-ofa_kernel'].0.version | default('-') }}"
64+
doca: "{{ ansible_facts.packages[doca_profile | default('doca-ofed') ].0.version | default('-') }}"
6465
cuda: "{{ ansible_facts.packages['cuda'].0.version | default('-') }}"
6566
slurm-ohpc: "{{ ansible_facts.packages['slurm-ohpc'].0.version | default('-') }}"
67+
68+
- name: Show image summary
69+
debug:
70+
var: image_info

ansible/fatimage.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
tasks:
77
- name: Report hostname (= final image name)
88
command: hostname
9+
- name: Report inventory groups
10+
debug:
11+
var: group_names
912

1013
- name: Run pre.yml hook
1114
vars:
@@ -218,6 +221,14 @@
218221
name: cloudalchemy.grafana
219222
tasks_from: install.yml
220223

224+
- hosts: doca
225+
become: yes
226+
gather_facts: yes
227+
tasks:
228+
- name: Install NVIDIA DOCA
229+
import_role:
230+
name: doca
231+
221232
- hosts: dnf_repos
222233
become: yes
223234
tasks:

0 commit comments

Comments
 (0)