Skip to content

Commit cae0372

Browse files
committed
Implement CI caching of expensive native build
1 parent 734df98 commit cae0372

File tree

4 files changed

+231
-39
lines changed

4 files changed

+231
-39
lines changed

.github/workflows/build.yml

Lines changed: 188 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ jobs:
1515
build-linux:
1616
runs-on: ubuntu-latest
1717
if: true
18+
env:
19+
MKL_URL: ""
20+
# MKL_URL: "https://registrationcenter-download.intel.com/akdlm/irc_nas/tec/16917/l_mkl_2020.4.304.tgz"
1821
steps:
1922
- name: Checkout repository
2023
uses: actions/checkout@v5
@@ -26,29 +29,71 @@ jobs:
2629
export TAG=$(git tag --points-at HEAD)
2730
echo "TAG: $TAG"
2831
if [[ $TAG ]]; then
32+
echo "KALDI_BRANCH: kag-$TAG"
2933
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
3034
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
3135
else
36+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
3237
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
3338
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
3439
fi
3540
41+
# Resolve KALDI_BRANCH (a branch name or a kag-* tag) in the Kaldi fork to an object hash.
# The hash feeds the native-binary cache key, so the cache is invalidated when the Kaldi source moves.
- name: Get Kaldi commit hash
42+
id: get-kaldi-commit
43+
run: |
44+
# Ask for the exact branch/tag refs and keep only the first match: a bare name is a tail pattern that may match several refs, and a multi-line value would corrupt $GITHUB_OUTPUT.
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git "refs/heads/$KALDI_BRANCH" "refs/tags/$KALDI_BRANCH" | head -n1 | cut -f1)
45+
# Abort when nothing was resolved; an empty hash would yield a cache key that no longer depends on the Kaldi source.
echo "KALDI_COMMIT: ${KALDI_COMMIT:?could not resolve ref $KALDI_BRANCH in kaldi-fork-active-grammar}"
46+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> "$GITHUB_OUTPUT"
47+
48+
# Restore-only half of the cache: look up previously built Linux native binaries.
# Later steps read this step's `cache-hit` and `cache-primary-key` outputs to decide whether to build and save.
- name: Restore cached native binaries
49+
id: cache-native-binaries-restore
50+
uses: actions/cache/restore@v4
51+
with:
52+
# Key parts: runner OS, resolved Kaldi commit hash, MKL_URL (empty string in this job's env), and a literal "v1" suffix
# (presumably bumped by hand to invalidate old caches -- confirm with the author).
key: native-${{ runner.os }}-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MKL_URL }}-v1
53+
path: kaldi_active_grammar/exec/linux
54+
3655
# Install the `just` command runner into $HOME/bin and add that directory to PATH for later steps.
- name: Setup just
3756
run: |
3857
ls -al "$HOME"
3958
# -p: do not fail the job if the runner image already ships a $HOME/bin directory.
mkdir -p "$HOME/bin"
4059
curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to "$HOME/bin/"
4160
# Lines appended to GITHUB_PATH are prepended to PATH for all subsequent steps.
echo "$HOME/bin" >> "$GITHUB_PATH"
4261
43-
- name: Build with dockcross
62+
# Build via the `build-dockcross` just recipe. The `${{ ... }}` expression on the new invocation expands to
# `--skip-native` when the cache restore step reported a hit, and to an empty string otherwise.
# NOTE(review): presumably --skip-native makes the build script reuse the restored binaries -- confirm in building/build-wheel-dockcross.sh.
- name: Build with dockcross (native binaries & python wheel)
4463
run: |
4564
echo "KALDI_BRANCH: $KALDI_BRANCH"
4665
echo "MKL_URL: $MKL_URL"
47-
# Example MKL_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/tec/16917/l_mkl_2020.4.304.tgz
48-
just build-dockcross $KALDI_BRANCH $MKL_URL
49-
# cp dist/* wheelhouse/
66+
just build-dockcross ${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && '--skip-native' || '' }} $KALDI_BRANCH $MKL_URL
5067
ls -al wheelhouse/
5168
69+
# Cache miss only: copy the native binaries back out of the single built wheel into
# kaldi_active_grammar/exec/linux, which is the path the cache save and artifact upload steps read.
- name: Extract native binaries from wheel after auditwheel repair, to save to cache
70+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
71+
run: |
72+
# Assert there is only one wheel
73+
# Count with a glob array instead of parsing `ls`; nullglob turns "no wheels" into an empty array rather than an `ls` error.
shopt -s nullglob; WHEELS=(wheelhouse/*.whl); WHEEL_COUNT=${#WHEELS[@]}
74+
if [ "$WHEEL_COUNT" -ne 1 ]; then
75+
echo "Error: Expected exactly 1 wheel, found $WHEEL_COUNT"
76+
ls -al wheelhouse/
77+
exit 1
78+
fi
79+
WHEEL_FILE=${WHEELS[0]}
80+
echo "Extracting from wheel: $WHEEL_FILE"
81+
# -j drops the archive directory structure so files land directly in the target dir; -o overwrites without prompting.
# The wheel path is quoted so an unusual file name cannot be word-split.
unzip -j -o "$WHEEL_FILE" 'kaldi_active_grammar/exec/linux/*' -d kaldi_active_grammar/exec/linux/
82+
ls -al kaldi_active_grammar/exec/linux/
83+
84+
# Cache miss only: store the binaries under the exact key the restore step looked up
# (its `cache-primary-key` output), so the key expression is written in one place.
- name: Save cached native binaries
85+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
86+
uses: actions/cache/save@v4
87+
with:
88+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
89+
path: kaldi_active_grammar/exec/linux
90+
91+
# No `if:` condition: the binaries are uploaded as the `native-linux` artifact
# whether they were restored from cache or freshly built.
- name: Upload native binaries to artifacts
92+
uses: actions/upload-artifact@v4
93+
with:
94+
name: native-linux
95+
path: kaldi_active_grammar/exec/linux
96+
5297
- name: Upload Linux wheels
5398
uses: actions/upload-artifact@v4
5499
with:
@@ -74,33 +119,54 @@ jobs:
74119
path: main
75120

76121
# Decide which ref of the Kaldi fork to build: `kag-<tag>` when HEAD carries a tag, otherwise
# GITHUB_REF with the `refs/heads/` prefix removed. The value is written to GITHUB_ENV (for later
# shell steps) and to GITHUB_OUTPUT (read as `steps.get-kaldi-branch.outputs.KALDI_BRANCH`).
# Runs in `main`, the directory this job checks the repository out to.
# NOTE(review): if several tags point at HEAD, TAG is multi-line and the KEY=value writes below
# would be malformed -- confirm whether that can happen for this repository.
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
77-
id: get_kaldi_branch
122+
id: get-kaldi-branch
123+
working-directory: main
78124
run: |
79125
# Fetch tags on the one fetched commit (shallow clone)
80126
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
81127
export TAG=$(git tag --points-at HEAD)
128+
echo "TAG: $TAG"
82129
if [[ $TAG ]]; then
130+
echo "KALDI_BRANCH: kag-$TAG"
83131
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
84132
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
85133
else
134+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
86135
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
87136
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
88137
fi
89138
139+
# Resolve KALDI_BRANCH (a branch name or a kag-* tag) in the Kaldi fork to an object hash.
# The hash feeds the native-binary cache key, so the cache is invalidated when the Kaldi source moves.
# The script uses bash syntax, as the original did.
- name: Get Kaldi commit hash
140+
id: get-kaldi-commit
141+
run: |
142+
# Ask for the exact branch/tag refs and keep only the first match: a bare name is a tail pattern that may match several refs, and a multi-line value would corrupt $GITHUB_OUTPUT.
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git "refs/heads/$KALDI_BRANCH" "refs/tags/$KALDI_BRANCH" | head -n1 | cut -f1)
143+
# Abort when nothing was resolved; an empty hash would yield a cache key that no longer depends on the Kaldi source.
echo "KALDI_COMMIT: ${KALDI_COMMIT:?could not resolve ref $KALDI_BRANCH in kaldi-fork-active-grammar}"
144+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> "$GITHUB_OUTPUT"
145+
146+
# Restore-only half of the cache for the Windows DLL. The `cache-hit` output gates the
# OpenFST/Kaldi checkout, toolchain setup and build steps that follow.
- name: Restore cached native binaries
147+
id: cache-native-binaries-restore
148+
uses: actions/cache/restore@v4
149+
with:
150+
# Key parts: runner OS, resolved Kaldi commit hash, then the toolchain settings that are also passed to msbuild/winget
# (VS_VERSION, PLATFORM_TOOLSET, WINDOWS_TARGET_PLATFORM_VERSION, MKL_VERSION), and a literal "v1" suffix.
key: native-${{ runner.os }}-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.VS_VERSION }}-${{ env.PLATFORM_TOOLSET }}-${{ env.WINDOWS_TARGET_PLATFORM_VERSION }}-${{ env.MKL_VERSION }}-v1
151+
path: main/kaldi_active_grammar/exec/windows
152+
90153
- name: Checkout OpenFST repository
154+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
91155
uses: actions/checkout@v5
92156
with:
93157
repository: daanzu/openfst
94158
path: openfst
95159

96160
- name: Checkout Kaldi repository
161+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
97162
uses: actions/checkout@v5
98163
with:
99164
repository: daanzu/kaldi-fork-active-grammar
100165
path: kaldi
101-
ref: ${{ steps.get_kaldi_branch.outputs.KALDI_BRANCH }}
166+
ref: ${{ steps.get-kaldi-branch.outputs.KALDI_BRANCH }}
102167

103168
- name: Gather system information
169+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
104170
run: |
105171
echo $GITHUB_WORKSPACE
106172
df -h
@@ -118,6 +184,7 @@ jobs:
118184
vswhere -find 'VC\Redist\**\VC_redist.x64.exe'
119185
120186
- name: Setup Kaldi build configuration
187+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
121188
run: |
122189
cd kaldi/windows
123190
cp kaldiwin_mkl.props kaldiwin.props
@@ -136,28 +203,49 @@ jobs:
136203
perl get_version.pl
137204
138205
- name: Add msbuild to PATH
206+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
139207
uses: microsoft/setup-msbuild@v2
140208

141209
- name: Install MKL
210+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
142211
run: winget install --id=Intel.oneMKL -v "${MKL_VERSION}" -e --accept-package-agreements --accept-source-agreements --disable-interactivity
143212

144213
- name: Build OpenFST
214+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
145215
shell: cmd
146216
run: msbuild -t:Build -p:Configuration=Release -p:Platform=x64 -p:PlatformToolset=%PLATFORM_TOOLSET% -maxCpuCount -verbosity:minimal openfst/openfst.sln
147217

148218
- name: Build Kaldi
219+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
149220
shell: cmd
150221
run: msbuild -t:Build -p:Configuration=Release -p:Platform=x64 -p:PlatformToolset=%PLATFORM_TOOLSET% -p:WindowsTargetPlatformVersion=%WINDOWS_TARGET_PLATFORM_VERSION% -maxCpuCount -verbosity:minimal kaldi/kaldiwin_%VS_VERSION%_MKL/kaldiwin/kaldi-dragonfly/kaldi-dragonfly.vcxproj
151222

223+
# Cache miss only: copy the freshly built kaldi-dragonfly.dll from the Kaldi build tree into
# main/kaldi_active_grammar/exec/windows, the same path the cache and artifact steps use.
- name: Copy native binaries
224+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
225+
run: |
226+
mkdir -p main/kaldi_active_grammar/exec/windows
227+
cp kaldi/kaldiwin_${VS_VERSION}_MKL/kaldiwin/kaldi-dragonfly/x64/Release/kaldi-dragonfly.dll main/kaldi_active_grammar/exec/windows/
228+
229+
# Cache miss only: store the copied DLL under the exact key the restore step looked up.
- name: Save cached native binaries
230+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
231+
uses: actions/cache/save@v4
232+
with:
233+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
234+
path: main/kaldi_active_grammar/exec/windows
235+
236+
# No `if:` condition: the `native-windows` artifact is uploaded whether the DLL
# was restored from cache or freshly built.
- name: Upload native binaries to artifacts
237+
uses: actions/upload-artifact@v4
238+
with:
239+
name: native-windows
240+
path: main/kaldi_active_grammar/exec/windows
241+
152242
# Package the wheel from the `main` checkout. The DLL copy that used to happen here moved to its own
# step, so by this point kaldi_active_grammar/exec/windows is populated either by that step or by the cache restore.
# KALDIAG_BUILD_SKIP_NATIVE=1 (replacing KALDIAG_SETUP_RAW=1) is always set on Windows.
# NOTE(review): presumably setup.py skips its own native build when that variable is set -- confirm in setup.py.
- name: Build Python wheel
243+
working-directory: main
153244
run: |
154-
cd main
155245
python -m pip -V
156246
python -m pip install --upgrade setuptools wheel scikit-build cmake ninja
157247
# ls -alR ../
158-
mkdir -p kaldi_active_grammar/exec/windows
159-
cp ../kaldi/kaldiwin_${VS_VERSION}_MKL/kaldiwin/kaldi-dragonfly/x64/Release/kaldi-dragonfly.dll kaldi_active_grammar/exec/windows/
160-
env KALDIAG_SETUP_RAW=1 python setup.py bdist_wheel
248+
env KALDIAG_BUILD_SKIP_NATIVE=1 python setup.py bdist_wheel
161249
ls -al dist/
162250
163251
- name: Upload Windows wheels
@@ -188,27 +276,38 @@ jobs:
188276
uses: actions/checkout@v5
189277

190278
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
279+
id: get-kaldi-branch
191280
run: |
192281
# Fetch tags on the one fetched commit (shallow clone)
193282
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
194283
export TAG=$(git tag --points-at HEAD)
284+
echo "TAG: $TAG"
195285
if [[ $TAG ]]; then
286+
echo "KALDI_BRANCH: kag-$TAG"
196287
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
197288
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
198289
else
290+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
199291
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
200292
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
201293
fi
202294
203-
- name: Install dependencies
295+
# Resolve KALDI_BRANCH (a branch name or a kag-* tag) in the Kaldi fork to an object hash.
# The hash feeds the native-binary cache key, so the cache is invalidated when the Kaldi source moves.
- name: Get Kaldi commit hash
296+
id: get-kaldi-commit
204297
run: |
205-
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel scikit-build cmake ninja
206-
brew install automake sox libtool
207-
brew reinstall gfortran # For openblas
208-
# brew install autoconf
298+
# Ask for the exact branch/tag refs and keep only the first match: a bare name is a tail pattern that may match several refs, and a multi-line value would corrupt $GITHUB_OUTPUT.
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git "refs/heads/$KALDI_BRANCH" "refs/tags/$KALDI_BRANCH" | head -n1 | cut -f1)
299+
# Abort when nothing was resolved; an empty hash would yield a cache key that no longer depends on the Kaldi source.
echo "KALDI_COMMIT: ${KALDI_COMMIT:?could not resolve ref $KALDI_BRANCH in kaldi-fork-active-grammar}"
300+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> "$GITHUB_OUTPUT"
301+
302+
# Restore-only half of the cache for the macOS ARM native binaries.
- name: Restore cached native binaries
303+
id: cache-native-binaries-restore
304+
uses: actions/cache/restore@v4
305+
with:
306+
# The literal "arm" segment keeps this key distinct from the Intel job's "intel" key;
# the rest is runner OS, Kaldi commit hash, MACOSX_DEPLOYMENT_TARGET, MKL_URL and a "v1" suffix.
key: native-${{ runner.os }}-arm-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MACOSX_DEPLOYMENT_TARGET }}-${{ env.MKL_URL }}-v1
307+
path: kaldi_active_grammar/exec/macos
209308

210309
- name: Install MKL (if enabled)
211-
if: ${{ env.MKL_URL != '' }}
310+
if: ${{ env.MKL_URL != '' && steps.cache-native-binaries-restore.outputs.cache-hit != 'true' }}
212311
run: |
213312
echo "Installing MKL from: $MKL_URL"
214313
export MKL_FILE=${MKL_URL##*/}
@@ -219,12 +318,38 @@ jobs:
219318
sed -i.bak -e 's/decline/accept/g' silent.cfg
220319
sudo /Volumes/${MKL_FILE}/${MKL_FILE}.app/Contents/MacOS/install.sh --silent silent.cfg
221320
321+
# Cache miss only: install the tools needed to compile the native code.
# The packaging-only tools (setuptools, wheel) are installed by a separate, unconditional step.
- name: Install dependencies for native build
322+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
323+
run: |
324+
python3 -m pip install --break-system-packages --user --upgrade scikit-build cmake ninja
325+
brew install automake sox libtool
326+
brew reinstall gfortran # For openblas
327+
# brew install autoconf
328+
329+
- name: Install dependencies for python build
330+
run: |
331+
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel
332+
222333
# Build the wheel. On a cache hit the `${{ ... }}` expression expands to the environment prefix
# `KALDIAG_BUILD_SKIP_NATIVE=1`; on a miss it expands to an empty string and setup.py runs without it.
# NOTE(review): presumably setup.py skips the native build when that variable is set -- confirm in setup.py.
- name: Build Python wheel
223334
run: |
224335
echo "KALDI_BRANCH: $KALDI_BRANCH"
225-
python3 setup.py bdist_wheel
336+
echo "MKL_URL: $MKL_URL"
337+
${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && 'KALDIAG_BUILD_SKIP_NATIVE=1' || '' }} python3 setup.py bdist_wheel
226338
ls -al dist/
227339
340+
# Cache miss only: runs after the wheel build and stores kaldi_active_grammar/exec/macos
# under the exact key the restore step looked up.
- name: Save cached native binaries
341+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
342+
uses: actions/cache/save@v4
343+
with:
344+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
345+
path: kaldi_active_grammar/exec/macos
346+
347+
- name: Upload native binaries to artifacts
348+
uses: actions/upload-artifact@v4
349+
with:
350+
name: native-macos-arm
351+
path: kaldi_active_grammar/exec/macos
352+
228353
- name: Upload MacOS ARM wheels
229354
uses: actions/upload-artifact@v4
230355
with:
@@ -243,27 +368,38 @@ jobs:
243368
uses: actions/checkout@v5
244369

245370
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
371+
id: get-kaldi-branch
246372
run: |
247373
# Fetch tags on the one fetched commit (shallow clone)
248374
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
249375
export TAG=$(git tag --points-at HEAD)
376+
echo "TAG: $TAG"
250377
if [[ $TAG ]]; then
378+
echo "KALDI_BRANCH: kag-$TAG"
251379
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
252380
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
253381
else
382+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
254383
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
255384
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
256385
fi
257386
258-
- name: Install dependencies
387+
# Resolve KALDI_BRANCH (a branch name or a kag-* tag) in the Kaldi fork to an object hash.
# The hash feeds the native-binary cache key, so the cache is invalidated when the Kaldi source moves.
- name: Get Kaldi commit hash
388+
id: get-kaldi-commit
259389
run: |
260-
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel scikit-build cmake ninja
261-
brew install automake sox
262-
brew reinstall gfortran # For openblas
263-
# brew install autoconf libtool
390+
# Ask for the exact branch/tag refs and keep only the first match: a bare name is a tail pattern that may match several refs, and a multi-line value would corrupt $GITHUB_OUTPUT.
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git "refs/heads/$KALDI_BRANCH" "refs/tags/$KALDI_BRANCH" | head -n1 | cut -f1)
391+
# Abort when nothing was resolved; an empty hash would yield a cache key that no longer depends on the Kaldi source.
echo "KALDI_COMMIT: ${KALDI_COMMIT:?could not resolve ref $KALDI_BRANCH in kaldi-fork-active-grammar}"
392+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> "$GITHUB_OUTPUT"
393+
394+
# Restore-only half of the cache for the macOS Intel native binaries.
- name: Restore cached native binaries
395+
id: cache-native-binaries-restore
396+
uses: actions/cache/restore@v4
397+
with:
398+
# The literal "intel" segment keeps this key distinct from the ARM job's "arm" key;
# the rest is runner OS, Kaldi commit hash, MACOSX_DEPLOYMENT_TARGET, MKL_URL and a "v1" suffix.
key: native-${{ runner.os }}-intel-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MACOSX_DEPLOYMENT_TARGET }}-${{ env.MKL_URL }}-v1
399+
path: kaldi_active_grammar/exec/macos
264400

265401
- name: Install MKL (if enabled)
266-
if: ${{ env.MKL_URL != '' }}
402+
if: ${{ env.MKL_URL != '' && steps.cache-native-binaries-restore.outputs.cache-hit != 'true' }}
267403
run: |
268404
echo "Installing MKL from: $MKL_URL"
269405
export MKL_FILE=${MKL_URL##*/}
@@ -274,16 +410,43 @@ jobs:
274410
sed -i.bak -e 's/decline/accept/g' silent.cfg
275411
sudo /Volumes/${MKL_FILE}/${MKL_FILE}.app/Contents/MacOS/install.sh --silent silent.cfg
276412
413+
- name: Install dependencies for native build
414+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
415+
run: |
416+
python3 -m pip install --break-system-packages --user --upgrade scikit-build cmake ninja
417+
brew install automake sox
418+
brew reinstall gfortran # For openblas
419+
# brew install autoconf libtool
420+
421+
- name: Install dependencies for python build
422+
run: |
423+
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel
424+
277425
# Build the wheel. On a cache hit the `${{ ... }}` expression expands to the environment prefix
# `KALDIAG_BUILD_SKIP_NATIVE=1`; on a miss it expands to an empty string and setup.py runs without it.
# NOTE(review): presumably setup.py skips the native build when that variable is set -- confirm in setup.py.
- name: Build Python wheel
278426
run: |
279-
python3 setup.py bdist_wheel
427+
echo "KALDI_BRANCH: $KALDI_BRANCH"
428+
echo "MKL_URL: $MKL_URL"
429+
${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && 'KALDIAG_BUILD_SKIP_NATIVE=1' || '' }} python3 setup.py bdist_wheel
280430
ls -al dist/
281431
432+
- name: Save cached native binaries
433+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
434+
uses: actions/cache/save@v4
435+
with:
436+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
437+
path: kaldi_active_grammar/exec/macos
438+
439+
- name: Upload native binaries to artifacts
440+
uses: actions/upload-artifact@v4
441+
with:
442+
name: native-macos-intel
443+
path: kaldi_active_grammar/exec/macos
444+
282445
- name: Upload MacOS Intel wheels
283446
uses: actions/upload-artifact@v4
284447
with:
285448
name: wheels-macos-intel
286-
path: dist/*
449+
path: dist/*.whl
287450

288451
merge-wheels:
289452
runs-on: ubuntu-latest

Justfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@ build-linux python='python3':
1515
# MKL with INTEL_MKL_DIR=/opt/intel/mkl/
1616
{{python}} setup.py bdist_wheel
1717

18-
build-dockcross kaldi_branch mkl_url="":
19-
building/dockcross-manylinux2010-x64 bash building/build-wheel-dockcross.sh manylinux2010_x86_64 {{kaldi_branch}} {{mkl_url}}
18+
# Build the wheel inside the dockcross manylinux2010 container. Every argument is forwarded verbatim
# to building/build-wheel-dockcross.sh after the fixed platform tag, so callers can pass optional
# flags (the CI workflow passes `--skip-native`) ahead of the Kaldi branch and MKL URL.
build-dockcross *args='':
19+
building/dockcross-manylinux2010-x64 bash building/build-wheel-dockcross.sh manylinux2010_x86_64 {{args}}
2020

2121
setup-dockcross:
2222
docker run --rm dockcross/manylinux2010-x64:20210127-72b83fc > building/dockcross-manylinux2010-x64 && chmod +x building/dockcross-manylinux2010-x64
2323
@# [ ! -e building/dockcross-manylinux2010-x64 ] && docker run --rm dockcross/manylinux2010-x64 > building/dockcross-manylinux2010-x64 && chmod +x building/dockcross-manylinux2010-x64 || true
2424

2525
pip-install-develop:
26-
KALDIAG_SETUP_RAW=1 pip3 install --user -e .
26+
KALDIAG_BUILD_SKIP_NATIVE=1 pip3 install --user -e .
2727

2828
# Setup an editable development environment on linux
2929
setup-linux-develop kaldi_root_dir:

0 commit comments

Comments
 (0)