Skip to content

Commit 521fa07

Browse files
committed
Implement CI caching of expensive native build
1 parent 734df98 commit 521fa07

File tree

5 files changed

+245
-40
lines changed

5 files changed

+245
-40
lines changed

.github/workflows/build.yml

Lines changed: 196 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ jobs:
1515
build-linux:
1616
runs-on: ubuntu-latest
1717
if: true
18+
env:
19+
MKL_URL: ""
20+
# MKL_URL: "https://registrationcenter-download.intel.com/akdlm/irc_nas/tec/16917/l_mkl_2020.4.304.tgz"
1821
steps:
1922
- name: Checkout repository
2023
uses: actions/checkout@v5
@@ -26,29 +29,79 @@ jobs:
2629
export TAG=$(git tag --points-at HEAD)
2730
echo "TAG: $TAG"
2831
if [[ $TAG ]]; then
32+
echo "KALDI_BRANCH: kag-$TAG"
2933
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
3034
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
3135
else
36+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
3237
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
3338
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
3439
fi
3540
41+
- name: Get Kaldi commit hash
42+
id: get-kaldi-commit
43+
run: |
44+
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git $KALDI_BRANCH | cut -f1)
45+
echo "KALDI_COMMIT: $KALDI_COMMIT"
46+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> $GITHUB_OUTPUT
47+
48+
- name: Restore cached native binaries
49+
id: cache-native-binaries-restore
50+
uses: actions/cache/restore@v4
51+
with:
52+
key: native-${{ runner.os }}-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MKL_URL }}-v1
53+
path: |
54+
kaldi_active_grammar/exec/linux
55+
kaldi_active_grammar.libs
56+
3657
- name: Setup just
3758
run: |
3859
ls -al $HOME
3960
mkdir $HOME/bin
4061
curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to $HOME/bin/
4162
echo "$HOME/bin" >> $GITHUB_PATH
4263
43-
- name: Build with dockcross
64+
- name: Build with dockcross (native binaries & python wheel)
4465
run: |
4566
echo "KALDI_BRANCH: $KALDI_BRANCH"
4667
echo "MKL_URL: $MKL_URL"
47-
# Example MKL_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/tec/16917/l_mkl_2020.4.304.tgz
48-
just build-dockcross $KALDI_BRANCH $MKL_URL
49-
# cp dist/* wheelhouse/
68+
just build-dockcross ${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && '--skip-native' || '' }} $KALDI_BRANCH $MKL_URL
5069
ls -al wheelhouse/
5170
71+
- name: Extract native binaries from wheel after auditwheel repair, to save to cache
72+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
73+
run: |
74+
# Assert there is only one wheel
75+
WHEEL_COUNT=$(ls wheelhouse/*.whl | wc -l)
76+
if [ "$WHEEL_COUNT" -ne 1 ]; then
77+
echo "Error: Expected exactly 1 wheel, found $WHEEL_COUNT"
78+
ls -al wheelhouse/
79+
exit 1
80+
fi
81+
WHEEL_FILE=$(ls wheelhouse/*.whl)
82+
echo "Extracting from wheel: $WHEEL_FILE"
83+
unzip -j -o $WHEEL_FILE 'kaldi_active_grammar/exec/linux/*' -d kaldi_active_grammar/exec/linux/
84+
unzip -o $WHEEL_FILE 'kaldi_active_grammar.libs/*'
85+
ls -al kaldi_active_grammar/exec/linux/
86+
readelf -d kaldi_active_grammar/exec/linux/libkaldi-dragonfly.so | egrep 'NEEDED|RUNPATH|RPATH'
87+
88+
- name: Save cached native binaries
89+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
90+
uses: actions/cache/save@v4
91+
with:
92+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
93+
path: |
94+
kaldi_active_grammar/exec/linux
95+
kaldi_active_grammar.libs
96+
97+
- name: Upload native binaries to artifacts
98+
uses: actions/upload-artifact@v4
99+
with:
100+
name: native-linux
101+
path: |
102+
kaldi_active_grammar/exec/linux
103+
kaldi_active_grammar.libs
104+
52105
- name: Upload Linux wheels
53106
uses: actions/upload-artifact@v4
54107
with:
@@ -74,33 +127,54 @@ jobs:
74127
path: main
75128

76129
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
77-
id: get_kaldi_branch
130+
id: get-kaldi-branch
131+
working-directory: main
78132
run: |
79133
# Fetch all tag refs at depth 1 (the checkout is shallow) so the tags pointing at HEAD can be looked up below
80134
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
81135
export TAG=$(git tag --points-at HEAD)
136+
echo "TAG: $TAG"
82137
if [[ $TAG ]]; then
138+
echo "KALDI_BRANCH: kag-$TAG"
83139
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
84140
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
85141
else
142+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
86143
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
87144
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
88145
fi
89146
147+
- name: Get Kaldi commit hash
148+
id: get-kaldi-commit
149+
run: |
150+
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git $KALDI_BRANCH | cut -f1)
151+
echo "KALDI_COMMIT: $KALDI_COMMIT"
152+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> $GITHUB_OUTPUT
153+
154+
- name: Restore cached native binaries
155+
id: cache-native-binaries-restore
156+
uses: actions/cache/restore@v4
157+
with:
158+
key: native-${{ runner.os }}-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.VS_VERSION }}-${{ env.PLATFORM_TOOLSET }}-${{ env.WINDOWS_TARGET_PLATFORM_VERSION }}-${{ env.MKL_VERSION }}-v1
159+
path: main/kaldi_active_grammar/exec/windows
160+
90161
- name: Checkout OpenFST repository
162+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
91163
uses: actions/checkout@v5
92164
with:
93165
repository: daanzu/openfst
94166
path: openfst
95167

96168
- name: Checkout Kaldi repository
169+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
97170
uses: actions/checkout@v5
98171
with:
99172
repository: daanzu/kaldi-fork-active-grammar
100173
path: kaldi
101-
ref: ${{ steps.get_kaldi_branch.outputs.KALDI_BRANCH }}
174+
ref: ${{ steps.get-kaldi-branch.outputs.KALDI_BRANCH }}
102175

103176
- name: Gather system information
177+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
104178
run: |
105179
echo $GITHUB_WORKSPACE
106180
df -h
@@ -118,6 +192,7 @@ jobs:
118192
vswhere -find 'VC\Redist\**\VC_redist.x64.exe'
119193
120194
- name: Setup Kaldi build configuration
195+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
121196
run: |
122197
cd kaldi/windows
123198
cp kaldiwin_mkl.props kaldiwin.props
@@ -136,28 +211,49 @@ jobs:
136211
perl get_version.pl
137212
138213
- name: Add msbuild to PATH
214+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
139215
uses: microsoft/setup-msbuild@v2
140216

141217
- name: Install MKL
218+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
142219
run: winget install --id=Intel.oneMKL -v "${MKL_VERSION}" -e --accept-package-agreements --accept-source-agreements --disable-interactivity
143220

144221
- name: Build OpenFST
222+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
145223
shell: cmd
146224
run: msbuild -t:Build -p:Configuration=Release -p:Platform=x64 -p:PlatformToolset=%PLATFORM_TOOLSET% -maxCpuCount -verbosity:minimal openfst/openfst.sln
147225

148226
- name: Build Kaldi
227+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
149228
shell: cmd
150229
run: msbuild -t:Build -p:Configuration=Release -p:Platform=x64 -p:PlatformToolset=%PLATFORM_TOOLSET% -p:WindowsTargetPlatformVersion=%WINDOWS_TARGET_PLATFORM_VERSION% -maxCpuCount -verbosity:minimal kaldi/kaldiwin_%VS_VERSION%_MKL/kaldiwin/kaldi-dragonfly/kaldi-dragonfly.vcxproj
151230

231+
- name: Copy native binaries
232+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
233+
run: |
234+
mkdir -p main/kaldi_active_grammar/exec/windows
235+
cp kaldi/kaldiwin_${VS_VERSION}_MKL/kaldiwin/kaldi-dragonfly/x64/Release/kaldi-dragonfly.dll main/kaldi_active_grammar/exec/windows/
236+
237+
- name: Save cached native binaries
238+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
239+
uses: actions/cache/save@v4
240+
with:
241+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
242+
path: main/kaldi_active_grammar/exec/windows
243+
244+
- name: Upload native binaries to artifacts
245+
uses: actions/upload-artifact@v4
246+
with:
247+
name: native-windows
248+
path: main/kaldi_active_grammar/exec/windows
249+
152250
- name: Build Python wheel
251+
working-directory: main
153252
run: |
154-
cd main
155253
python -m pip -V
156254
python -m pip install --upgrade setuptools wheel scikit-build cmake ninja
157255
# ls -alR ../
158-
mkdir -p kaldi_active_grammar/exec/windows
159-
cp ../kaldi/kaldiwin_${VS_VERSION}_MKL/kaldiwin/kaldi-dragonfly/x64/Release/kaldi-dragonfly.dll kaldi_active_grammar/exec/windows/
160-
env KALDIAG_SETUP_RAW=1 python setup.py bdist_wheel
256+
env KALDIAG_BUILD_SKIP_NATIVE=1 python setup.py bdist_wheel
161257
ls -al dist/
162258
163259
- name: Upload Windows wheels
@@ -188,27 +284,38 @@ jobs:
188284
uses: actions/checkout@v5
189285

190286
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
287+
id: get-kaldi-branch
191288
run: |
192289
# Fetch all tag refs at depth 1 (the checkout is shallow) so the tags pointing at HEAD can be looked up below
193290
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
194291
export TAG=$(git tag --points-at HEAD)
292+
echo "TAG: $TAG"
195293
if [[ $TAG ]]; then
294+
echo "KALDI_BRANCH: kag-$TAG"
196295
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
197296
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
198297
else
298+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
199299
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
200300
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
201301
fi
202302
203-
- name: Install dependencies
303+
- name: Get Kaldi commit hash
304+
id: get-kaldi-commit
204305
run: |
205-
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel scikit-build cmake ninja
206-
brew install automake sox libtool
207-
brew reinstall gfortran # For openblas
208-
# brew install autoconf
306+
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git $KALDI_BRANCH | cut -f1)
307+
echo "KALDI_COMMIT: $KALDI_COMMIT"
308+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> $GITHUB_OUTPUT
309+
310+
- name: Restore cached native binaries
311+
id: cache-native-binaries-restore
312+
uses: actions/cache/restore@v4
313+
with:
314+
key: native-${{ runner.os }}-arm-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MACOSX_DEPLOYMENT_TARGET }}-${{ env.MKL_URL }}-v1
315+
path: kaldi_active_grammar/exec/macos
209316

210317
- name: Install MKL (if enabled)
211-
if: ${{ env.MKL_URL != '' }}
318+
if: ${{ env.MKL_URL != '' && steps.cache-native-binaries-restore.outputs.cache-hit != 'true' }}
212319
run: |
213320
echo "Installing MKL from: $MKL_URL"
214321
export MKL_FILE=${MKL_URL##*/}
@@ -219,12 +326,38 @@ jobs:
219326
sed -i.bak -e 's/decline/accept/g' silent.cfg
220327
sudo /Volumes/${MKL_FILE}/${MKL_FILE}.app/Contents/MacOS/install.sh --silent silent.cfg
221328
329+
- name: Install dependencies for native build
330+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
331+
run: |
332+
python3 -m pip install --break-system-packages --user --upgrade scikit-build cmake ninja
333+
brew install automake sox libtool
334+
brew reinstall gfortran # For openblas
335+
# brew install autoconf
336+
337+
- name: Install dependencies for python build
338+
run: |
339+
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel
340+
222341
- name: Build Python wheel
223342
run: |
224343
echo "KALDI_BRANCH: $KALDI_BRANCH"
225-
python3 setup.py bdist_wheel
344+
echo "MKL_URL: $MKL_URL"
345+
${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && 'KALDIAG_BUILD_SKIP_NATIVE=1' || '' }} python3 setup.py bdist_wheel
226346
ls -al dist/
227347
348+
- name: Save cached native binaries
349+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
350+
uses: actions/cache/save@v4
351+
with:
352+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
353+
path: kaldi_active_grammar/exec/macos
354+
355+
- name: Upload native binaries to artifacts
356+
uses: actions/upload-artifact@v4
357+
with:
358+
name: native-macos-arm
359+
path: kaldi_active_grammar/exec/macos
360+
228361
- name: Upload MacOS ARM wheels
229362
uses: actions/upload-artifact@v4
230363
with:
@@ -243,27 +376,38 @@ jobs:
243376
uses: actions/checkout@v5
244377

245378
- name: Get KALDI_BRANCH (kag-$TAG tag if commit is tagged; current branch name if not)
379+
id: get-kaldi-branch
246380
run: |
247381
# Fetch all tag refs at depth 1 (the checkout is shallow) so the tags pointing at HEAD can be looked up below
248382
git fetch --depth=1 origin "+refs/tags/*:refs/tags/*"
249383
export TAG=$(git tag --points-at HEAD)
384+
echo "TAG: $TAG"
250385
if [[ $TAG ]]; then
386+
echo "KALDI_BRANCH: kag-$TAG"
251387
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_ENV
252388
echo "KALDI_BRANCH=kag-$TAG" >> $GITHUB_OUTPUT
253389
else
390+
echo "KALDI_BRANCH: ${GITHUB_REF/refs\/heads\//}"
254391
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_ENV
255392
echo "KALDI_BRANCH=${GITHUB_REF/refs\/heads\//}" >> $GITHUB_OUTPUT
256393
fi
257394
258-
- name: Install dependencies
395+
- name: Get Kaldi commit hash
396+
id: get-kaldi-commit
259397
run: |
260-
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel scikit-build cmake ninja
261-
brew install automake sox
262-
brew reinstall gfortran # For openblas
263-
# brew install autoconf libtool
398+
KALDI_COMMIT=$(git ls-remote https://github.com/daanzu/kaldi-fork-active-grammar.git $KALDI_BRANCH | cut -f1)
399+
echo "KALDI_COMMIT: $KALDI_COMMIT"
400+
echo "KALDI_COMMIT=$KALDI_COMMIT" >> $GITHUB_OUTPUT
401+
402+
- name: Restore cached native binaries
403+
id: cache-native-binaries-restore
404+
uses: actions/cache/restore@v4
405+
with:
406+
key: native-${{ runner.os }}-intel-${{ steps.get-kaldi-commit.outputs.KALDI_COMMIT }}-${{ env.MACOSX_DEPLOYMENT_TARGET }}-${{ env.MKL_URL }}-v1
407+
path: kaldi_active_grammar/exec/macos
264408

265409
- name: Install MKL (if enabled)
266-
if: ${{ env.MKL_URL != '' }}
410+
if: ${{ env.MKL_URL != '' && steps.cache-native-binaries-restore.outputs.cache-hit != 'true' }}
267411
run: |
268412
echo "Installing MKL from: $MKL_URL"
269413
export MKL_FILE=${MKL_URL##*/}
@@ -274,16 +418,43 @@ jobs:
274418
sed -i.bak -e 's/decline/accept/g' silent.cfg
275419
sudo /Volumes/${MKL_FILE}/${MKL_FILE}.app/Contents/MacOS/install.sh --silent silent.cfg
276420
421+
- name: Install dependencies for native build
422+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
423+
run: |
424+
python3 -m pip install --break-system-packages --user --upgrade scikit-build cmake ninja
425+
brew install automake sox
426+
brew reinstall gfortran # For openblas
427+
# brew install autoconf libtool
428+
429+
- name: Install dependencies for python build
430+
run: |
431+
python3 -m pip install --break-system-packages --user --upgrade setuptools wheel
432+
277433
- name: Build Python wheel
278434
run: |
279-
python3 setup.py bdist_wheel
435+
echo "KALDI_BRANCH: $KALDI_BRANCH"
436+
echo "MKL_URL: $MKL_URL"
437+
${{ steps.cache-native-binaries-restore.outputs.cache-hit == 'true' && 'KALDIAG_BUILD_SKIP_NATIVE=1' || '' }} python3 setup.py bdist_wheel
280438
ls -al dist/
281439
440+
- name: Save cached native binaries
441+
if: steps.cache-native-binaries-restore.outputs.cache-hit != 'true'
442+
uses: actions/cache/save@v4
443+
with:
444+
key: ${{ steps.cache-native-binaries-restore.outputs.cache-primary-key }}
445+
path: kaldi_active_grammar/exec/macos
446+
447+
- name: Upload native binaries to artifacts
448+
uses: actions/upload-artifact@v4
449+
with:
450+
name: native-macos-intel
451+
path: kaldi_active_grammar/exec/macos
452+
282453
- name: Upload MacOS Intel wheels
283454
uses: actions/upload-artifact@v4
284455
with:
285456
name: wheels-macos-intel
286-
path: dist/*
457+
path: dist/*.whl
287458

288459
merge-wheels:
289460
runs-on: ubuntu-latest

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ endif()
130130
# binaries. After package installation is done, these directories are
131131
# deleted and the dynamic libraries cannot be loaded. The following
132132
# commands generate a shell script that fixes the paths to the dynamic
133-
# libraries in the built executables and the libraries themselves.
133+
# libraries in the built executables and the libraries themselves, so
134+
# that they can be loaded when placed in the same directory.
134135
# The commands also add a custom target to invoke the generated script
135136
# after the external project (kaldi) has been built. An alternative
136137
# would be to change the kaldi engine build system to accept a path to

0 commit comments

Comments
 (0)