
Commit 3103434

Change desktop packaging script to run merge_libraries in parallel. (#218)
By far the slowest part of C++ desktop packaging is running merge_libraries, especially on Windows. This PR allows the merge_libraries step (which runs on Linux runners, even when merging Windows libraries) to run in parallel when the -j flag is passed, as long as the "parallel" command is installed on the machine. To make this safe, a bit of code is added to merge_libraries.py to lock and unlock the cache file (which is shared across merge_libraries instances) via fcntl.lockf(): it takes a shared lock while reading and an exclusive lock while writing, and the lockf() call blocks until the process can obtain the lock. This change saves nearly an hour of packaging time on the slowest platform, Windows Debug X64 Static (and Dynamic).
Parent commit: d3a8bcc
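
The locking pattern is small enough to sketch in full. Below is a minimal, illustrative Python sketch of what the scripts/merge_libraries.py diff further down adds (the cache path and function names here are hypothetical, not the SDK's): readers take a shared lock, the writer takes an exclusive one, and lockf() blocks until the lock is granted.

    import fcntl
    import pickle

    CACHE_PATH = "/tmp/merge_cache.pickle"  # hypothetical path, for illustration only

    def read_cache():
        # Any number of merge_libraries processes may hold LOCK_SH at once.
        cache = {}
        with open(CACHE_PATH, "rb") as handle:
            fcntl.lockf(handle, fcntl.LOCK_SH)
            cache.update(pickle.load(handle))
            fcntl.lockf(handle, fcntl.LOCK_UN)
        return cache

    def write_cache(cache):
        # LOCK_EX waits until no other process holds any lock on the file.
        with open(CACHE_PATH, "wb") as handle:
            fcntl.lockf(handle, fcntl.LOCK_EX)
            pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
            fcntl.lockf(handle, fcntl.LOCK_UN)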

File tree: 3 files changed, +101 -28 lines

.github/workflows/cpp-packaging.yml

Lines changed: 1 addition & 1 deletion
@@ -433,7 +433,7 @@ jobs:
       for pkg in artifacts/firebase-cpp-sdk-${{ matrix.sdk_platform }}${{ matrix.suffix }}*-build/*.tgz; do
         # determine the build variant based on the artifact filename
         variant=$(sdk-src/build_scripts/desktop/get_variant.sh "${pkg}")
-        sdk-src/build_scripts/desktop/package.sh -b ${pkg} -o firebase-cpp-sdk-${{ matrix.sdk_platform }}${{ matrix.suffix }}-package -p ${{ matrix.sdk_platform }} -t bin -d ${variant} -P python3
+        sdk-src/build_scripts/desktop/package.sh -b ${pkg} -o firebase-cpp-sdk-${{ matrix.sdk_platform }}${{ matrix.suffix }}-package -p ${{ matrix.sdk_platform }} -t bin -d ${variant} -P python3 -j
       done
       if [[ "${{ matrix.sdk_platform }}" == "darwin" ]]; then
         # Darwin has a final step after all the variants are done,

build_scripts/desktop/package.sh

Lines changed: 95 additions & 27 deletions
@@ -6,16 +6,17 @@ set -e
 usage(){
     echo "Usage: $0 -b <built sdk path> -o <output package path> -p <platform> [options]
 options:
-  -b, built sdk path or tar file            required
-  -o, output path                           required
-  -p, platform to package                   required, one of: linux windows darwin
-  -d, build variant directory to create     default: .
-  -m, merge_libraries.py path               default: <script dir>/../../scripts/merge_libraries.py
-  -P, python command                        default: python
-  -t, packaging tools directory             default: ~/bin
+  -b, built sdk path or tar file             required
+  -o, output path                            required
+  -p, platform to package                    required, one of: linux windows darwin
+  -d, build variant directory to create      default: .
+  -m, merge_libraries.py path                default: <script dir>/../../scripts/merge_libraries.py
+  -P, python command                         default: python
+  -t, packaging tools directory              default: ~/bin
+  -j, run merge_libraries jobs in parallel
   -v, enable verbose mode
 example:
-  build_scripts/desktop/package.sh -b firebase-cpp-sdk-linux -p linux -o package_out -v x86"
+  build_scripts/desktop/package.sh -b firebase-cpp-sdk-linux -p linux -o package_out -v x86 -j"
 }
 
 built_sdk_path=
@@ -29,6 +30,7 @@ merge_libraries_script=${root_dir}/scripts/merge_libraries.py
 tools_path=~/bin
 built_sdk_tarfile=
 temp_dir=
+run_in_parallel=0
 
 . "${root_dir}/build_scripts/packaging.conf"
 
@@ -42,11 +44,14 @@ abspath(){
   fi
 }
 
-while getopts ":b:o:p:d:m:P:t:hv" opt; do
+while getopts "b:o:p:d:m:P:t:hjv" opt; do
   case $opt in
     b)
       built_sdk_path=$OPTARG
       ;;
+    j)
+      run_in_parallel=1
+      ;;
     o)
       output_package_path=$OPTARG
       ;;
@@ -88,6 +93,23 @@ while getopts ":b:o:p:d:m:P:t:hv" opt; do
   esac
 done
 
+readonly parallel_command=parallel
+# GNU and non-GNU versions of the 'parallel' command take different arguments, so we check which is installed.
+use_gnu_parallel=0
+
+if [[ ${run_in_parallel} -ne 0 ]]; then
+  if [[ ! $(which "${parallel_command}") ]]; then
+    echo "Warning: Ignoring -j option since '${parallel_command}' command cannot be found."
+    run_in_parallel=0
+  else
+    set +e
+    if ("${parallel_command}" --version 2>&1 | grep -q GNU); then
+      use_gnu_parallel=1
+    fi
+    set -e
+  fi
+fi
+
 if [[ -z "${built_sdk_path}" ]]; then
   echo "Missing required option: -b <built sdk path>"
   exit 2
@@ -214,14 +236,15 @@ trap "rm -f \"\${cache_file}\"" SIGKILL SIGTERM SIGQUIT EXIT
 
 declare -a merge_libraries_params
 merge_libraries_params=(
-  --cache=${cache_file}
   --binutils_nm_cmd=${binutils_nm}
   --binutils_ar_cmd=${binutils_ar}
   --binutils_objcopy_cmd=${binutils_objcopy}
   --demangle_cmds=${demangle_cmds}
   --platform=${platform}
   --hide_cpp_namespaces=$(echo "${rename_namespaces[*]}" | sed 's| |,|g')
 )
+cache_param=--cache=${cache_file}
+
 if [[ ${platform} == "windows" ]]; then
   # Windows has a hard time with strict C++ demangling.
   merge_libraries_params+=(--nostrict_cpp)
@@ -251,6 +274,13 @@ for lib in $(find . -name "*.${ext}"); do
   allfiles+="${lib}"
 done
 
+merge_libraries_tmp=$(mktemp -d)
+trap "rm -rf \"\${merge_libraries_tmp}\"" SIGKILL SIGTERM SIGQUIT EXIT
+
+if [[ ${run_in_parallel} -ne 0 ]]; then
+  echo "Queueing jobs..."
+fi
+
 # Make sure we only copy the libraries in product_list (specified in packaging.conf)
 for product in ${product_list[*]}; do
   libfile_src="${product}/${subdir}${prefix}firebase_${product}.${ext}"
@@ -293,30 +323,68 @@ for product in ${product_list[*]}; do
       done
     done
   fi
-  echo -n "${libfile_out}"
-  if [[ ! -z ${deps_basenames[*]} ]]; then
-    echo -n " <- ${deps[*]}"
-  fi
-  echo
   outfile="${full_output_path}/${libfile_out}"
   rm -f "${outfile}"
   if [[ ${verbose} -eq 1 ]]; then
-    echo "${python_cmd}" "${merge_libraries_script}" \
-      ${merge_libraries_params[*]} \
-      --output="${outfile}" \
-      --scan_libs="${allfiles}" \
-      --hide_c_symbols="${deps_hidden}" \
-      ${libfile_src} ${deps[*]}
+    echo "${python_cmd}" "${merge_libraries_script}" \
+      ${merge_libraries_params[*]} \
+      ${cache_param} \
+      --output="${outfile}" \
+      --scan_libs="${allfiles}" \
+      --hide_c_symbols="${deps_hidden}" \
+      ${libfile_src} ${deps[*]}
+  fi
+  # Place the merge command in a script so we can optionally run them in parallel.
+  echo "#!/bin/bash -e" > "${merge_libraries_tmp}/merge_${product}.sh"
+  if [[ ! -z ${deps_basenames[*]} ]]; then
+    echo "echo \"${libfile_out} <- ${deps[*]}\"" >> "${merge_libraries_tmp}/merge_${product}.sh"
+  else
+    echo "echo \"${libfile_out}\"" >> "${merge_libraries_tmp}/merge_${product}.sh"
   fi
-  "${python_cmd}" "${merge_libraries_script}" \
-    ${merge_libraries_params[*]} \
-    --output="${outfile}" \
-    --scan_libs="${allfiles}" \
-    --hide_c_symbols="${deps_hidden}" \
-    ${libfile_src} ${deps[*]}
+  if [[ ! -z ${deps_basenames[*]} ]]; then
+    echo -n >> "${merge_libraries_tmp}/merge_${product}.sh"
+  fi
+  echo >> "${merge_libraries_tmp}/merge_${product}.sh"
+  echo "\"${python_cmd}\" \\
+    \"${merge_libraries_script}\" \\
+    ${merge_libraries_params[*]} \\
+    \"${cache_param}\" \\
+    --output=\"${outfile}\" \\
+    --scan_libs=\"${allfiles}\" \\
+    --hide_c_symbols=\"${deps_hidden}\" \\
+    \"${libfile_src}\" ${deps[*]}" >> "${merge_libraries_tmp}/merge_${product}.sh"
+  chmod u+x "${merge_libraries_tmp}/merge_${product}.sh"
+  if [[ ${run_in_parallel} -eq 0 ]]; then
+    # Run immediately if not set to run in parallel.
+    "${merge_libraries_tmp}/merge_${product}.sh"
+  else
+    echo "echo \"${libfile_out}\" DONE" >> "${merge_libraries_tmp}/merge_${product}.sh"
+  fi
 done
+
+if [[ ${run_in_parallel} -ne 0 ]]; then
+  # Analytics is the smallest SDK, so it should be the shortest job.
+  shortest=analytics
+  echo "There are ${#product_list[@]} jobs to run."
+  echo "Running shortest job to populate cache, then remaining jobs in parallel..."
+  "${merge_libraries_tmp}/merge_${shortest}.sh"
+  # Zero out the job that we already did.
+  echo "#!/bin/bash" > "${merge_libraries_tmp}/merge_${shortest}.sh"
+  if [[ ${use_gnu_parallel} -eq 1 ]]; then
+    # ls -S sorts by size, largest first. Use script file size as a proxy for
    # job length in order to queue up the longest jobs first.
+    # In GNU parallel, --lb means to not buffer the output, so we can see the
+    # jobs run and finish in realtime.
+    "${parallel_command}" --lb ::: $(ls -S "${merge_libraries_tmp}"/merge_*.sh)
+  else
+    # Default version of parallel has a slightly different syntax.
+    "${parallel_command}" -- $(ls -S "${merge_libraries_tmp}"/merge_*.sh)
+  fi
+  echo "All jobs finished!"
+fi
 cd "${run_path}"
 
+echo "Copying extra header files..."
 # Copy generated headers for app and analytics into the package's include directory.
 mkdir -p "${output_package_path}/include/firebase"
 cp -av \
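
The scheduling idea in the hunk above (run the smallest product first so its merge populates the shared symbol cache, then launch the remaining jobs concurrently, longest first) is independent of GNU parallel. A rough standard-library Python equivalent, with hypothetical paths and mirroring the script's use of file size as a proxy for job length, might look like:

    import glob
    import os
    import subprocess
    from concurrent.futures import ThreadPoolExecutor

    TMP = "/tmp/merge_libraries_tmp"  # hypothetical staging dir; package.sh uses mktemp -d

    def run(script):
        subprocess.run(["bash", script], check=True)
        print(os.path.basename(script), "DONE")

    # Equivalent of 'ls -S': sort by file size, largest first, as a proxy for job length.
    jobs = sorted(glob.glob(os.path.join(TMP, "merge_*.sh")),
                  key=os.path.getsize, reverse=True)

    warmup = os.path.join(TMP, "merge_analytics.sh")  # smallest SDK, cheapest cache warm-up
    run(warmup)
    jobs.remove(warmup)

    # The remaining merges serialize only on the cache file's fcntl locks.
    with ThreadPoolExecutor() as pool:
        list(pool.map(run, jobs))

Running the warm-up job serially first means the expensive symbol scans land in the cache once, instead of being recomputed by every parallel job that starts before the cache exists.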

scripts/merge_libraries.py

Lines changed: 5 additions & 0 deletions
@@ -19,6 +19,7 @@
 
 import hashlib
 import os
+import fcntl
 import pickle
 import re
 import shutil
@@ -850,7 +851,9 @@ def init_cache():
       FLAGS.cache) and os.path.getsize(FLAGS.cache) > 0:
     # If a data cache was specified, load it now.
     with open(FLAGS.cache, "rb") as handle:
+      fcntl.lockf(handle, fcntl.LOCK_SH)  # For reading, shared lock is OK.
       _cache.update(pickle.load(handle))
+      fcntl.lockf(handle, fcntl.LOCK_UN)
   else:
     # Set up a default cache dictionary.
     # _cache["symbols"] is indexed by abspath of library file
@@ -866,7 +869,9 @@ def shutdown_cache():
   if os.path.isfile(FLAGS.cache):
     os.unlink(FLAGS.cache)
   with open(FLAGS.cache, "wb") as handle:
+    fcntl.lockf(handle, fcntl.LOCK_EX)  # For writing, need exclusive lock.
     pickle.dump(_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
+    fcntl.lockf(handle, fcntl.LOCK_UN)
 
 
 def main(argv):
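
The whole scheme rests on lockf() blocking across processes. A self-contained demo of that behavior (assuming a POSIX system; nothing below comes from the SDK): process B's exclusive-lock request waits until process A releases the lock, roughly two seconds later.

    import fcntl
    import multiprocessing
    import time

    def writer(path, tag, hold_seconds, t0):
        with open(path, "wb") as f:
            fcntl.lockf(f, fcntl.LOCK_EX)   # blocks until the lock is granted
            print("%s acquired exclusive lock at t=%.1fs" % (tag, time.monotonic() - t0))
            time.sleep(hold_seconds)        # simulate a slow cache rewrite
            fcntl.lockf(f, fcntl.LOCK_UN)

    if __name__ == "__main__":
        path = "/tmp/lockf_demo.bin"        # hypothetical scratch file
        t0 = time.monotonic()
        a = multiprocessing.Process(target=writer, args=(path, "A", 2.0, t0))
        b = multiprocessing.Process(target=writer, args=(path, "B", 0.0, t0))
        a.start()
        time.sleep(0.1)                     # let A grab the lock first
        b.start()                           # B prints ~2s after A: it waited in lockf()
        a.join()
        b.join()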
