Skip to content

Commit d40a4ab

Browse files
setup.py: Fixes for mupdf master.
We use a shorter build directory name with mupdf-1.26 to avoid problems on some machines where the link command becomes too long. Also avoid a mupdf build error with mupdf master's updated tesseract on GitHub macos-13: there we need to use Clang/LLVM (Homebrew) 15.0.7, found in $(brew --prefix llvm@15)/bin/clang.
1 parent 97d33df commit d40a4ab

File tree

1 file changed

+81
-9
lines changed

1 file changed

+81
-9
lines changed

setup.py

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ def log( text):
208208
sys.stdout.flush()
209209

210210

211+
def run(command, check=1):
    '''
    Logs <command> and then runs it through the shell.

    Returns the `subprocess.CompletedProcess` from `subprocess.run()`. If
    <check> is true, `subprocess.run()` raises `subprocess.CalledProcessError`
    when the command exits with a non-zero status.
    '''
    log(f'Running: {command}')
    completed = subprocess.run(command, shell=True, check=check)
    return completed
214+
215+
211216
if 1:
212217
# For debugging.
213218
log(f'### Starting.')
@@ -218,6 +223,7 @@ def log( text):
218223
log(f'CPU bits: {32 if sys.maxsize == 2**31 - 1 else 64} {sys.maxsize=}')
219224
log(f'__file__: {__file__!r}')
220225
log(f'os.getcwd(): {os.getcwd()!r}')
226+
log(f'getconf ARG_MAX: {pipcl.run("getconf ARG_MAX", capture=1, check=0, verbose=0)!r}')
221227
log(f'sys.argv ({len(sys.argv)}):')
222228
for i, arg in enumerate(sys.argv):
223229
log(f' {i}: {arg!r}')
@@ -271,11 +277,6 @@ def error_fn(fn, path, excinfo):
271277
assert not os.path.exists( path)
272278

273279

274-
def run(command, check=1):
275-
log(f'Running: {command}')
276-
return subprocess.run( command, shell=1, check=check)
277-
278-
279280
def _git_get_branch( directory):
280281
command = f'cd {directory} && git branch --show-current'
281282
log( f'Running: {command}')
@@ -364,7 +365,7 @@ def tar_extract(path, mode='r:gz', prefix=None, exists='raise'):
364365
return prefix_actual
365366

366367

367-
def get_git_id( directory):
368+
def git_info( directory):
368369
'''
369370
Returns `(sha, comment, diff, branch)`, all items are str or None if not
370371
available.
@@ -390,10 +391,42 @@ def get_git_id( directory):
390391
)
391392
if cp.returncode == 0:
392393
branch = cp.stdout.strip()
393-
log(f'get_git_id(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}')
394+
log(f'git_info(): directory={directory!r} returning branch={branch!r} sha={sha!r} comment={comment!r}')
394395
return sha, comment, diff, branch
395396

396397

398+
def git_patch(directory, patch, hard=False):
    '''
    Applies string <patch> with `git apply` in <directory>.

    Args:
        directory:
            Path of the git checkout to patch.
        patch:
            Text of the patch. If empty or None we do nothing.
        hard:
            If true we clean the tree with `git checkout .` and then apply
            the patch.

            Otherwise we apply the patch only if it is not already applied;
            this might fail if there are conflicting changes in the tree.

    The patch text is written to `<directory>/pymupdf_patch.txt` and that file
    is left in place afterwards. Finally we run `git diff` in <directory> so
    that the resulting state of the tree appears in the build output.
    '''
    # Check for an empty patch first, so that patch=None is a no-op rather
    # than an error inside textwrap.indent().
    if not patch:
        log(f'Not patching {directory} because patch is empty.')
        return
    log(f'Applying patch in {directory}:\n{textwrap.indent(patch, " ")}')

    # Carriage returns break `git apply` so we use `newline='\n'` in open().
    path = os.path.abspath(f'{directory}/pymupdf_patch.txt')
    with open(path, 'w', newline='\n') as f:
        f.write(patch)
    log(f'Using patch file: {path}')

    if hard:
        # Discard local modifications to tracked files, so the patch is
        # always applied to a clean tree.
        run(f'cd {directory} && git checkout .')
        already_patched = False
    else:
        # `git apply --check --reverse` succeeds only if the patch could be
        # cleanly un-applied, i.e. it is already present in the tree.
        #
        # run() returns a subprocess.CompletedProcess, so we must look at
        # its .returncode; comparing the object itself with 0 is never true.
        cp = run(f'cd {directory} && git apply --check --reverse {path}', check=0)
        already_patched = (cp.returncode == 0)

    if already_patched:
        log(f'Not patching {directory} because already patched.')
    else:
        run(f'cd {directory} && git apply {path}')
        log(f'Have applied patch in {directory}.')

    run(f'cd {directory} && git diff')
428+
429+
397430
mupdf_tgz = os.path.abspath( f'{__file__}/../mupdf.tgz')
398431

399432
def get_mupdf_internal(out, location=None, sha=None, local_tgz=None):
@@ -444,7 +477,8 @@ def get_mupdf_internal(out, location=None, sha=None, local_tgz=None):
444477
if e:
445478
# No existing git checkout, so do a fresh clone.
446479
_fs_remove(local_dir)
447-
run(f'git clone --recursive --depth 1 --shallow-submodules {location[4:]} {local_dir}')
480+
gitargs = location[4:]
481+
run(f'git clone --recursive --depth 1 --shallow-submodules {gitargs} {local_dir}')
448482

449483
# Show sha of checkout.
450484
run( f'cd {local_dir} && git show --pretty=oneline|head -n 1', check=False)
@@ -856,6 +890,34 @@ def build_mupdf_unix(
856890

857891
if openbsd or freebsd:
858892
env_add(env, 'CXX', 'c++', ' ')
893+
894+
if darwin and os.environ.get('GITHUB_ACTIONS') == 'true':
895+
if os.environ.get('ImageOS') == 'macos13':
896+
# On Github macos13 we need to use Clang/LLVM (Homebrew) 15.0.7,
897+
# otherwise mupdf:thirdparty/tesseract/src/api/baseapi.cpp fails to
898+
# compile with:
899+
#
900+
# thirdparty/tesseract/src/api/baseapi.cpp:150:25: error: 'recursive_directory_iterator' is unavailable: introduced in macOS 10.15
901+
#
902+
# See:
903+
# https://github.com/actions/runner-images/blob/main/images/macos/macos-13-Readme.md
904+
#
905+
log(f'Using llvm@15 clang and clang++')
906+
cl15 = pipcl.run(f'brew --prefix llvm@15', capture=1)
907+
log(f'{cl15=}')
908+
cl15 = cl15.strip()
909+
pipcl.run(f'ls -lL {cl15}')
910+
pipcl.run(f'ls -lL {cl15}/bin')
911+
cc = f'{cl15}/bin/clang'
912+
cxx = f'{cl15}/bin/clang++'
913+
env['CC'] = cc
914+
env['CXX'] = cxx
915+
916+
# Show compiler versions.
917+
cc = env.get('CC', 'cc')
918+
cxx = env.get('CXX', 'c++')
919+
pipcl.run(f'{cc} --version')
920+
pipcl.run(f'{cxx} --version')
859921

860922
# Add extra flags for MacOS cross-compilation, where ARCHFLAGS can be
861923
# '-arch arm64'.
@@ -865,6 +927,8 @@ def build_mupdf_unix(
865927
env_add(env, 'XCFLAGS', archflags)
866928
env_add(env, 'XLIBS', archflags)
867929

930+
mupdf_version_tuple = get_mupdf_version(mupdf_local)
931+
868932
# We specify a build directory path containing 'pymupdf' so that we
869933
# coexist with non-PyMuPDF builds (because PyMuPDF builds have a
870934
# different config.h).
@@ -877,7 +941,16 @@ def build_mupdf_unix(
877941
# $_PYTHON_HOST_PLATFORM allows cross-compiled cibuildwheel builds
878942
# to coexist, e.g. on github.
879943
#
944+
# Have experimented with looking at getconf_ARG_MAX to decide whether to
945+
# omit `PyMuPDF-` from the build directory, to avoid command-too-long
946+
# errors with mupdf-1.26. But it seems that `getconf ARG_MAX` returns
947+
# a system limit, not the actual limit of the current shell, and there
948+
# doesn't seem to be a way to find the current shell's limit.
949+
#
880950
build_prefix = f'PyMuPDF-'
951+
if mupdf_version_tuple >= (1, 26):
952+
# Avoid link command length problems seen on musllinux.
953+
build_prefix = ''
881954
if pyodide:
882955
build_prefix += 'pyodide-'
883956
else:
@@ -894,7 +967,6 @@ def build_mupdf_unix(
894967
log(f'PYMUPDF_SETUP_MUPDF_TESSERACT=0 so building mupdf without tesseract.')
895968
else:
896969
build_prefix += 'tesseract-'
897-
mupdf_version_tuple = get_mupdf_version(mupdf_local)
898970
if (
899971
linux
900972
and os.environ.get('PYMUPDF_SETUP_MUPDF_BSYMBOLIC', '1') == '1'

0 commit comments

Comments
 (0)