Skip to content

Commit cf40955

Browse files
committed
librt base64: use existing SIMD CPU dispatch by customizing build flags
Inspired by https://stackoverflow.com/a/68508804
1 parent 66797fc commit cf40955

File tree

9 files changed

+128
-34
lines changed

9 files changed

+128
-34
lines changed

mypy_self_check.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pretty = True
88
always_false = MYPYC
99
plugins = mypy.plugins.proper_plugin
1010
python_version = 3.9
11-
exclude = mypy/typeshed/|mypyc/test-data/|mypyc/lib-rt/
11+
exclude = mypy/typeshed/|mypyc/test-data/
1212
enable_error_code = ignore-without-code,redundant-expr
1313
enable_incomplete_feature = PreciseTupleTypes
1414
show_error_code_links = True

mypyc/build.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@
3636
from mypy.util import write_junit_xml
3737
from mypyc.annotate import generate_annotated_html
3838
from mypyc.codegen import emitmodule
39-
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name
39+
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name
4040
from mypyc.errors import Errors
4141
from mypyc.ir.pprint import format_modules
4242
from mypyc.namegen import exported_name
4343
from mypyc.options import CompilerOptions
44+
import mypyc.build_setup # noqa: F401
4445

4546

4647
class ModDesc(NamedTuple):
@@ -70,6 +71,11 @@ class ModDesc(NamedTuple):
7071
"base64/arch/neon64/codec.c",
7172
],
7273
[
74+
"base64/arch/avx/enc_loop_asm.c",
75+
"base64/arch/avx2/enc_loop_asm.c",
76+
"base64/arch/avx2/enc_translate.c",
77+
"base64/arch/avx2/dec_loop.c",
78+
"base64/arch/avx2/dec_reshuffle.c",
7379
"base64/arch/generic/32/enc_loop.c",
7480
"base64/arch/generic/64/enc_loop.c",
7581
"base64/arch/generic/32/dec_loop.c",
@@ -661,9 +667,6 @@ def mypycify(
661667
# See https://github.com/mypyc/mypyc/issues/956
662668
"-Wno-cpp",
663669
]
664-
if X86_64:
665-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
666-
cflags.append("-msse4.2")
667670
if log_trace:
668671
cflags.append("-DMYPYC_LOG_TRACE")
669672
if experimental_features:
@@ -692,10 +695,6 @@ def mypycify(
692695
# that we actually get the compilation speed and memory
693696
# use wins that multi-file mode is intended for.
694697
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
695-
if X86_64:
696-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
697-
# Also Windows 11 requires SSE4.2 since 24H2.
698-
cflags.append("/arch:SSE4.2")
699698
if log_trace:
700699
cflags.append("/DMYPYC_LOG_TRACE")
701700
if experimental_features:

mypyc/build_setup.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import platform
2+
import sys
3+
4+
try:
5+
# Import setuptools so that it monkey-patch overrides distutils
6+
import setuptools
7+
except ImportError:
8+
pass
9+
10+
if sys.version_info >= (3, 12):
11+
# From setuptools' monkeypatch
12+
from distutils import ccompiler # type: ignore[import-not-found]
13+
else:
14+
from distutils import ccompiler
15+
16+
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
17+
"unix": {
18+
"base64/arch/ssse3": "-mssse3",
19+
"base64/arch/sse41": "-msse4.1",
20+
"base64/arch/sse42": "-msse4.2",
21+
"base64/arch/avx2": "-mavx2",
22+
"base64/arch/avx": "-mavx",
23+
},
24+
"msvc": {
25+
"base64/arch/sse42": "/arch:SSE4.2",
26+
"base64/arch/avx2": "/arch:AVX2",
27+
"base64/arch/avx": "/arch:AVX",
28+
},
29+
}
30+
31+
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]
32+
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
33+
34+
35+
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Run a compiler command, adding SIMD flags for matching base64 sources.

    Replacement for ``CCompiler.spawn``. On x86-64, every argument that looks
    like an object file (ends with ``self.obj_extension``) is checked against
    the path components registered for this compiler type in
    ``EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT``. The flag of the
    first component contained in the argument is added to the command, and
    the resulting command is handed to the saved original ``__spawn``.

    Args:
        cmd: Command line as a sequence of arguments. It is copied, never
            modified in place.
        **kwargs: Forwarded unchanged to the original ``spawn``.

    NOTE(review): ``__spawn`` must hold the unpatched method. If the patch
    were applied twice in one process this wrapper would end up calling
    itself -- confirm the patching code only runs once.
    """
    compiler_type: str = self.compiler_type
    # Compiler types without an entry (e.g. "mingw32", "cygwin") simply get
    # no extra flags instead of failing with KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options is not None:
        # Collect the flags first so the command is not mutated while it is
        # being scanned, and so each flag is added at most once.
        extra_flags: list[str] = []
        # filenames are closer to the end of command line
        for argument in reversed(new_cmd):
            text = str(argument)
            # Only arguments naming an object file are inspected; checking
            # the target extension is cheaper than checking every possible
            # source extension.
            if self.obj_extension and not text.endswith(self.obj_extension):
                continue

            # Keys are tried in dict order and matched as substrings, so a
            # longer component (".../avx2") must be listed before its prefix
            # (".../avx").
            for path, flag in extra_options.items():
                if path in text:
                    if flag not in extra_flags:
                        extra_flags.append(flag)
                    # path component is found, no need to search any further
                    break

        if compiler_type == "bcpp":
            # Borland accepts a source file name at the end,
            # insert the options before it
            new_cmd[-1:-1] = extra_flags
        else:
            new_cmd.extend(extra_flags)
    self.__spawn(new_cmd, **kwargs)
59+
60+
61+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]

mypyc/common.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import platform
43
import sys
54
import sysconfig
65
from typing import Any, Final
@@ -45,8 +44,6 @@
4544

4645
IS_32_BIT_PLATFORM: Final = int(SIZEOF_SIZE_T) == 4
4746

48-
X86_64: Final = platform.machine() in ("x86_64", "AMD64", "amd64")
49-
5047
PLATFORM_SIZE = 4 if IS_32_BIT_PLATFORM else 8
5148

5249
# Maximum value for a short tagged integer.

mypyc/lib-rt/base64/arch/avx/codec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include "../ssse3/dec_loop.c"
2525

2626
#if BASE64_AVX_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
2929
# include "../ssse3/enc_translate.c"
3030
# include "../ssse3/enc_reshuffle.c"

mypyc/lib-rt/base64/arch/avx2/codec.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
# endif
2121
#endif
2222

23-
#include "dec_reshuffle.c"
24-
#include "dec_loop.c"
23+
#include "./dec_reshuffle.c"
24+
#include "./dec_loop.c"
2525

2626
#if BASE64_AVX2_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
29-
# include "enc_translate.c"
30-
# include "enc_reshuffle.c"
31-
# include "enc_loop.c"
29+
# include "./enc_translate.c"
30+
# include "./enc_reshuffle.c"
31+
# include "./enc_loop.c"
3232
#endif
3333

3434
#endif // HAVE_AVX2

mypyc/lib-rt/base64/config.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,19 @@
11
#ifndef BASE64_CONFIG_H
22
#define BASE64_CONFIG_H
33

4-
#define BASE64_WITH_SSSE3 0
4+
#define BASE64_WITH_SSSE3 1
55
#define HAVE_SSSE3 BASE64_WITH_SSSE3
66

7-
#define BASE64_WITH_SSE41 0
7+
#define BASE64_WITH_SSE41 1
88
#define HAVE_SSE41 BASE64_WITH_SSE41
99

10-
#if defined(__x86_64__) || defined(_M_X64)
1110
#define BASE64_WITH_SSE42 1
12-
#else
13-
#define BASE64_WITH_SSE42 0
14-
#endif
15-
1611
#define HAVE_SSE42 BASE64_WITH_SSE42
1712

18-
#define BASE64_WITH_AVX 0
13+
#define BASE64_WITH_AVX 1
1914
#define HAVE_AVX BASE64_WITH_AVX
2015

21-
#define BASE64_WITH_AVX2 0
16+
#define BASE64_WITH_AVX2 1
2217
#define HAVE_AVX2 BASE64_WITH_AVX2
2318

2419
#define BASE64_WITH_AVX512 0

mypyc/lib-rt/setup.py

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,54 @@
2525
"pythonsupport.c",
2626
]
2727

28+
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
29+
"unix": {
30+
"base64/arch/ssse3": "-mssse3",
31+
"base64/arch/sse41": "-msse4.1",
32+
"base64/arch/sse42": "-msse4.2",
33+
"base64/arch/avx2": "-mavx2",
34+
"base64/arch/avx": "-mavx",
35+
},
36+
"msvc": {
37+
"base64/arch/sse42": "/arch:SSE4.2",
38+
"base64/arch/avx2": "/arch:AVX2",
39+
"base64/arch/avx": "/arch:AVX",
40+
},
41+
}
42+
43+
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]
2844
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
2945

3046

47+
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Spawn a compiler command with per-directory SIMD flags on x86-64.

    Installed in place of ``CCompiler.spawn``. Arguments ending with
    ``self.obj_extension`` are treated as object-file paths; when such a path
    contains one of the path components listed for the current compiler type
    in ``EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT``, the associated
    flag is added to a copy of the command. The copy is then executed through
    the original method saved as ``__spawn``.

    Args:
        cmd: Sequence of command-line arguments; left unmodified.
        **kwargs: Passed through to the original ``spawn``.

    NOTE(review): assumes ``__spawn`` is the unpatched ``spawn``. Applying
    the monkeypatch a second time in the same process would point it at this
    wrapper and recurse -- verify that cannot happen.
    """
    compiler_type: str = self.compiler_type
    # Use .get(): compiler types with no table entry (e.g. "mingw32") must
    # fall through without extra flags rather than raise KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options is not None:
        # Gather flags separately: avoids mutating new_cmd during the scan
        # and keeps each flag from being added more than once.
        extra_flags: list[str] = []
        # filenames are closer to the end of command line
        for argument in reversed(new_cmd):
            text = str(argument)
            # Inspect object-file arguments only; testing the target
            # extension is faster than testing all source extensions.
            if self.obj_extension and not text.endswith(self.obj_extension):
                continue

            # Substring match in dict order: ".../avx2" has to come before
            # ".../avx" in the table or it would never be selected.
            for path, flag in extra_options.items():
                if path in text:
                    if flag not in extra_flags:
                        extra_flags.append(flag)
                    # path component is found, no need to search any further
                    break

        if compiler_type == "bcpp":
            # Borland accepts a source file name at the end,
            # insert the options before it
            new_cmd[-1:-1] = extra_flags
        else:
            new_cmd.extend(extra_flags)
    self.__spawn(new_cmd, **kwargs)
71+
72+
73+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]
74+
75+
3176
class BuildExtGtest(build_ext):
3277
def get_library_names(self) -> list[str]:
3378
return ["gtest"]
@@ -80,14 +125,10 @@ def run(self) -> None:
80125
compiler = ccompiler.new_compiler()
81126
sysconfig.customize_compiler(compiler)
82127
cflags: list[str] = []
83-
if compiler.compiler_type == "unix":
128+
if compiler.compiler_type == "unix": # type: ignore[attr-defined]
84129
cflags += ["-O3"]
85-
if X86_64:
86-
cflags.append("-msse4.2") # Enable SIMD (see also mypyc/build.py)
87-
elif compiler.compiler_type == "msvc":
130+
elif compiler.compiler_type == "msvc": # type: ignore[attr-defined]
88131
cflags += ["/O2"]
89-
if X86_64:
90-
cflags.append("/arch:SSE4.2") # Enable SIMD (see also mypyc/build.py)
91132

92133
setup(
93134
ext_modules=[

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def run(self) -> None:
9999
os.path.join("mypyc", "lib-rt", "setup.py"),
100100
# Uses __file__ at top level https://github.com/mypyc/mypyc/issues/700
101101
os.path.join("mypyc", "__main__.py"),
102+
os.path.join("mypyc", "build_setup.py"), # for monkeypatching
102103
)
103104

104105
everything = [os.path.join("mypy", x) for x in find_package_data("mypy", ["*.py"])] + [

0 commit comments

Comments
 (0)