Skip to content

Commit d270f09

Browse files
committed
librt base64: use existing SIMD CPU dispatch by customizing build flags
Inspired by https://stackoverflow.com/a/68508804
1 parent 66797fc commit d270f09

File tree

9 files changed

+129
-36
lines changed

9 files changed

+129
-36
lines changed

mypy_self_check.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pretty = True
88
always_false = MYPYC
99
plugins = mypy.plugins.proper_plugin
1010
python_version = 3.9
11-
exclude = mypy/typeshed/|mypyc/test-data/|mypyc/lib-rt/
11+
exclude = mypy/typeshed/|mypyc/test-data/
1212
enable_error_code = ignore-without-code,redundant-expr
1313
enable_incomplete_feature = PreciseTupleTypes
1414
show_error_code_links = True

mypyc/build.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from collections.abc import Iterable
2929
from typing import TYPE_CHECKING, Any, NamedTuple, NoReturn, Union, cast
3030

31+
import mypyc.build_setup # noqa: F401
3132
from mypy.build import BuildSource
3233
from mypy.errors import CompileError
3334
from mypy.fscache import FileSystemCache
@@ -36,7 +37,7 @@
3637
from mypy.util import write_junit_xml
3738
from mypyc.annotate import generate_annotated_html
3839
from mypyc.codegen import emitmodule
39-
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name
40+
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name
4041
from mypyc.errors import Errors
4142
from mypyc.ir.pprint import format_modules
4243
from mypyc.namegen import exported_name
@@ -70,6 +71,12 @@ class ModDesc(NamedTuple):
7071
"base64/arch/neon64/codec.c",
7172
],
7273
[
74+
"base64/arch/avx/enc_loop_asm.c",
75+
"base64/arch/avx2/enc_loop_asm.c",
76+
"base64/arch/avx2/enc_reshuffle.c",
77+
"base64/arch/avx2/enc_translate.c",
78+
"base64/arch/avx2/dec_loop.c",
79+
"base64/arch/avx2/dec_reshuffle.c",
7380
"base64/arch/generic/32/enc_loop.c",
7481
"base64/arch/generic/64/enc_loop.c",
7582
"base64/arch/generic/32/dec_loop.c",
@@ -661,9 +668,6 @@ def mypycify(
661668
# See https://github.com/mypyc/mypyc/issues/956
662669
"-Wno-cpp",
663670
]
664-
if X86_64:
665-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
666-
cflags.append("-msse4.2")
667671
if log_trace:
668672
cflags.append("-DMYPYC_LOG_TRACE")
669673
if experimental_features:
@@ -692,10 +696,6 @@ def mypycify(
692696
# that we actually get the compilation speed and memory
693697
# use wins that multi-file mode is intended for.
694698
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
695-
if X86_64:
696-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
697-
# Also Windows 11 requires SSE4.2 since 24H2.
698-
cflags.append("/arch:SSE4.2")
699699
if log_trace:
700700
cflags.append("/DMYPYC_LOG_TRACE")
701701
if experimental_features:

mypyc/build_setup.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import platform
2+
import sys
3+
4+
try:
5+
# Import setuptools first so that its monkeypatch overrides distutils
6+
import setuptools # noqa: F401
7+
except ImportError:
8+
pass
9+
10+
if sys.version_info >= (3, 12):
11+
# From setuptools' monkeypatch
12+
from distutils import ccompiler # type: ignore[import-not-found]
13+
else:
14+
from distutils import ccompiler
15+
16+
# Extra compiler flags, keyed first by compiler type and then by a path
# fragment. spawn() adds the flags of the first fragment that occurs in an
# object-file argument of the command being run.
# Fragments are matched as plain substrings in insertion order, so
# "base64/arch/avx2" must stay ahead of "base64/arch/avx": the shorter key is a
# substring of the longer one and would otherwise win for avx2 files.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
17+
"unix": {
18+
"base64/arch/ssse3": ["-mssse3", "-DBASE64_WITH_SSSE3"],
19+
"base64/arch/sse41": ["-msse4.1", "-DBASE64_WITH_SSE41"],
20+
"base64/arch/sse42": ["-msse4.2", "-DBASE64_WITH_SSE42"],
21+
"base64/arch/avx2": ["-mavx2", "-DBASE64_WITH_AVX2"],
22+
"base64/arch/avx": ["-mavx", "-DBASE64_WITH_AVX"],
23+
},
24+
"msvc": {
25+
"base64/arch/sse42": ["/arch:SSE4.2", "/DBASE64_WITH_SSE42"],
26+
"base64/arch/avx2": ["/arch:AVX2", "/DBASE64_WITH_AVX2"],
27+
"base64/arch/avx": ["/arch:AVX", "/DBASE64_WITH_AVX"],
28+
},
29+
}
30+
31+
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]
32+
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
33+
34+
35+
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Run a compiler command, adding per-directory SIMD flags where needed.

    Replacement for ``CCompiler.spawn``. On x86-64, every argument that names
    an object file (ends with ``self.obj_extension``) is checked against the
    path fragments registered for this compiler type in
    EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT. The flags of the first
    matching fragment are added to a copy of the command, which is then
    handed to the original ``spawn`` saved as ``__spawn``.

    Args:
        cmd: Command line as a sequence of arguments; it is not modified.
        **kwargs: Forwarded unchanged to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type
    # Only some compiler types have an entry. Use .get() so that the others
    # (e.g. "mingw32", "cygwin") run the command unchanged instead of
    # raising KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options:
        obj_extension = self.obj_extension
        # File names are closer to the end of the command line. Iterate over
        # a snapshot because new_cmd grows while flags are being added.
        for argument in reversed([str(arg) for arg in new_cmd]):
            # Only the target (object file) extension is checked; that is
            # cheaper than testing every possible source extension.
            if obj_extension and not argument.endswith(obj_extension):
                continue

            # Compare with forward slashes so the fragments also match paths
            # written with backslashes.
            normalized = argument.replace("\\", "/")
            for path, flags in extra_options.items():
                if path not in normalized:
                    continue
                if compiler_type == "bcpp":
                    # Borland accepts a source file name at the end,
                    # insert the options before it
                    source_file = new_cmd.pop()
                    new_cmd.extend(flags)
                    new_cmd.append(source_file)
                else:
                    new_cmd.extend(flags)

                # First matching fragment wins for this argument.
                break
    self.__spawn(new_cmd, **kwargs)
61+
62+
63+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]

mypyc/common.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import platform
43
import sys
54
import sysconfig
65
from typing import Any, Final
@@ -45,8 +44,6 @@
4544

4645
IS_32_BIT_PLATFORM: Final = int(SIZEOF_SIZE_T) == 4
4746

48-
X86_64: Final = platform.machine() in ("x86_64", "AMD64", "amd64")
49-
5047
PLATFORM_SIZE = 4 if IS_32_BIT_PLATFORM else 8
5148

5249
# Maximum value for a short tagged integer.

mypyc/lib-rt/base64/arch/avx/codec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include "../ssse3/dec_loop.c"
2525

2626
#if BASE64_AVX_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
2929
# include "../ssse3/enc_translate.c"
3030
# include "../ssse3/enc_reshuffle.c"

mypyc/lib-rt/base64/arch/avx2/codec.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
# endif
2121
#endif
2222

23-
#include "dec_reshuffle.c"
24-
#include "dec_loop.c"
23+
#include "./dec_reshuffle.c"
24+
#include "./dec_loop.c"
2525

2626
#if BASE64_AVX2_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
29-
# include "enc_translate.c"
30-
# include "enc_reshuffle.c"
31-
# include "enc_loop.c"
29+
# include "./enc_translate.c"
30+
# include "./enc_reshuffle.c"
31+
# include "./enc_loop.c"
3232
#endif
3333

3434
#endif // HAVE_AVX2

mypyc/lib-rt/base64/config.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
11
#ifndef BASE64_CONFIG_H
22
#define BASE64_CONFIG_H
33

4-
#define BASE64_WITH_SSSE3 0
54
#define HAVE_SSSE3 BASE64_WITH_SSSE3
65

7-
#define BASE64_WITH_SSE41 0
86
#define HAVE_SSE41 BASE64_WITH_SSE41
97

10-
#if defined(__x86_64__) || defined(_M_X64)
11-
#define BASE64_WITH_SSE42 1
12-
#else
13-
#define BASE64_WITH_SSE42 0
14-
#endif
15-
168
#define HAVE_SSE42 BASE64_WITH_SSE42
179

18-
#define BASE64_WITH_AVX 0
1910
#define HAVE_AVX BASE64_WITH_AVX
2011

21-
#define BASE64_WITH_AVX2 0
2212
#define HAVE_AVX2 BASE64_WITH_AVX2
2313

24-
#define BASE64_WITH_AVX512 0
2514
#define HAVE_AVX512 BASE64_WITH_AVX512
2615

2716
#define BASE64_WITH_NEON32 0

mypyc/lib-rt/setup.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,56 @@
2525
"pythonsupport.c",
2626
]
2727

28+
# Extra compiler flags, keyed first by compiler type and then by a path
# fragment. spawn() adds the flags of the first fragment that occurs in an
# object-file argument of the command being run.
# Fragments are matched as plain substrings in insertion order, so
# "base64/arch/avx2" must stay ahead of "base64/arch/avx": the shorter key is a
# substring of the longer one and would otherwise win for avx2 files.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
29+
"unix": {
30+
"base64/arch/ssse3": ["-mssse3", "-DBASE64_WITH_SSSE3"],
31+
"base64/arch/sse41": ["-msse4.1", "-DBASE64_WITH_SSE41"],
32+
"base64/arch/sse42": ["-msse4.2", "-DBASE64_WITH_SSE42"],
33+
"base64/arch/avx2": ["-mavx2", "-DBASE64_WITH_AVX2"],
34+
"base64/arch/avx": ["-mavx", "-DBASE64_WITH_AVX"],
35+
},
36+
"msvc": {
37+
"base64/arch/sse42": ["/arch:SSE4.2", "/DBASE64_WITH_SSE42"],
38+
"base64/arch/avx2": ["/arch:AVX2", "/DBASE64_WITH_AVX2"],
39+
"base64/arch/avx": ["/arch:AVX", "/DBASE64_WITH_AVX"],
40+
},
41+
}
42+
43+
ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn # type: ignore[attr-defined]
2844
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
2945

3046

47+
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Run a compiler command, adding per-directory SIMD flags where needed.

    Replacement for ``CCompiler.spawn``. On x86-64, every argument that names
    an object file (ends with ``self.obj_extension``) is checked against the
    path fragments registered for this compiler type in
    EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT. The flags of the first
    matching fragment are added to a copy of the command, which is then
    handed to the original ``spawn`` saved as ``__spawn``.

    Args:
        cmd: Command line as a sequence of arguments; it is not modified.
        **kwargs: Forwarded unchanged to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type
    # Only some compiler types have an entry. Use .get() so that the others
    # (e.g. "mingw32", "cygwin") run the command unchanged instead of
    # raising KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options:
        obj_extension = self.obj_extension
        # File names are closer to the end of the command line. Iterate over
        # a snapshot because new_cmd grows while flags are being added.
        for argument in reversed([str(arg) for arg in new_cmd]):
            # Only the target (object file) extension is checked; that is
            # cheaper than testing every possible source extension.
            if obj_extension and not argument.endswith(obj_extension):
                continue

            # Compare with forward slashes so the fragments also match paths
            # written with backslashes.
            normalized = argument.replace("\\", "/")
            for path, flags in extra_options.items():
                if path not in normalized:
                    continue
                if compiler_type == "bcpp":
                    # Borland accepts a source file name at the end,
                    # insert the options before it
                    source_file = new_cmd.pop()
                    new_cmd.extend(flags)
                    new_cmd.append(source_file)
                else:
                    new_cmd.extend(flags)

                # First matching fragment wins for this argument.
                break
    self.__spawn(new_cmd, **kwargs)
73+
74+
75+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]
76+
77+
3178
class BuildExtGtest(build_ext):
3279
def get_library_names(self) -> list[str]:
3380
return ["gtest"]
@@ -80,14 +127,10 @@ def run(self) -> None:
80127
compiler = ccompiler.new_compiler()
81128
sysconfig.customize_compiler(compiler)
82129
cflags: list[str] = []
83-
if compiler.compiler_type == "unix":
130+
if compiler.compiler_type == "unix": # type: ignore[attr-defined]
84131
cflags += ["-O3"]
85-
if X86_64:
86-
cflags.append("-msse4.2") # Enable SIMD (see also mypyc/build.py)
87-
elif compiler.compiler_type == "msvc":
132+
elif compiler.compiler_type == "msvc": # type: ignore[attr-defined]
88133
cflags += ["/O2"]
89-
if X86_64:
90-
cflags.append("/arch:SSE4.2") # Enable SIMD (see also mypyc/build.py)
91134

92135
setup(
93136
ext_modules=[

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def run(self) -> None:
9999
os.path.join("mypyc", "lib-rt", "setup.py"),
100100
# Uses __file__ at top level https://github.com/mypyc/mypyc/issues/700
101101
os.path.join("mypyc", "__main__.py"),
102+
os.path.join("mypyc", "build_setup.py"), # for monkeypatching
102103
)
103104

104105
everything = [os.path.join("mypy", x) for x in find_package_data("mypy", ["*.py"])] + [

0 commit comments

Comments
 (0)