Skip to content

Commit a861a3b

Browse files
committed
librt base64: use existing SIMD CPU dispatch by customizing build flags
Inspired by https://stackoverflow.com/a/68508804
1 parent 0c6593b commit a861a3b

File tree

7 files changed

+111
-36
lines changed

7 files changed

+111
-36
lines changed

mypy_self_check.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pretty = True
88
always_false = MYPYC
99
plugins = mypy.plugins.proper_plugin
1010
python_version = 3.9
11-
exclude = mypy/typeshed/|mypyc/test-data/|mypyc/lib-rt/
11+
exclude = mypy/typeshed/|mypyc/test-data/
1212
enable_error_code = ignore-without-code,redundant-expr
1313
enable_incomplete_feature = PreciseTupleTypes
1414
show_error_code_links = True

mypyc/build.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from mypy.util import write_junit_xml
3737
from mypyc.annotate import generate_annotated_html
3838
from mypyc.codegen import emitmodule
39-
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name
39+
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name
4040
from mypyc.errors import Errors
4141
from mypyc.ir.pprint import format_modules
4242
from mypyc.namegen import exported_name
@@ -70,6 +70,10 @@ class ModDesc(NamedTuple):
7070
"base64/arch/neon64/codec.c",
7171
],
7272
[
73+
"base64/arch/avx/enc_loop_asm.c",
74+
"base64/arch/avx2/enc_loop_asm.c",
75+
"base64/arch/avx2/dec_loop.c",
76+
"base64/arch/avx2/dec_reshuffle.c",
7377
"base64/arch/generic/32/enc_loop.c",
7478
"base64/arch/generic/64/enc_loop.c",
7579
"base64/arch/generic/32/dec_loop.c",
@@ -118,6 +122,52 @@ class ModDesc(NamedTuple):
118122
else:
119123
from distutils import ccompiler, sysconfig
120124

125+
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
126+
"unix": {
127+
"base64/arch/ssse3": "-mssse3",
128+
"base64/arch/sse41": "-msse4.1",
129+
"base64/arch/sse42": "-msse4.2",
130+
"base64/arch/avx2": "-mavx2",
131+
"base64/arch/avx": "-mavx",
132+
},
133+
"msvc": {
134+
"base64/arch/sse42": "/arch:SSE4.2",
135+
"base64/arch/avx2": "/arch:AVX2",
136+
"base64/arch/avx": "/arch:AVX",
137+
},
138+
}
139+
140+
__spawn = ccompiler.CCompiler.spawn
141+
142+
143+
def spawn(self, cmd, **kwargs): # type: ignore[no-untyped-def]
144+
compiler_type: str = self.compiler_type
145+
extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT[compiler_type]
146+
new_cmd = list(cmd)
147+
if extra_options is not None:
148+
# filenames are closer to the end of command line
149+
for argument in reversed(new_cmd):
150+
# Check if argument contains a filename. We must check for all
151+
# possible extensions; checking for target extension is faster.
152+
if self.obj_extension and not str(argument).endswith(self.obj_extension):
153+
continue
154+
155+
for path in extra_options.keys():
156+
if path in str(argument):
157+
if compiler_type == "bcpp":
158+
# Borland accepts a source file name at the end,
159+
# insert the options before it
160+
new_cmd[-1:-1] = extra_options[path]
161+
else:
162+
new_cmd.append(extra_options[path])
163+
164+
# path component is found, no need to search any further
165+
break
166+
__spawn(self, new_cmd, **kwargs)
167+
168+
169+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]
170+
121171

122172
def get_extension() -> type[Extension]:
123173
# We can work with either setuptools or distutils, and pick setuptools
@@ -661,9 +711,6 @@ def mypycify(
661711
# See https://github.com/mypyc/mypyc/issues/956
662712
"-Wno-cpp",
663713
]
664-
if X86_64:
665-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
666-
cflags.append("-msse4.2")
667714
if log_trace:
668715
cflags.append("-DMYPYC_LOG_TRACE")
669716
if experimental_features:
@@ -692,10 +739,6 @@ def mypycify(
692739
# that we actually get the compilation speed and memory
693740
# use wins that multi-file mode is intended for.
694741
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
695-
if X86_64:
696-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
697-
# Also Windows 11 requires SSE4.2 since 24H2.
698-
cflags.append("/arch:SSE4.2")
699742
if log_trace:
700743
cflags.append("/DMYPYC_LOG_TRACE")
701744
if experimental_features:

mypyc/common.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import platform
43
import sys
54
import sysconfig
65
from typing import Any, Final
@@ -45,8 +44,6 @@
4544

4645
IS_32_BIT_PLATFORM: Final = int(SIZEOF_SIZE_T) == 4
4746

48-
X86_64: Final = platform.machine() in ("x86_64", "AMD64", "amd64")
49-
5047
PLATFORM_SIZE = 4 if IS_32_BIT_PLATFORM else 8
5148

5249
# Maximum value for a short tagged integer.

mypyc/lib-rt/base64/arch/avx/codec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include "../ssse3/dec_loop.c"
2525

2626
#if BASE64_AVX_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
2929
# include "../ssse3/enc_translate.c"
3030
# include "../ssse3/enc_reshuffle.c"

mypyc/lib-rt/base64/arch/avx2/codec.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
# endif
2121
#endif
2222

23-
#include "dec_reshuffle.c"
24-
#include "dec_loop.c"
23+
#include "./dec_reshuffle.c"
24+
#include "./dec_loop.c"
2525

2626
#if BASE64_AVX2_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
29-
# include "enc_translate.c"
30-
# include "enc_reshuffle.c"
31-
# include "enc_loop.c"
29+
# include "./enc_translate.c"
30+
# include "./enc_reshuffle.c"
31+
# include "./enc_loop.c"
3232
#endif
3333

3434
#endif // HAVE_AVX2

mypyc/lib-rt/base64/config.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,19 @@
11
#ifndef BASE64_CONFIG_H
22
#define BASE64_CONFIG_H
33

4-
#define BASE64_WITH_SSSE3 0
4+
#define BASE64_WITH_SSSE3 1
55
#define HAVE_SSSE3 BASE64_WITH_SSSE3
66

7-
#define BASE64_WITH_SSE41 0
7+
#define BASE64_WITH_SSE41 1
88
#define HAVE_SSE41 BASE64_WITH_SSE41
99

10-
#if defined(__x86_64__) || defined(_M_X64)
1110
#define BASE64_WITH_SSE42 1
12-
#else
13-
#define BASE64_WITH_SSE42 0
14-
#endif
15-
1611
#define HAVE_SSE42 BASE64_WITH_SSE42
1712

18-
#define BASE64_WITH_AVX 0
13+
#define BASE64_WITH_AVX 1
1914
#define HAVE_AVX BASE64_WITH_AVX
2015

21-
#define BASE64_WITH_AVX2 0
16+
#define BASE64_WITH_AVX2 1
2217
#define HAVE_AVX2 BASE64_WITH_AVX2
2318

2419
#define BASE64_WITH_AVX512 0

mypyc/lib-rt/setup.py

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
from __future__ import annotations
77

88
import os
9-
import platform
109
import subprocess
1110
import sys
11+
from collections.abc import Iterable
1212
from distutils import ccompiler, sysconfig
1313
from typing import Any
1414

@@ -25,7 +25,51 @@
2525
"pythonsupport.c",
2626
]
2727

28-
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
28+
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
29+
"unix": {
30+
"base64/arch/ssse3": "-mssse3",
31+
"base64/arch/sse41": "-msse4.1",
32+
"base64/arch/sse42": "-msse4.2",
33+
"base64/arch/avx2": "-mavx2",
34+
"base64/arch/avx": "-mavx",
35+
},
36+
"msvc": {
37+
"base64/arch/sse42": "/arch:SSE4.2",
38+
"base64/arch/avx2": "/arch:AVX2",
39+
"base64/arch/avx": "/arch:AVX",
40+
},
41+
}
42+
43+
__spawn = ccompiler.CCompiler.spawn
44+
45+
46+
def spawn(self: ccompiler.CCompiler, cmd: Iterable[str], **kwargs: Any) -> None:
47+
compiler_type: str = self.compiler_type # type: ignore[attr-defined]
48+
extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT[compiler_type]
49+
new_cmd = list(cmd)
50+
if extra_options is not None:
51+
# filenames are closer to the end of command line
52+
for argument in reversed(new_cmd):
53+
# Check if argument contains a filename. We must check for all
54+
# possible extensions; checking for target extension is faster.
55+
if self.obj_extension and not str(argument).endswith(self.obj_extension): # type: ignore[attr-defined]
56+
continue
57+
58+
for path in extra_options.keys():
59+
if path in str(argument):
60+
if compiler_type == "bcpp":
61+
# Borland accepts a source file name at the end,
62+
# insert the options before it
63+
new_cmd[-1:-1] = extra_options[path]
64+
else:
65+
new_cmd.append(extra_options[path])
66+
67+
# path component is found, no need to search any further
68+
break
69+
__spawn(self, new_cmd, **kwargs)
70+
71+
72+
ccompiler.CCompiler.spawn = spawn # type: ignore[method-assign]
2973

3074

3175
class BuildExtGtest(build_ext):
@@ -80,14 +124,10 @@ def run(self) -> None:
80124
compiler = ccompiler.new_compiler()
81125
sysconfig.customize_compiler(compiler)
82126
cflags: list[str] = []
83-
if compiler.compiler_type == "unix":
127+
if compiler.compiler_type == "unix": # type: ignore[attr-defined]
84128
cflags += ["-O3"]
85-
if X86_64:
86-
cflags.append("-msse4.2") # Enable SIMD (see also mypyc/build.py)
87-
elif compiler.compiler_type == "msvc":
129+
elif compiler.compiler_type == "msvc": # type: ignore[attr-defined]
88130
cflags += ["/O2"]
89-
if X86_64:
90-
cflags.append("/arch:SSE4.2") # Enable SIMD (see also mypyc/build.py)
91131

92132
setup(
93133
ext_modules=[

0 commit comments

Comments
 (0)