|
36 | 36 | from mypy.util import write_junit_xml |
37 | 37 | from mypyc.annotate import generate_annotated_html |
38 | 38 | from mypyc.codegen import emitmodule |
39 | | -from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name |
| 39 | +from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name |
40 | 40 | from mypyc.errors import Errors |
41 | 41 | from mypyc.ir.pprint import format_modules |
42 | 42 | from mypyc.namegen import exported_name |
@@ -70,6 +70,10 @@ class ModDesc(NamedTuple): |
70 | 70 | "base64/arch/neon64/codec.c", |
71 | 71 | ], |
72 | 72 | [ |
| 73 | + "base64/arch/avx/enc_loop_asm.c", |
| 74 | + "base64/arch/avx2/enc_loop_asm.c", |
| 75 | + "base64/arch/avx2/dec_loop.c", |
| 76 | + "base64/arch/avx2/dec_reshuffle.c", |
73 | 77 | "base64/arch/generic/32/enc_loop.c", |
74 | 78 | "base64/arch/generic/64/enc_loop.c", |
75 | 79 | "base64/arch/generic/32/dec_loop.c", |
@@ -118,6 +122,52 @@ class ModDesc(NamedTuple): |
118 | 122 | else: |
119 | 123 | from distutils import ccompiler, sysconfig |
120 | 124 |
|
# Extra compiler flags, keyed first by the compiler's ``compiler_type`` and
# then by a path component that identifies a group of source files.
#
# NOTE: the order of the inner entries is significant. The lookup in spawn()
# takes the first key that is a substring of the command-line argument, and
# "base64/arch/avx" is a substring of "base64/arch/avx2", so the "avx2" entry
# must stay ahead of the "avx" entry.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
    "unix": {
        "base64/arch/ssse3": "-mssse3",
        "base64/arch/sse41": "-msse4.1",
        "base64/arch/sse42": "-msse4.2",
        "base64/arch/avx2": "-mavx2",
        "base64/arch/avx": "-mavx",
    },
    "msvc": {
        "base64/arch/sse42": "/arch:SSE4.2",
        "base64/arch/avx2": "/arch:AVX2",
        "base64/arch/avx": "/arch:AVX",
    },
}
| 139 | + |
# Keep a reference to the unpatched CCompiler.spawn so that the replacement
# spawn() defined in this module can delegate to it.
__spawn = ccompiler.CCompiler.spawn
| 141 | + |
| 142 | + |
def spawn(self, cmd, **kwargs):  # type: ignore[no-untyped-def]
    """Run a compiler command, appending per-file extra flags when needed.

    This is a replacement for ``CCompiler.spawn``. It looks at the arguments
    of ``cmd`` for an object-file name containing one of the path components
    listed in ``EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT`` for the
    current ``self.compiler_type`` and, if one is found, adds the associated
    flag to a copy of the command before delegating to the original ``spawn``.

    Args:
        self: The compiler instance; ``compiler_type`` and ``obj_extension``
            are read from it.
        cmd: The command line as a sequence of arguments. It is not modified.
        **kwargs: Passed through unchanged to the original ``spawn``.

    Returns:
        Whatever the original ``CCompiler.spawn`` returns.
    """
    compiler_type: str = self.compiler_type
    # Compiler types without an entry in the table (e.g. "mingw32") simply get
    # no extra flags; indexing with [] here would raise KeyError for them.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if extra_options:
        obj_extension = self.obj_extension
        # Iterate over a snapshot so that growing new_cmd cannot disturb the
        # walk. Filenames are closer to the end of the command line.
        for argument in reversed(tuple(new_cmd)):
            text = str(argument)
            # Check if argument contains a filename. We must check for all
            # possible extensions; checking for target extension is faster.
            if obj_extension and not text.endswith(obj_extension):
                continue

            # The table keys use forward slashes; normalize so that paths
            # written with backslash separators match as well.
            normalized = text.replace("\\", "/")
            for path, flag in extra_options.items():
                if path in normalized:
                    if compiler_type == "bcpp":
                        # Borland accepts a source file name at the end,
                        # insert the option before it. The flag is wrapped in
                        # a list: slice-assigning a bare str would splice it
                        # in one character at a time.
                        new_cmd[-1:-1] = [flag]
                    else:
                        new_cmd.append(flag)

                    # path component is found, no need to search any further
                    break
    return __spawn(self, new_cmd, **kwargs)
| 167 | + |
| 168 | + |
# Replace CCompiler.spawn with the module-level spawn() wrapper defined above.
ccompiler.CCompiler.spawn = spawn  # type: ignore[method-assign]
| 170 | + |
121 | 171 |
|
122 | 172 | def get_extension() -> type[Extension]: |
123 | 173 | # We can work with either setuptools or distutils, and pick setuptools |
@@ -661,9 +711,6 @@ def mypycify( |
661 | 711 | # See https://github.com/mypyc/mypyc/issues/956 |
662 | 712 | "-Wno-cpp", |
663 | 713 | ] |
664 | | - if X86_64: |
665 | | - # Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2. |
666 | | - cflags.append("-msse4.2") |
667 | 714 | if log_trace: |
668 | 715 | cflags.append("-DMYPYC_LOG_TRACE") |
669 | 716 | if experimental_features: |
@@ -692,10 +739,6 @@ def mypycify( |
692 | 739 | # that we actually get the compilation speed and memory |
693 | 740 | # use wins that multi-file mode is intended for. |
694 | 741 | cflags += ["/GL-", "/wd9025"] # warning about overriding /GL |
695 | | - if X86_64: |
696 | | - # Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2. |
697 | | - # Also Windows 11 requires SSE4.2 since 24H2. |
698 | | - cflags.append("/arch:SSE4.2") |
699 | 742 | if log_trace: |
700 | 743 | cflags.append("/DMYPYC_LOG_TRACE") |
701 | 744 | if experimental_features: |
|
0 commit comments