@@ -731,11 +731,25 @@ elif env.msvc:
731731 )
732732 Exit (255 )
733733
734- # Default architecture flags.
735- if env ["arch" ] == "x86_32" :
736- if env .msvc :
734+ # Choose which x86 CPU instruction sets the compiler may use, notably for autovectorization.
735+ if env ["arch" ] == "x86_64" :
736+ # On 64-bit x86, enable SSE 4.2 and all earlier instruction sets (SSE3/SSSE3/SSE4.1) to improve performance.
737+ # SSE 4.2 is supported by Intel CPUs since Nehalem (2008) and by AMD CPUs since Bulldozer (2011).
738+ # AVX and AVX2 aren't enabled because some recent low-end Intel CPUs still lack them.
739+ if env .msvc and not methods .using_clang (env ):
740+ # https://stackoverflow.com/questions/64053597/how-do-i-enable-sse4-1-and-sse3-but-not-avx-in-msvc/69328426
741+ env .Append (CCFLAGS = ["/d2archSSE42" ])
742+ else :
743+ # `-msse2` is implied when compiling for x86_64.
744+ env .Append (CCFLAGS = ["-msse4.2" ])
745+ elif env ["arch" ] == "x86_32" :
746+ # Be more conservative with instruction sets on 32-bit x86 to improve compatibility.
747+ # SSE and SSE2 are present on all CPUs that support 64-bit, even if running a 32-bit OS.
748+ if env .msvc and not methods .using_clang (env ):
737749 env .Append (CCFLAGS = ["/arch:SSE2" ])
738750 else :
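751+ # Use `-mfpmath=sse` so floating-point math is done with SSE instead of x87; this gives more
751+ # consistent results because x87 keeps intermediates in 80-bit extended precision.
752+ # `-mstackrealign` is needed for it to work, as SSE requires a 16-byte-aligned stack that
752+ # 32-bit callers do not always provide.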
739753 env .Append (CCFLAGS = ["-msse2" , "-mfpmath=sse" , "-mstackrealign" ])
740754
741755# Explicitly specify colored output.
0 commit comments