Skip to content

Commit eb72ce2

Browse files
authored
Merge pull request godotengine#59595 from Calinou/scons-use-sse4.2
Use SSE 4.2 as a baseline when compiling Godot
2 parents b89c47b + be1f9a8 commit eb72ce2

File tree

2 files changed

+26
-5
lines changed

2 files changed

+26
-5
lines changed

SConstruct

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -731,11 +731,25 @@ elif env.msvc:
731731
)
732732
Exit(255)
733733

734-
# Default architecture flags.
735-
if env["arch"] == "x86_32":
736-
if env.msvc:
734+
# Set the x86 CPU instruction sets that the compiler's autovectorization is allowed to use.
735+
if env["arch"] == "x86_64":
736+
# On 64-bit x86, enable SSE 4.2 and prior instruction sets (SSE3/SSSE3/SSE4.1) to improve performance.
737+
# This is supported on most CPUs released after 2009-2011 (Intel Nehalem, AMD Bulldozer).
738+
# AVX and AVX2 aren't enabled because they are still unavailable on some fairly recent low-end Intel CPUs.
739+
if env.msvc and not methods.using_clang(env):
740+
# https://stackoverflow.com/questions/64053597/how-do-i-enable-sse4-1-and-sse3-but-not-avx-in-msvc/69328426
741+
env.Append(CCFLAGS=["/d2archSSE42"])
742+
else:
743+
# `-msse2` is implied when compiling for x86_64.
744+
env.Append(CCFLAGS=["-msse4.2"])
745+
elif env["arch"] == "x86_32":
746+
# Be more conservative with instruction sets on 32-bit x86 to improve compatibility.
747+
# SSE and SSE2 are present on all CPUs that support 64-bit, even if running a 32-bit OS.
748+
if env.msvc and not methods.using_clang(env):
737749
env.Append(CCFLAGS=["/arch:SSE2"])
738750
else:
751+
# Use `-mfpmath=sse` to use SSE for floating-point math, which is more stable than x87.
752+
# `-mstackrealign` is needed for it to work.
739753
env.Append(CCFLAGS=["-msse2", "-mfpmath=sse", "-mstackrealign"])
740754

741755
# Explicitly specify colored output.

modules/raycast/SCsub

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,16 @@ if env["builtin_embree"]:
8080
env_thirdparty.disable_warnings()
8181
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
8282

83+
# Set x86 CPU instruction sets to use when building Embree's own intrinsics.
84+
# Keep this in sync with Godot's main SConstruct file.
85+
# This is only needed on MSVC, as GCC/Clang will set those defines automatically
86+
# according to compiler instruction set flags.
8387
if env["arch"] != "x86_64" or env.msvc:
84-
# Embree needs those, it will automatically use SSE2NEON in ARM
85-
env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"])
88+
# Embree needs these defines; it will automatically use SSE2NEON on ARM.
89+
env_thirdparty.Append(CPPDEFINES=["__SSE__", "__SSE2__"])
90+
91+
if env["arch"] == "x86_64" and env.msvc:
92+
env_thirdparty.Append(CPPDEFINES=["__SSE3__", "__SSSE3__", "__SSE4_1__", "__SSE4_2__"])
8693

8794
if env["platform"] == "web":
8895
env_thirdparty.Append(CXXFLAGS=["-msimd128"])

0 commit comments

Comments
 (0)