Skip to content

Commit 9cc2486

Browse files
Add AVX2/AVX/SSE2 SIMD accelerated 1D/3D LUTS (#1687)
* Add AVX2/AVX/SSE2 accelerated pack/unpacking function templates Signed-off-by: Mark Reid <[email protected]> * Add AVX2/AVX/SSE2 accelerated Lut3D Tetrahedral implementations Signed-off-by: Mark Reid <[email protected]> * Add AVX2/AVX/SSE2 accelerated linear Lut1D implementations Signed-off-by: Mark Reid <[email protected]> * Fix a bunch of typos Signed-off-by: Mark Reid <[email protected]> * Remove USE_SSE code that is no longer needed Signed-off-by: Mark Reid <[email protected]> * Use alignas specifier Signed-off-by: Mark Reid <[email protected]> * Move x86 SIMD checking code to separate file Signed-off-by: Mark Reid <[email protected]> * Fix cacheID test, compare lengths and everything but the cacheID hash Signed-off-by: Mark Reid <[email protected]> * Remove debug gather code Signed-off-by: Mark Reid <[email protected]> * fixed outBD typo Signed-off-by: Mark Reid <[email protected]> --------- Signed-off-by: Mark Reid <[email protected]> Co-authored-by: Doug Walker <[email protected]>
1 parent 84948c7 commit 9cc2486

32 files changed

+4894
-447
lines changed

CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,18 @@ endif()
177177
option(OCIO_USE_SSE "Specify whether to enable SSE CPU performance optimizations" ON)
178178
option(OCIO_USE_OIIO_FOR_APPS "Request OIIO to build apps (ociolutimage, ocioconvert and ociodisplay), the default uses OpenEXR." OFF)
179179

180+
# x86 SIMD feature toggles. They are only declared when the target processor
# name matches one of the x86 / x86_64 spellings below; OCIO_ARCH_X86 is set
# alongside them so later code can tell the options exist.
if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(AMD64|IA64|EM64T|X86|x86_64|i386|i686)")
    set(OCIO_ARCH_X86 1)

    # One ON-by-default option per instruction set extension. The option
    # suffix doubles as the name shown in the help text, except for SSE42
    # which is displayed as "SSE4.2".
    foreach(_ocio_simd_ext SSE2 SSE3 SSSE3 SSE4 SSE42 AVX AVX2 AVX512 F16C)
        string(REPLACE "SSE42" "SSE4.2" _ocio_simd_label "${_ocio_simd_ext}")
        option(OCIO_USE_${_ocio_simd_ext}
               "Specify whether to enable ${_ocio_simd_label} CPU performance optimizations"
               ON)
    endforeach()
    unset(_ocio_simd_label)
endif()
180192

181193
###############################################################################
182194
# GPU configuration
@@ -190,7 +202,6 @@ include(CheckSupportGL)
190202

191203
include(CompilerFlags)
192204

193-
194205
###############################################################################
195206
# External linking options
196207

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright Contributors to the OpenColorIO Project.
3+
4+
5+
###############################################################################
6+
# Check if compiler supports X86 SIMD extensions
7+
8+
# check_cxx_compiler_flag() comes from this standard module; including it here
# means this file does not rely on the includer having loaded it already.
include(CheckCXXCompilerFlag)

# Step 1: probe the compiler.
#
# Outputs:
#   COMPILER_SUPPORTS_<EXT>  true when the compiler accepts the flag(s) for
#                            <EXT>, empty/unset otherwise.
#   OCIO_SSE2_ARGS, OCIO_AVX_ARGS, OCIO_AVX2_ARGS
#                            compiler arguments for each instruction set.
if(MSVC)
    # x86_64 always has SSE2
    if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
        set(COMPILER_SUPPORTS_SSE2 1)
    else()
        check_cxx_compiler_flag("/arch:SSE2" COMPILER_SUPPORTS_SSE2)
        set(OCIO_SSE2_ARGS "/arch:SSE2")
    endif()

    check_cxx_compiler_flag("/arch:AVX" COMPILER_SUPPORTS_AVX)
    check_cxx_compiler_flag("/arch:AVX2" COMPILER_SUPPORTS_AVX2)
    check_cxx_compiler_flag("/arch:AVX512" COMPILER_SUPPORTS_AVX512)

    # MSVC has no dedicated flags for the extensions below; when AVX is
    # available they are assumed to be available as well.
    set(COMPILER_SUPPORTS_SSE42 ${COMPILER_SUPPORTS_AVX})
    set(COMPILER_SUPPORTS_SSE4 ${COMPILER_SUPPORTS_AVX})
    set(COMPILER_SUPPORTS_SSSE3 ${COMPILER_SUPPORTS_AVX})
    set(COMPILER_SUPPORTS_SSE3 ${COMPILER_SUPPORTS_AVX})
    set(COMPILER_SUPPORTS_F16C ${COMPILER_SUPPORTS_AVX})

    set(OCIO_AVX_ARGS "/arch:AVX")
    set(OCIO_AVX2_ARGS "/arch:AVX2")
else()
    check_cxx_compiler_flag("-msse2" COMPILER_SUPPORTS_SSE2)
    check_cxx_compiler_flag("-msse3" COMPILER_SUPPORTS_SSE3)
    check_cxx_compiler_flag("-mssse3" COMPILER_SUPPORTS_SSSE3)
    check_cxx_compiler_flag("-msse4" COMPILER_SUPPORTS_SSE4)
    check_cxx_compiler_flag("-msse4.2" COMPILER_SUPPORTS_SSE42)
    check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
    # The result must land in COMPILER_SUPPORTS_AVX2 (it was previously stored
    # under a misspelled name, leaving the variable read below undefined).
    check_cxx_compiler_flag("-mavx2 -mfma -mf16c" COMPILER_SUPPORTS_AVX2)
    check_cxx_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512)
    check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORTS_F16C)

    set(OCIO_SSE2_ARGS "-msse2")
    set(OCIO_AVX_ARGS "-mavx")
    set(OCIO_AVX2_ARGS "-mavx2" "-mfma")
endif()

# Step 2: turn off every requested extension the compiler cannot build.
#
# The variables are tested by name rather than through ${...} expansion. A
# failed check_cxx_compiler_flag() leaves its result variable empty, so the
# expanded form degenerates to `if(ON AND NOT )` and the extension would stay
# enabled; an undefined OCIO_USE_<EXT> would likewise produce a malformed
# condition. Name-based tests treat empty and undefined values as false.
foreach(_ocio_simd_ext AVX512 AVX2 AVX SSE42 SSE4 SSSE3 SSE3 SSE2 F16C)
    if(OCIO_USE_${_ocio_simd_ext} AND NOT COMPILER_SUPPORTS_${_ocio_simd_ext})
        message(STATUS "OCIO_USE_${_ocio_simd_ext} requested but compiler does not support, disabling")
        set(OCIO_USE_${_ocio_simd_ext} 0)
    endif()
endforeach()
89+
90+
# When F16C is enabled on a non-MSVC compiler, add -mf16c to each per-ISA
# argument list. OCIO_USE_F16C is tested by name instead of via ${...}
# expansion, so an empty or undefined value is simply false and the value is
# never re-interpreted as the name of another variable.
if(OCIO_USE_F16C AND NOT MSVC)
    foreach(_ocio_simd_args OCIO_SSE2_ARGS OCIO_AVX_ARGS OCIO_AVX2_ARGS)
        list(APPEND ${_ocio_simd_args} -mf16c)
    endforeach()
endif()

share/cmake/utils/CompilerFlags.cmake

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ set_unless_defined(CMAKE_VISIBILITY_INLINES_HIDDEN YES)
9393
###############################################################################
9494
# Define if SSE2 can be used.
9595

96+
9697
message(STATUS "")
9798
message(STATUS "Checking for SSE2 support...")
9899
include(CheckSupportSSE2)
@@ -102,6 +103,19 @@ if(NOT HAVE_SSE2)
102103
set(OCIO_USE_SSE OFF)
103104
endif(NOT HAVE_SSE2)
104105

106+
# Without SSE none of the finer-grained x86 SIMD paths can be used, so every
# per-extension switch is forced OFF; otherwise the compiler is probed for
# each extension individually.
if(NOT OCIO_USE_SSE)
    foreach(_ocio_simd_ext SSE2 SSE3 SSSE3 SSE4 SSE42 AVX AVX2 AVX512 F16C)
        set(OCIO_USE_${_ocio_simd_ext} OFF)
    endforeach()
else()
    include(CheckSupportX86SIMD)
endif()
105119

106120
###############################################################################
107121
# Define RPATH.

0 commit comments

Comments
 (0)