@@ -274,6 +274,38 @@ def define_xnnpack():
274274 ],
275275 )
276276
277+ SSE2_FMA_COMPILER_FLAGS = [
278+ "-msse2" ,
279+ "-mno-sse3" ,
280+ ]
281+
282+ native .cxx_library (
283+ name = "ukernels_sse2fma" ,
284+ srcs = select ({
285+ "DEFAULT" : prod_srcs_for_arch_wrapper ("sse2fma" ),
286+ "ovr_config//cpu:arm32" : DEFAULT_DUMMY_SRC ,
287+ "ovr_config//cpu:arm64" : DEFAULT_DUMMY_SRC ,
288+ }),
289+ headers = get_xnnpack_headers (),
290+ header_namespace = "" ,
291+ compiler_flags = [
292+ "-O2" ,
293+ "-Wno-error=missing-braces" , # required since the SGX toolchain does not have this by default
294+ ] + select ({
295+ "DEFAULT" : SSE2_FMA_COMPILER_FLAGS ,
296+ "ovr_config//cpu:arm32" : [],
297+ "ovr_config//cpu:arm64" : [],
298+ }),
299+ preferred_linkage = "static" ,
300+ preprocessor_flags = [
301+ "-DXNN_LOG_LEVEL=0" ,
302+ ],
303+ exported_deps = [
304+ ":FP16" ,
305+ ":interface" ,
306+ ],
307+ )
308+
277309 SSE3_COMPILER_FLAGS = ["-mssse3" ]
278310
279311 # @lint-ignore BUCKLINT: native and fb_native are explicitly forbidden in fbcode.
@@ -961,6 +993,44 @@ def define_xnnpack():
961993 ],
962994 )
963995
996+ AMD64_COMPILER_FLAGS = [
997+ "-mf16c" ,
998+ "-mfma" ,
999+ "-mavx512f" ,
1000+ "-mavx512cd" ,
1001+ "-mavx512bw" ,
1002+ "-mavx512dq" ,
1003+ "-mavx512vl" ,
1004+ "-mavx512vnni" ,
1005+ "-mgfni" ,
1006+ ]
1007+ native .cxx_library (
1008+ name = "ukernels_amd64" ,
1009+ srcs = select ({
1010+ "DEFAULT" : prod_srcs_for_arch_wrapper ("amd64" ),
1011+ "ovr_config//cpu:arm32" : DEFAULT_DUMMY_SRC ,
1012+ "ovr_config//cpu:arm64" : DEFAULT_DUMMY_SRC ,
1013+ }),
1014+ headers = get_xnnpack_headers (),
1015+ header_namespace = "" ,
1016+ compiler_flags = [
1017+ "-O2" ,
1018+ "-Wno-error=missing-braces" , # required since the SGX toolchain does not have this by default
1019+ ] + select ({
1020+ "DEFAULT" : AMD64_COMPILER_FLAGS ,
1021+ "ovr_config//cpu:arm32" : [],
1022+ "ovr_config//cpu:arm64" : [],
1023+ }),
1024+ preferred_linkage = "static" ,
1025+ preprocessor_flags = [
1026+ "-DXNN_LOG_LEVEL=0" ,
1027+ ],
1028+ exported_deps = [
1029+ ":FP16" ,
1030+ ":interface" ,
1031+ ],
1032+ )
1033+
9641034 AVX512VNNIGFNI_COMPILER_FLAGS = AVX512VNNI_COMPILER_FLAGS + [
9651035 "-mgfni" ,
9661036 ]
@@ -1044,12 +1114,14 @@ def define_xnnpack():
10441114 ":ukernels_fma3" ,
10451115 ":ukernels_sse" ,
10461116 ":ukernels_sse2" ,
1117+ ":ukernels_sse2fma" ,
10471118 ":ukernels_sse41" ,
10481119 ":ukernels_ssse3" ,
10491120 ":ukernels_avx512vbmi" ,
10501121 ":ukernels_avx512vnnigfni" ,
10511122 ":ukernels_avx512vnni" ,
10521123 ":ukernels_avxvnni" ,
1124+ ":ukernels_amd64" ,
10531125 ]
10541126
10551127 ARM_XNNPACK_DEPS = [
0 commit comments