Skip to content

Commit 0698252

Browse files
authored
[fix](kt-kernel): gate RAWINT4 behind AVX512 and avoid AVX2 build break (#1660)
1 parent 670c488 commit 0698252

File tree

2 files changed: 11 additions and 4 deletions

2 files changed

+11
-4
lines changed

kt-kernel/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR
268268
list(APPEND ARCH_FLAGS -mavx2 -mfma -msse3 -mf16c)
269269
endif()
270270
if(LLAMA_AVX512)
271-
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c -msse3)
271+
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mavx512dq -mfma -mf16c -msse3)
272272
endif()
273273
if(LLAMA_AVX512_VBMI)
274274
list(APPEND ARCH_FLAGS -mavx512vbmi)
@@ -639,4 +639,3 @@ else()
639639
message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev")
640640
endif()
641641

642-

kt-kernel/setup.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,7 @@ def find_nvcc_path() -> str | None:
292292
cmake_args += cpu_feature_flags()
293293
d = self.detect_cpu_info()
294294
print(f"Detected CPU info: {d}")
295+
cpu_mode = os.environ.get("CPUINFER_CPU_INSTRUCT", "NATIVE").upper()
295296

296297
# Vendor / feature specific toggles
297298
# AMD MoE: explicit env overrides; otherwise default ON on AMD CPU
@@ -314,11 +315,18 @@ def find_nvcc_path() -> str | None:
314315
if "AMX" in d["features"]:
315316
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
316317
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
317-
# AVX512 umbrella: explicit env overrides; else enable if AMX or AVX512 detected
318+
319+
# AVX512 umbrella (AMX/AVX512 kernels):
320+
# - If user explicitly sets CPUINFER_ENABLE_AVX512 -> honor it
321+
# - Otherwise, only auto-enable when CPU mode actually wants AVX512
322+
# (NATIVE/FANCY/AVX512). In AVX2 mode we do NOT enable this, so
323+
# RAWINT4 / K2 kernels are not compiled.
318324
if not _forward_bool_env(cmake_args, "CPUINFER_ENABLE_AVX512", "KTRANSFORMERS_CPU_USE_AMX_AVX512"):
319-
if "AMX" in d["features"] or "AVX512" in d["features"]:
325+
if cpu_mode in ("NATIVE", "FANCY", "AVX512") and ("AMX" in d["features"] or "AVX512" in d["features"]):
320326
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
321327
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
328+
else:
329+
print(f"-- CPUINFER_CPU_INSTRUCT={cpu_mode}; not auto-enabling AMX/AVX512 umbrella")
322330

323331
# Auto-enable MOE kernel only when env explicitly turns on AMD or KML backend
324332
# (Do not enable purely on vendor auto-detection to avoid surprise behavior.)

0 commit comments

Comments
 (0)