Use oneMath for Intel vendor

Rbiessy · Rbiessy · commit dccc9f87951a · 2025-03-04T13:46:42.000Z
diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md
@@ -648,7 +648,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 | Name               | Value                                 | Function                                    |
 |--------------------|---------------------------------------|---------------------------------------------|
-| GGML_SYCL          | ON (mandatory)                        | Enable build with SYCL code path.<br>FP32 path - recommended for better perforemance than FP16 on quantized model|
-| GGML_SYCL_TARGET   | INTEL *(default)* \| NVIDIA \| AMD    | Set the SYCL target device type.            |
+| GGML_SYCL          | ON (mandatory)                        | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized model|
+| GGML_SYCL_TARGET   | INTEL *(default)* \| INTEL_CPU \| INTEL_GPU \| NVIDIA \| AMD    | Set the SYCL target device type.            |
 | GGML_SYCL_DEVICE_ARCH | Optional (except for AMD)          | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
 | GGML_SYCL_F16      | OFF *(default)* \|ON *(optional)*     | Enable FP16 build with SYCL code path.      |
 | CMAKE_C_COMPILER   | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path.      |
diff --git a/examples/sycl/README.md b/examples/sycl/README.md
@@ -14,7 +14,7 @@ List all SYCL devices with ID, compute capability, max work group size, ect.
 
 1. Build the llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*.
 
-2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-)*
+2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-, INTEL_CPU or INTEL_GPU)*
 
 ```
 source /opt/intel/oneapi/setvars.sh
diff --git a/ggml/cmake/ggml-config.cmake.in b/ggml/cmake/ggml-config.cmake.in
@@ -78,7 +78,7 @@ if (NOT GGML_SHARED_LIB)
 
     if (GGML_SYCL)
         find_package(DNNL)
-        if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
+        if (${DNNL_FOUND} AND GGML_SYCL_TARGET MATCHES "INTEL.*")
             list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
         endif()
         if (WIN32)
diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt
@@ -1,6 +1,6 @@
 message(STATUS  "GGML_SYCL_TARGET=${GGML_SYCL_TARGET}")
 
-if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA|AMD)$")
-    message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, NVIDIA, or AMD")
+if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|INTEL_CPU|INTEL_GPU|NVIDIA|AMD)$")
+    message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL, INTEL_CPU, INTEL_GPU, NVIDIA, or AMD")
 endif()
 
@@ -30,8 +30,6 @@ if (GGML_SYCL_F16)
     add_compile_definitions(GGML_SYCL_F16)
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")
-
 if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
     add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
 elseif (GGML_SYCL_TARGET STREQUAL "AMD")
@@ -51,44 +49,90 @@ target_sources(ggml-sycl PRIVATE ${GGML_HEADERS_SYCL} ${GGML_SOURCES_SYCL})
 find_package(DNNL)
 message("-- DNNL found:" ${DNNL_FOUND})
 
-if (GGML_SYCL_TARGET STREQUAL "INTEL")
+if (GGML_SYCL_TARGET MATCHES "INTEL.*")
     add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND})
 else()
     add_compile_definitions(GGML_SYCL_DNNL=0)
 endif()
 
-if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
+if (${DNNL_FOUND} AND GGML_SYCL_TARGET MATCHES "INTEL.*")
     target_link_libraries(ggml-sycl PRIVATE DNNL::dnnl)
 endif()
 
-if (WIN32)
-    find_package(IntelSYCL REQUIRED)
-    find_package(MKL REQUIRED)
-    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
+find_package(IntelSYCL)
+if (IntelSYCL_FOUND)
+    # Use oneAPI CMake when possible
+    target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
 else()
-    if (GGML_SYCL_TARGET STREQUAL "INTEL")
-        target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
-    elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
-        add_compile_definitions(GGML_SYCL_NVIDIA)
-        find_package(oneMath REQUIRED)
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl ONEMATH::onemath_blas_cublas)
-        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
-        # Disable warnings for using deprecated oneapi::mkl namespace in oneMath
-        # Using the deprecated API in oneMath is useful to have a similar API than Intel oneMKL
-        target_compile_options(ggml-sycl PRIVATE "-Wno-deprecated-declarations")
+    # Fallback to the simplest way of enabling SYCL when using intel/llvm nightly for instance
+    target_compile_options(ggml-sycl PRIVATE "-fsycl")
+    target_link_options(ggml-sycl PRIVATE "-fsycl")
+endif()
+
+target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")
+
+find_package(oneMath QUIET)
+if (NOT oneMath_FOUND)
+    message("-- oneMath not found: oneMath will be automatically downloaded")
+    # Use FetchContent to automatically pull and build oneMath
+    include(FetchContent)
+    set(BUILD_FUNCTIONAL_TESTS False)
+    set(BUILD_EXAMPLES False)
+    set(TARGET_DOMAINS blas)
+    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_CUBLAS_BACKEND True)
     elseif (GGML_SYCL_TARGET STREQUAL "AMD")
-        if (NOT GGML_SYCL_DEVICE_ARCH)
-            message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
-        endif()
-        find_package(oneMath REQUIRED)
-        target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemath)
-        target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
-        # Disable warnings for using deprecated oneapi::mkl namespace in oneMath
-        # Using the deprecated API in oneMath is useful to have a similar API than Intel oneMKL
-        target_compile_options(ggml-sycl PRIVATE "-Wno-deprecated-declarations")
+        set(ENABLE_MKLCPU_BACKEND False)
+        set(ENABLE_MKLGPU_BACKEND False)
+        set(ENABLE_ROCBLAS_BACKEND True)
     endif()
+    FetchContent_Declare(
+        ONEMATH
+        GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
+        GIT_TAG develop
+    )
+    FetchContent_MakeAvailable(ONEMATH)
+    # Create aliases matching the target names exported by find_package(oneMath)
+    function(onemath_alias target)
+        if (TARGET ${target} AND NOT TARGET ONEMATH::${target})
+            add_library(ONEMATH::${target} ALIAS ${target})
+        endif()
+    endfunction()
+    # "onemath" is the runtime dispatcher linked below when no specific backend is selected
+    foreach(onemath_target onemath onemath_blas_mklcpu onemath_blas_mklgpu onemath_blas_cublas onemath_blas_rocblas)
+        onemath_alias(${onemath_target})
+    endforeach()
+endif()
 
-    if (GGML_SYCL_DEVICE_ARCH)
-        target_compile_options(ggml-sycl PRIVATE "-Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
+# Below oneMath compile-time dispatching is used for better performance
+if (GGML_SYCL_TARGET STREQUAL "INTEL_CPU")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklcpu)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_CPU)
+elseif (GGML_SYCL_TARGET STREQUAL "INTEL_GPU")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_mklgpu)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_INTEL GGML_SYCL_INTEL_GPU)
+elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_cublas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=nvptx64-nvidia-cuda")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_NVIDIA)
+elseif (GGML_SYCL_TARGET STREQUAL "AMD")
+    if (NOT GGML_SYCL_DEVICE_ARCH) # the AMD (hip) backend requires an explicit device architecture
+        message(FATAL_ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
     endif()
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath_blas_rocblas)
+    target_compile_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_link_options(ggml-sycl PRIVATE "-fsycl-targets=amdgcn-amd-amdhsa")
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_AMD)
+else()
+    # Fallback to oneMath runtime dispatcher
+    target_link_libraries(ggml-sycl PRIVATE ONEMATH::onemath)
+    target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_GENERIC)
+endif()
+
+if (GGML_SYCL_DEVICE_ARCH) # SHELL: keeps the flag/value pair together and exempt from option de-duplication
+    target_compile_options(ggml-sycl PRIVATE "SHELL:-Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
+    target_link_options(ggml-sycl PRIVATE "SHELL:-Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}")
 endif()