Skip to content

Commit f640fdd

Browse files
committed
sycl: Add option to set the SYCL architecture for all targets
* Convert GGML_SYCL_HIP_TARGET to the more generic GGML_SYCL_ARCH option
* Document that setting GGML_SYCL_ARCH can improve the performance
1 parent 1dc04b2 commit f640fdd

File tree

3 files changed

+16
-7
lines changed

3 files changed

+16
-7
lines changed

docs/backend/SYCL.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,12 +310,14 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_
310310
export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
311311

312312
# Build LLAMA with Nvidia BLAS acceleration through SYCL
313+
# Setting GGML_SYCL_ARCH is optional but can improve performance
314+
GGML_SYCL_ARCH=sm_80 # Example architecture
313315

314316
# Option 1: Use FP32 (recommended for better performance in most cases)
315-
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
317+
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
316318

317319
# Option 2: Use FP16
318-
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
320+
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
319321

320322
# Build all binaries
321323
cmake --build build --config Release -j -v
@@ -333,8 +335,9 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE
333335

334336
## AMD
335337
# Use FP32, FP16 is not supported
336-
# Find your GGML_SYCL_HIP_TARGET with rocminfo, under the key 'Name:'
337-
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_HIP_TARGET=${GGML_SYCL_HIP_TARGET} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
338+
# Find your GGML_SYCL_ARCH with rocminfo, under the key 'Name:'
339+
GGML_SYCL_ARCH=gfx90a # Example architecture
340+
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_ARCH=${GGML_SYCL_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
338341

339342
# Build all binaries
340343
cmake --build build --config Release -j -v
@@ -644,6 +647,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
644647
|--------------------|---------------------------------------|---------------------------------------------|
645648
| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better performance than FP16 on quantized models |
646649
| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
650+
| GGML_SYCL_ARCH | "" | Set the SYCL target architecture, optional except for AMD. Setting the architecture can improve the performance. See the table [here](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
647651
| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
648652
| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
649653
| CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |

ggml/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ option(GGML_SYCL "ggml: use SYCL"
166166
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
167167
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
168168
"ggml: sycl target device")
169+
set (GGML_SYCL_ARCH "" CACHE STRING "ggml: sycl architecture")
169170

170171
# extra artifacts
171172
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})

ggml/src/CMakeLists.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -608,12 +608,16 @@ if (GGML_SYCL)
608608
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
609609
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
610610
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
611-
if (GGML_SYCL_HIP_TARGET STREQUAL "")
612-
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_HIP_TARGET has not been set.")
611+
if (NOT GGML_SYCL_ARCH)
612+
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_ARCH has not been set.")
613613
endif()
614-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${GGML_SYCL_HIP_TARGET}")
614+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=amdgcn-amd-amdhsa")
615615
list(APPEND GGML_EXTRA_LIBS_PRIVATE sycl pthread m dl onemkl)
616616
endif()
617+
618+
if (GGML_SYCL_ARCH)
619+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --offload-arch=${GGML_SYCL_ARCH}")
620+
endif()
617621
endif()
618622
endif()
619623

0 commit comments

Comments
 (0)