From 81044d3460a0ad0bbbf5a32194fe6c281141ada8 Mon Sep 17 00:00:00 2001
From: yangxiao
Date: Wed, 28 May 2025 21:29:53 +0800
Subject: [PATCH 1/2] CUDA: add a "GGML_CUDA_JETSON_DEVICE" flag for Jetson optimization (#13856)

---
 ggml/src/ggml-cuda/ggml-cuda.cu | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index c442a64924303..58b56f1a29dbb 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -1064,6 +1064,9 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
     GGML_UNUSED(buft);
 }
 
+static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
+    return buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
+}
 
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
@@ -3263,7 +3266,11 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 }
 
 static bool ggml_backend_cuda_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    if (getenv("GGML_CUDA_JETSON_DEVICE") != nullptr) {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft) || ggml_backend_buft_is_cuda_host(buft)) && buft->device == dev;
+    } else {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    }
 }
 
 static int64_t get_op_batch_size(const ggml_tensor * op) {

From 8f1c07e371f86f0d15bf46b17d8f4b9d5226053b Mon Sep 17 00:00:00 2001
From: yangxiao
Date: Wed, 28 May 2025 21:47:08 +0800
Subject: [PATCH 2/2] CUDA: document the "GGML_CUDA_JETSON_DEVICE" flag in build.md

---
 docs/build.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/build.md b/docs/build.md
index c9027c0b580a5..024d9155b9787 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -298,6 +298,8 @@ If your GPU is not officially supported you can use the environment variable [`H
 On Linux it is possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting environment variable `GGML_CUDA_ENABLE_UNIFIED_MEMORY=1`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
 
+On NVIDIA Jetson series devices, which have an integrated GPU, set the environment variable `GGML_CUDA_JETSON_DEVICE=1` to optimize for the unified memory architecture.
+
 ## Vulkan
 
 **Windows**
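
For context, a minimal stand-alone sketch of the gate that PATCH 1/2 introduces. The buffer_type struct and supports_buft harness below are illustrative stubs invented for this sketch, not the real ggml API; the real code dispatches on the buffer type's get_name callback, and only the control flow here mirrors the patch.

    #include <cstdlib>  // std::getenv

    // Illustrative stand-in for ggml_backend_buffer_type_t.
    struct buffer_type {
        bool is_cuda;   // device (VRAM) buffer
        bool is_split;  // multi-GPU split buffer
        bool is_host;   // pinned host buffer (cudaMallocHost)
        int  device;    // owning device index
    };

    // Mirrors ggml_backend_cuda_device_supports_buft after the patch:
    // pinned host buffers are reported as usable by the GPU only when
    // GGML_CUDA_JETSON_DEVICE is set, since on Jetson's unified memory
    // the GPU can address host allocations directly, avoiding a copy.
    static bool supports_buft(const buffer_type & buft, int dev) {
        const bool base = buft.is_cuda || buft.is_split;
        if (std::getenv("GGML_CUDA_JETSON_DEVICE") != nullptr) {
            return (base || buft.is_host) && buft.device == dev;
        }
        return base && buft.device == dev;
    }

At run time the flag is enabled the same way as the other environment variables in build.md, e.g. GGML_CUDA_JETSON_DEVICE=1 ./llama-cli ... (invocation illustrative).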