@@ -243,7 +243,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
243243
244244 info.default_tensor_split [id] = total_vram;
245245 total_vram += prop.totalGlobalMem ;
246-
246+ info. devices [id]. integrated = prop. integrated ;
247247 info.devices [id].nsm = prop.multiProcessorCount ;
248248 info.devices [id].smpb = prop.sharedMemPerBlock ;
249249 info.devices [id].warp_size = prop.warpSize ;
@@ -1065,6 +1065,10 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
10651065 GGML_UNUSED (buft);
10661066}
10671067
1068+ static bool ggml_backend_buft_is_cuda_host (ggml_backend_buffer_type_t buft) {
1069+ return buft->iface .get_name == ggml_backend_cuda_host_buffer_type_name;
1070+ }
1071+
10681072static void ggml_backend_cuda_host_buffer_free_buffer (ggml_backend_buffer_t buffer) {
10691073 CUDA_CHECK (cudaFreeHost (buffer->context ));
10701074}
@@ -3263,7 +3267,13 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
32633267}
32643268
32653269static bool ggml_backend_cuda_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
3266- return (ggml_backend_buft_is_cuda (buft) || ggml_backend_buft_is_cuda_split (buft)) && buft->device == dev;
3270+ ggml_backend_cuda_device_context * dev_ctx = (ggml_backend_cuda_device_context *) dev->context ;
3271+ const int integrated = ggml_cuda_info ().devices [dev_ctx->device ].integrated ;
3272+ if (integrated){
3273+ return (ggml_backend_buft_is_cuda (buft) || ggml_backend_buft_is_cuda_split (buft) ||ggml_backend_buft_is_cuda_host (buft)) && buft->device == dev;
3274+ }else {
3275+ return (ggml_backend_buft_is_cuda (buft) || ggml_backend_buft_is_cuda_split (buft)) && buft->device == dev;
3276+ }
32673277}
32683278
32693279static int64_t get_op_batch_size (const ggml_tensor * op) {
0 commit comments