@@ -921,10 +921,33 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
921921    backend_ctx->program_CL_gemm  = build_program_from_source (context, device, kernel_src_CL_gemm.c_str (), compile_opts);
922922    CL_CHECK ((backend_ctx->CL_mul_mat_Ab_Bi_8x4  = clCreateKernel (backend_ctx->program_CL_gemm , " kernel_mul_mat_Ab_Bi_8x4"  , &err), err));
923923
924+     //  TODO: fixme: these sizes are hardcoded for now.
925+     //   they should be allocated based on the model's size
926+     //   and the device's max alloc size
927+     size_t  max_alloc_size;
928+     CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof (size_t ), &max_alloc_size, NULL ));
929+ 
924930    //  Allocate intermediate buffers and images
925-     size_t  max_A_q_d_bytes = 311164928 ;
926-     size_t  max_A_s_d_bytes = 38895616 ;
927-     size_t  max_B_d_bytes = 45088768 ;
931+     size_t  required_A_q_d_bytes = 311164928 ;
932+     size_t  required_A_s_d_bytes = 38895616 ;
933+     size_t  required_B_d_bytes = 45088768 ;
934+ 
935+     //  Ensure buffer sizes do not exceed the maximum allocation size
936+     size_t  max_A_q_d_bytes = MIN (required_A_q_d_bytes, max_alloc_size);
937+     size_t  max_A_s_d_bytes = MIN (required_A_s_d_bytes, max_alloc_size);
938+     size_t  max_B_d_bytes   = MIN (required_B_d_bytes, max_alloc_size);
939+     if  (required_A_q_d_bytes > max_alloc_size) {
940+         GGML_LOG_WARN (" ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n "  ,
941+                       required_A_q_d_bytes, max_A_q_d_bytes);
942+     }
943+     if  (required_A_s_d_bytes > max_alloc_size) {
944+         GGML_LOG_WARN (" ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n "  ,
945+                       required_A_s_d_bytes, max_A_s_d_bytes);
946+     }
947+     if  (required_B_d_bytes > max_alloc_size) {
948+         GGML_LOG_WARN (" ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n "  ,
949+                       required_B_d_bytes, max_B_d_bytes);
950+     }
928951
929952    CL_CHECK ((backend_ctx->A_q_d_max  = clCreateBuffer (context, 0 , max_A_q_d_bytes, NULL , &err), err));
930953    CL_CHECK ((backend_ctx->A_s_d_max  = clCreateBuffer (context, 0 , max_A_s_d_bytes, NULL , &err), err));
0 commit comments