@@ -3559,31 +3559,12 @@ class sycl_gpu_mgr {
35593559        int work_group_size = 0;
35603560        std::string gpus_list = "";
35613561
3562-         /*
3563-         Use all GPU with same top max compute units
3564-         */
35653562        sycl_gpu_mgr() {
35663563            detect_sycl_gpu_list_with_max_cu();
35673564            get_allow_gpus();
35683565            create_context_with_gpus();
35693566        }
35703567
3571-         /*
3572-         Use the assigned GPU as only one
3573-         */
3574-         sycl_gpu_mgr(int main_gpu_id) {
3575-             sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
3576-             dpct::device_info prop;
3577-             dpct::get_device_info(prop, device);
3578-             gpus.push_back(main_gpu_id);
3579-             devices.push_back(device);
3580-             work_group_size = prop.get_max_work_group_size();
3581-             max_compute_units = prop.get_max_compute_units();
3582- 
3583-             get_allow_gpus();
3584-             create_context_with_gpus();
3585-         }
3586- 
35873568        void create_context_with_gpus() {
35883569            sycl::context ctx = sycl::context(devices);
35893570            assert(gpus.size() > 0);
@@ -3599,7 +3580,7 @@ class sycl_gpu_mgr {
35993580                gpus_list += std::to_string(gpus[i]);
36003581                gpus_list += ",";
36013582            }
3602-             if (gpus_list.length() > 1 ) {
3583+             if (gpus_list.length() > 2 ) {
36033584                gpus_list.pop_back();
36043585            }
36053586        }
@@ -3648,8 +3629,8 @@ class sycl_gpu_mgr {
36483629                if (gpus[i] == id)
36493630                    return i;
36503631            }
3651-             printf("miss to get device index by id=%d\n", id );
3652-             GGML_ASSERT(false) ;
3632+             assert(false );
3633+             return -1 ;
36533634        }
36543635
36553636        int get_next_index(int id) {
@@ -3658,7 +3639,8 @@ class sycl_gpu_mgr {
36583639                if (gpus[i] == id)
36593640                    return i;
36603641            }
3661-             GGML_ASSERT(false);
3642+             assert(false);
3643+             return -1;
36623644        }
36633645};
36643646
@@ -3667,7 +3649,6 @@ static int g_device_count = -1;
36673649static int g_all_sycl_device_count = -1;
36683650static int g_main_device = -1;
36693651static int g_main_device_id = -1;
3670- static bool g_ggml_backend_sycl_buffer_type_initialized = false;
36713652
36723653static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
36733654
@@ -13244,7 +13225,7 @@ void ggml_backend_sycl_print_sycl_devices() {
1324413225}
1324513226
1324613227void print_gpu_device_list() {
13247-     fprintf(stderr, "detect %d SYCL GPUs: [%s] with top  Max compute units:%d\n",
13228+     fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
1324813229            g_sycl_gpu_mgr->get_gpu_count(),
1324913230            g_sycl_gpu_mgr->gpus_list.c_str(),
1325013231            g_sycl_gpu_mgr->max_compute_units);
@@ -13283,15 +13264,6 @@ void ggml_init_sycl() try {
1328313264#else
1328413265        fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
1328513266#endif
13286- 
13287- /* NOT REMOVE, keep it for next optimize for XMX.
13288- #if defined(SYCL_USE_XMX)
13289-         fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
13290- #else
13291-         fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
13292- #endif
13293- */
13294- 
1329513267        if (CHECK_TRY_ERROR(g_all_sycl_device_count =
1329613268                            dpct::dev_mgr::instance().device_count()) != 0) {
1329713269            initialized = true;
@@ -13300,61 +13272,68 @@ void ggml_init_sycl() try {
1330013272        }
1330113273        GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
1330213274        ggml_backend_sycl_print_sycl_devices();
13303-         if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
13304-         print_gpu_device_list();
13305-         initialized = true;
13306-         g_sycl_loaded = true;
13307-     }
1330813275
13276+         if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
1330913277
13278+         g_device_count = g_sycl_gpu_mgr->get_gpu_count();
13279+         g_work_group_size = g_sycl_gpu_mgr->work_group_size;
1331013280
13311-     g_device_count = g_sycl_gpu_mgr->get_gpu_count();
13312-     g_work_group_size = g_sycl_gpu_mgr->work_group_size;
13281+         print_gpu_device_list();
1331313282
13314-     int64_t total_vram = 0;
13283+          int64_t total_vram = 0;
1331513284
13285+ /* NOT REMOVE, keep it for next optimize for XMX.
13286+ #if defined(SYCL_USE_XMX)
13287+         fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
13288+ #else
13289+         fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
13290+ #endif
13291+ */
13292+         for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
13293+             g_device_caps[id].vmm = 0;
13294+             g_device_caps[id].device_id = -1;
13295+             g_device_caps[id].cc = 0;
13296+             g_tensor_split[id] = 0;
13297+             g_default_tensor_split[id] = 0;
13298+         }
1331613299
13317-     for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
13318-         g_device_caps[id].vmm = 0;
13319-         g_device_caps[id].device_id = -1;
13320-         g_device_caps[id].cc = 0;
13321-         g_tensor_split[id] = 0;
13322-         g_default_tensor_split[id] = 0;
13323-     }
13300+         for (int i = 0; i < g_device_count; ++i) {
13301+             int device_id = g_sycl_gpu_mgr->gpus[i];
13302+             g_device_caps[i].vmm = 0;
1332413303
13325-     for (int i = 0; i < g_device_count; ++i) { 
13326-         int device_id = g_sycl_gpu_mgr->gpus[i]; 
13327-         g_device_caps[i].vmm = 0 ;
13304+             dpct::device_info prop; 
13305+             SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info( 
13306+                 prop, dpct::dev_mgr::instance().get_device(device_id)))) ;
1332813307
13329-         dpct::device_info prop;
13330-         SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
13331-             prop, dpct::dev_mgr::instance().get_device(device_id))));
13308+             g_default_tensor_split[i] = total_vram;
13309+             total_vram += prop.get_global_mem_size();
1333213310
13333-         g_default_tensor_split[i] = total_vram;
13334-         total_vram += prop.get_global_mem_size();
13311+             g_device_caps[i].cc =
13312+                 100 * prop.get_major_version() + 10 * prop.get_minor_version();
13313+         }
1333513314
13336-         g_device_caps[i].cc = 
13337-             100 * prop.get_major_version() + 10 * prop.get_minor_version() ;
13338-     }
13315+         for (int i = 0; i < g_device_count; ++i) { 
13316+             g_default_tensor_split[i] /= total_vram ;
13317+          }
1333913318
13340-     for (int i = 0; i < g_device_count; ++i) {
13341-         g_default_tensor_split[i] /= total_vram;
13342-     }
13319+         for (int i = 0; i < g_device_count; ++i) {
13320+             SYCL_CHECK(ggml_sycl_set_device(i));
1334313321
13344-     for (int i = 0; i < g_device_count; ++i) {
13345-         SYCL_CHECK(ggml_sycl_set_device(i));
13322+             // create sycl streams
13323+             for (int is = 0; is < MAX_STREAMS; ++is) {
13324+                 SYCL_CHECK(CHECK_TRY_ERROR(
13325+                     g_syclStreams[i][is] =
13326+                         dpct::get_current_device().create_queue(
13327+                             g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
13328+             }
1334613329
13347-         // create sycl streams
13348-         for (int is = 0; is < MAX_STREAMS; ++is) {
13349-             SYCL_CHECK(CHECK_TRY_ERROR(
13350-                 g_syclStreams[i][is] =
13351-                     dpct::get_current_device().create_queue(
13352-                         g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
13330+             const dpct::queue_ptr stream = g_syclStreams[i][0];
13331+             // create sycl handle
13332+             SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
1335313333        }
1335413334
13355-         const dpct::queue_ptr stream = g_syclStreams[i][0];
13356-         // create sycl handle
13357-         SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
13335+         initialized = true;
13336+         g_sycl_loaded = true;
1335813337    }
1335913338}
1336013339catch (sycl::exception const &exc) {
@@ -16753,24 +16732,22 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
1675316732    /* .is_host          = */ nullptr,
1675416733};
1675516734
16756- ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
16757-     if (device_index>=g_device_count or device_index<0) {
16758-         printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
16759-             device_index, g_device_count-1);
16760-         GGML_ASSERT(device_index<g_device_count);
16761-     }
16735+ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
1676216736    static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
1676316737
16764-     if (!g_ggml_backend_sycl_buffer_type_initialized) {
16738+     static bool ggml_backend_sycl_buffer_type_initialized = false;
16739+ 
16740+     if (!ggml_backend_sycl_buffer_type_initialized) {
1676516741        for (int i = 0; i < g_device_count; i++) {
1676616742            ggml_backend_sycl_buffer_types[i] = {
1676716743                /* .iface    = */ ggml_backend_sycl_buffer_type_interface,
1676816744                /* .context  = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
1676916745            };
1677016746        }
16771-         g_ggml_backend_sycl_buffer_type_initialized  = true;
16747+         ggml_backend_sycl_buffer_type_initialized  = true;
1677216748    }
16773-     return &ggml_backend_sycl_buffer_types[device_index];
16749+ 
16750+     return &ggml_backend_sycl_buffer_types[device];
1677416751}
1677516752
1677616753// sycl split buffer type
@@ -17519,17 +17496,6 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
1751917496    return g_sycl_gpu_mgr->get_index(device_id);
1752017497}
1752117498
17522- GGML_API GGML_CALL void ggml_backend_sycl_set_single_device(int main_gpu_id) {
17523-     GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
17524-     printf("ggml_backend_sycl_set_single_device: use single device: %d\n", main_gpu_id);
17525-     if (g_sycl_gpu_mgr) {
17526-         delete g_sycl_gpu_mgr;
17527-     }
17528-     g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
17529-     ggml_init_sycl();
17530-     g_ggml_backend_sycl_buffer_type_initialized = false;
17531- }
17532- 
1753317499extern "C" int ggml_backend_sycl_reg_devices();
1753417500
1753517501int ggml_backend_sycl_reg_devices() {
0 commit comments