@@ -3559,12 +3559,31 @@ class sycl_gpu_mgr {
     int work_group_size = 0;
     std::string gpus_list = "";

+    /*
+    Use all GPUs with the same top max compute units.
+    */
     sycl_gpu_mgr() {
         detect_sycl_gpu_list_with_max_cu();
         get_allow_gpus();
         create_context_with_gpus();
     }

+    /*
+    Use only the assigned GPU.
+    */
+    sycl_gpu_mgr(int main_gpu_id) {
+        sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
+        dpct::device_info prop;
+        dpct::get_device_info(prop, device);
+        gpus.push_back(main_gpu_id);
+        devices.push_back(device);
+        work_group_size = prop.get_max_work_group_size();
+        max_compute_units = prop.get_max_compute_units();
+
+        get_allow_gpus();
+        create_context_with_gpus();
+    }
+
     void create_context_with_gpus() {
         sycl::context ctx = sycl::context(devices);
         assert(gpus.size() > 0);
@@ -3580,7 +3599,7 @@ class sycl_gpu_mgr {
             gpus_list += std::to_string(gpus[i]);
             gpus_list += ",";
         }
-        if (gpus_list.length() > 2) {
+        if (gpus_list.length() > 1) {
             gpus_list.pop_back();
         }
     }
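
Why `> 1`: with a single GPU, `gpus_list` is `"0,"` (length 2), so the old `> 2` guard skipped the `pop_back()` and left a trailing comma. Any non-empty list has length greater than 1, so the new guard trims the separator in every case.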
@@ -3629,8 +3648,8 @@ class sycl_gpu_mgr {
             if (gpus[i] == id)
                 return i;
         }
-        assert(false);
-        return -1;
+        printf("failed to get device index by id=%d\n", id);
+        GGML_ASSERT(false);
     }

     int get_next_index(int id) {
@@ -3639,8 +3658,7 @@ class sycl_gpu_mgr {
             if (gpus[i] == id)
                 return i;
         }
-        assert(false);
-        return -1;
+        GGML_ASSERT(false);
     }
 };

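For orientation, a minimal sketch (not part of the diff) of how the two constructors are meant to be selected; `single_device_mode` and `main_gpu_id` are hypothetical placeholders for whatever the caller derives from its options. The actual call site appears in `ggml_backend_sycl_set_single_device()` further down.

```cpp
// Sketch only: choosing between the two sycl_gpu_mgr constructors.
if (single_device_mode) {
    // wrap exactly one raw SYCL device id
    g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
} else {
    // manage every GPU whose max compute units match the top device
    g_sycl_gpu_mgr = new sycl_gpu_mgr();
}
```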
@@ -3649,6 +3667,7 @@ static int g_device_count = -1;
 static int g_all_sycl_device_count = -1;
 static int g_main_device = -1;
 static int g_main_device_id = -1;
+static bool g_ggml_backend_sycl_buffer_type_initialized = false;

 static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};

@@ -13225,7 +13244,7 @@ void ggml_backend_sycl_print_sycl_devices() {
 }

 void print_gpu_device_list() {
-    fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
+    fprintf(stderr, "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n",
             g_sycl_gpu_mgr->get_gpu_count(),
             g_sycl_gpu_mgr->gpus_list.c_str(),
             g_sycl_gpu_mgr->max_compute_units);
@@ -13264,6 +13283,15 @@ void ggml_init_sycl() try {
 #else
         fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
 #endif
+
+/* DO NOT REMOVE, kept for a future XMX optimization.
+#if defined(SYCL_USE_XMX)
+        fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
+#else
+        fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
+#endif
+*/
+
         if (CHECK_TRY_ERROR(g_all_sycl_device_count =
                         dpct::dev_mgr::instance().device_count()) != 0) {
             initialized = true;
@@ -13272,68 +13300,61 @@ void ggml_init_sycl() try {
         }
         GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
         ggml_backend_sycl_print_sycl_devices();
-
         if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
+        print_gpu_device_list();
+        initialized = true;
+        g_sycl_loaded = true;
+    }

-        g_device_count = g_sycl_gpu_mgr->get_gpu_count();
-        g_work_group_size = g_sycl_gpu_mgr->work_group_size;

-        print_gpu_device_list();

-        int64_t total_vram = 0;
+    g_device_count = g_sycl_gpu_mgr->get_gpu_count();
+    g_work_group_size = g_sycl_gpu_mgr->work_group_size;

-/* NOT REMOVE, keep it for next optimize for XMX.
-#if defined(SYCL_USE_XMX)
-        fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
-#else
-        fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
-#endif
-*/
-        for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
-            g_device_caps[id].vmm = 0;
-            g_device_caps[id].device_id = -1;
-            g_device_caps[id].cc = 0;
-            g_tensor_split[id] = 0;
-            g_default_tensor_split[id] = 0;
-        }
+    int64_t total_vram = 0;

-        for (int i = 0; i < g_device_count; ++i) {
-            int device_id = g_sycl_gpu_mgr->gpus[i];
-            g_device_caps[i].vmm = 0;

-            dpct::device_info prop;
-            SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
-                prop, dpct::dev_mgr::instance().get_device(device_id))));
+    for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
+        g_device_caps[id].vmm = 0;
+        g_device_caps[id].device_id = -1;
+        g_device_caps[id].cc = 0;
+        g_tensor_split[id] = 0;
+        g_default_tensor_split[id] = 0;
+    }

-            g_default_tensor_split[i] = total_vram;
-            total_vram += prop.get_global_mem_size();
+    for (int i = 0; i < g_device_count; ++i) {
+        int device_id = g_sycl_gpu_mgr->gpus[i];
+        g_device_caps[i].vmm = 0;

-            g_device_caps[i].cc =
-                100 * prop.get_major_version() + 10 * prop.get_minor_version();
-        }
+        dpct::device_info prop;
+        SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
+            prop, dpct::dev_mgr::instance().get_device(device_id))));

-        for (int i = 0; i < g_device_count; ++i) {
-            g_default_tensor_split[i] /= total_vram;
-        }
+        g_default_tensor_split[i] = total_vram;
+        total_vram += prop.get_global_mem_size();

-        for (int i = 0; i < g_device_count; ++i) {
-            SYCL_CHECK(ggml_sycl_set_device(i));
+        g_device_caps[i].cc =
+            100 * prop.get_major_version() + 10 * prop.get_minor_version();
+    }

-            // create sycl streams
-            for (int is = 0; is < MAX_STREAMS; ++is) {
-                SYCL_CHECK(CHECK_TRY_ERROR(
-                    g_syclStreams[i][is] =
-                        dpct::get_current_device().create_queue(
-                            g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
-            }
+    for (int i = 0; i < g_device_count; ++i) {
+        g_default_tensor_split[i] /= total_vram;
+    }
+
+    for (int i = 0; i < g_device_count; ++i) {
+        SYCL_CHECK(ggml_sycl_set_device(i));

-            const dpct::queue_ptr stream = g_syclStreams[i][0];
-            // create sycl handle
-            SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
+        // create sycl streams
+        for (int is = 0; is < MAX_STREAMS; ++is) {
+            SYCL_CHECK(CHECK_TRY_ERROR(
+                g_syclStreams[i][is] =
+                    dpct::get_current_device().create_queue(
+                        g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
         }

-        initialized = true;
-        g_sycl_loaded = true;
+        const dpct::queue_ptr stream = g_syclStreams[i][0];
+        // create sycl handle
+        SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
     }
 }
 catch (sycl::exception const &exc) {
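The restructuring moves the per-device setup out of the `if (!initialized)` block: device enumeration and list printing still run once, but caps, tensor-split defaults, streams, and handles are rebuilt on every call. A hedged sketch of the resulting control flow (a hypothetical condensed form, not the literal function):

```cpp
// Sketch only: one-time detection stays guarded, per-device state is
// rebuilt on every call.
void ggml_init_sycl_flow() {
    static bool initialized = false;
    if (!initialized) {
        // enumerate devices, create g_sycl_gpu_mgr if absent,
        // print the device list
        initialized = true;
    }
    // always executed: refresh g_device_count, device caps,
    // tensor-split defaults, queues, and handles from the *current*
    // g_sycl_gpu_mgr, so ggml_backend_sycl_set_single_device() can
    // swap the manager and safely re-run this function
}
```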
@@ -16732,22 +16753,24 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
     /* .is_host = */ nullptr,
 };

-ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
+ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
+    if (device_index >= g_device_count || device_index < 0) {
+        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d]; did you forget to call ggml_backend_sycl_set_single_device()?\n",
+               device_index, g_device_count - 1);
+        GGML_ASSERT(device_index < g_device_count);
+    }
     static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];

-    static bool ggml_backend_sycl_buffer_type_initialized = false;
-
-    if (!ggml_backend_sycl_buffer_type_initialized) {
+    if (!g_ggml_backend_sycl_buffer_type_initialized) {
         for (int i = 0; i < g_device_count; i++) {
             ggml_backend_sycl_buffer_types[i] = {
                 /* .iface = */ ggml_backend_sycl_buffer_type_interface,
                 /* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
             };
         }
-        ggml_backend_sycl_buffer_type_initialized = true;
+        g_ggml_backend_sycl_buffer_type_initialized = true;
     }
-
-    return &ggml_backend_sycl_buffer_types[device];
+    return &ggml_backend_sycl_buffer_types[device_index];
 }

 // sycl split buffer type
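Note the renamed parameter: `device_index` is an index into the filtered `g_sycl_gpu_mgr->gpus` list (0 to `g_device_count - 1`), not a raw SYCL device id; raw ids are mapped with `ggml_backend_sycl_get_device_index()`. A hedged usage sketch, assuming raw device id 2 is in the filtered list:

```cpp
// Sketch: map a raw SYCL device id to an index first; passing the raw
// id here would trip the new range check.
int idx = ggml_backend_sycl_get_device_index(/*device_id=*/2);
ggml_backend_buffer_type_t buft = ggml_backend_sycl_buffer_type(idx);
```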
@@ -17496,6 +17519,17 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
     return g_sycl_gpu_mgr->get_index(device_id);
 }

+GGML_API GGML_CALL void ggml_backend_sycl_set_single_device(int main_gpu_id) {
+    GGML_ASSERT(main_gpu_id < g_all_sycl_device_count);
+    printf("ggml_backend_sycl_set_single_device: use single device: %d\n", main_gpu_id);
+    if (g_sycl_gpu_mgr) {
+        delete g_sycl_gpu_mgr;
+    }
+    g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
+    ggml_init_sycl();
+    g_ggml_backend_sycl_buffer_type_initialized = false;
+}
+
 extern "C" int ggml_backend_sycl_reg_devices();

 int ggml_backend_sycl_reg_devices() {
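
A minimal usage sketch for the new entry point, assuming it is called before any buffers are created. Per the diff, the function replaces `g_sycl_gpu_mgr`, re-runs `ggml_init_sycl()`, and clears the cached buffer-type flag so buffer types are rebuilt on the next lookup:

```cpp
// Sketch: pin the SYCL backend to one GPU before any allocation.
ggml_backend_sycl_set_single_device(/*main_gpu_id=*/0);  // raw device id
// afterwards the filtered list has one entry, so index 0 is the only
// valid argument to ggml_backend_sycl_buffer_type()
ggml_backend_buffer_type_t buft = ggml_backend_sycl_buffer_type(0);
```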