 #include <cstring>
 #include <iostream>
 #include <memory>
+#include <mutex>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
@@ -273,18 +274,9 @@ static std::vector<ggml_vk_device> ggml_vk_available_devices_internal(size_t mem
     return results;
 }
 
-// public API returns a C-style array
-ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count) {
-    auto devices = ggml_vk_available_devices_internal(memoryRequired);
-    *count = devices.size();
-    if (devices.empty()) {
-        return nullptr;
-    }
-
-    size_t nbytes = sizeof(ggml_vk_device) * (devices.size());
-    auto * arr = static_cast<ggml_vk_device *>(malloc(nbytes));
-    memcpy(arr, devices.data(), nbytes);
-    return arr;
+static std::vector<ggml_vk_device>& ggml_vk_available_devices() {
+    static std::vector<ggml_vk_device> devices = ggml_vk_available_devices_internal(0);
+    return devices;
 }
 
 static void ggml_vk_filterByVendor(std::vector<ggml_vk_device>& devices, const std::string& targetVendor) {
@@ -341,7 +333,7 @@ ggml_vk_device ggml_vk_current_device() {
     if (!komputeManager()->hasDevice())
         return ggml_vk_device();
 
-    auto devices = ggml_vk_available_devices_internal(0);
+    auto devices = ggml_vk_available_devices();
     ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data());
     GGML_ASSERT(!devices.empty());
     return devices.front();
@@ -1323,17 +1315,7 @@ static void ggml_vk_cpy_f16_f32(Args&&... args) {
     ggml_vk_cpy(spirv, 2, 4, std::forward<Args>(args)...);
 }
 
-static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
-    switch (op->type) {
-        case GGML_TYPE_F16:
-        case GGML_TYPE_F32:
-        case GGML_TYPE_Q4_0:
-        case GGML_TYPE_Q4_1:
-            break;
-        default:
-            return false;
-    }
-
+static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
@@ -1410,6 +1392,8 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
             ;
     }
     return false;
+
+    GGML_UNUSED(dev);
 }
 
 static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
@@ -1458,11 +1442,6 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
 
         any_commands_recorded = true;
 
-        if (!ggml_vk_supports_op(dst)) {
-            fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
-            GGML_ABORT("unsupported op");
-        }
-
         const int32_t ne00 = src0 ? src0->ne[0] : 0;
         const int32_t ne01 = src0 ? src0->ne[1] : 0;
         const int32_t ne02 = src0 ? src0->ne[2] : 0;
@@ -1907,25 +1886,31 @@ static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
 };
 
 ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) {
-    static std::vector<ggml_backend_buffer_type> bufts = []() {
-        std::vector<ggml_backend_buffer_type> vec;
-        auto devices = ggml_vk_available_devices_internal(0);
-        vec.reserve(devices.size());
-
-        for (const auto & dev : devices) {
-            vec.push_back({
-                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
-                /* .device  = */ nullptr,
-                /* .context = */ new ggml_backend_kompute_buffer_type_context(dev.index, dev.bufferAlignment, dev.maxAlloc)
-            });
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
+    auto devices = ggml_vk_available_devices();
+    int32_t device_count = (int32_t) devices.size();
+    GGML_ASSERT(device < device_count);
+    GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES);
+
+    static ggml_backend_buffer_type
+        ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES];
+
+    static bool ggml_backend_kompute_buffer_type_initialized = false;
+
+    if (!ggml_backend_kompute_buffer_type_initialized) {
+        for (int32_t i = 0; i < device_count; i++) {
+            ggml_backend_kompute_buffer_types[i] = {
+                /* .iface   = */ ggml_backend_kompute_buffer_type_interface,
+                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i),
+                /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc },
+            };
         }
-        return vec;
-    }();
+        ggml_backend_kompute_buffer_type_initialized = true;
+    }
 
-    auto it = std::find_if(bufts.begin(), bufts.end(), [device](const ggml_backend_buffer_type & t) {
-        return device == static_cast<ggml_backend_kompute_buffer_type_context *>(t.context)->device;
-    });
-    return it < bufts.end() ? &*it : nullptr;
+    return &ggml_backend_kompute_buffer_types[device];
 }
 
 // backend
@@ -1953,16 +1938,6 @@ static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, st
     return GGML_STATUS_SUCCESS;
 }
 
-static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    GGML_UNUSED(backend);
-    return ggml_vk_supports_op(op);
-}
-
-static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    GGML_UNUSED(backend);
-    return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
-}
-
 static struct ggml_backend_i kompute_backend_i = {
     /* .get_name = */ ggml_backend_kompute_name,
     /* .free     = */ ggml_backend_kompute_free,
@@ -1991,7 +1966,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
     ggml_backend_t kompute_backend = new ggml_backend {
         /* .guid      = */ ggml_backend_kompute_guid(),
         /* .interface = */ kompute_backend_i,
-        /* .device    = */ nullptr,
+        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device),
         /* .context   = */ s_kompute_context,
     };
 
@@ -2001,3 +1976,167 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
 bool ggml_backend_is_kompute(ggml_backend_t backend) {
     return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
 }
+
+static size_t ggml_backend_kompute_get_device_count() {
+    auto devices = ggml_vk_available_devices();
+    return devices.size();
+}
+
+static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    snprintf(description, description_size, "%s", devices[device].name);
+}
+
+static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) {
+    auto devices = ggml_vk_available_devices();
+    GGML_ASSERT((size_t) device < devices.size());
+    *total = devices[device].heapSize;
+    *free = devices[device].heapSize;
+}
+
+//////////////////////////
+
+struct ggml_backend_kompute_device_context {
+    int device;
+    std::string name;
+    std::string description;
+};
+
+static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->name.c_str();
+}
+
+static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ctx->description.c_str();
+}
+
+static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_get_device_memory(ctx->device, free, total);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) {
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_buffer_type(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) {
+        return false;
+    }
+
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context;
+
+    return buft_ctx->device == ctx->device;
+}
+
+static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_GPU;
+}
+
+static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_kompute_device_get_name(dev);
+    props->description = ggml_backend_kompute_device_get_description(dev);
+    props->type        = ggml_backend_kompute_device_get_type(dev);
+    ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* async                 = */ false,
+        /* host_buffer           = */ false,
+        /* .buffer_from_host_ptr = */ false,
+        /* events                = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(params);
+    ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context;
+    return ggml_backend_kompute_init(ctx->device);
+}
+
+static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
+    const int min_batch_size = 32;
+
+    return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) ||
+           (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+
+    GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_kompute_device_i = {
+    /* .get_name             = */ ggml_backend_kompute_device_get_name,
+    /* .get_description      = */ ggml_backend_kompute_device_get_description,
+    /* .get_memory           = */ ggml_backend_kompute_device_get_memory,
+    /* .get_type             = */ ggml_backend_kompute_device_get_type,
+    /* .get_props            = */ ggml_backend_kompute_device_get_props,
+    /* .init_backend         = */ ggml_backend_kompute_device_init,
+    /* .get_buffer_type      = */ ggml_backend_kompute_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ NULL,
+    /* .supports_op          = */ ggml_backend_kompute_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_kompute_device_supports_buft,
+    /* .offload_op           = */ ggml_backend_kompute_device_offload_op,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "Kompute";
+}
+
+static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return ggml_backend_kompute_get_device_count();
+}
+
+static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) {
+    static std::vector<ggml_backend_dev_t> devices;
+
+    static bool initialized = false;
+
+    {
+        static std::mutex mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+        if (!initialized) {
+            for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) {
+                ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context;
+                char desc[256];
+                ggml_backend_kompute_get_device_description(i, desc, sizeof(desc));
+                ctx->device = i;
+                ctx->name = "Kompute" + std::to_string(i);
+                ctx->description = desc;
+                devices.push_back(new ggml_backend_device {
+                    /* .iface   = */ ggml_backend_kompute_device_i,
+                    /* .reg     = */ reg,
+                    /* .context = */ ctx,
+                });
+            }
+            initialized = true;
+        }
+    }
+
+    GGML_ASSERT(device < devices.size());
+    return devices[device];
+}
+
+static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
+    /* .get_name         = */ ggml_backend_kompute_reg_get_name,
+    /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_kompute_reg_get_device,
+    /* .get_proc_address = */ NULL,
+};
+
+ggml_backend_reg_t ggml_backend_kompute_reg() {
+    static ggml_backend_reg reg = {
+        /* .iface   = */ ggml_backend_kompute_reg_i,
+        /* .context = */ nullptr,
+    };
+
+    return &reg;
+}