 #include <stdlib.h>
 #include <string>
 #include <vector>
+#include <cufile.h>
+#ifdef _WIN32
+#else
+#include <fcntl.h>
+#include <unistd.h>
+#endif
 
 static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 
@@ -3410,6 +3416,68 @@ static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t
     GGML_UNUSED(reg);
 }
 
+static bool ggml_backend_cuda_buffer_load_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const char * path, size_t file_offset, size_t tensor_offset, size_t size) {
+#ifdef _WIN32
+    GGML_UNUSED(buffer);
+    GGML_UNUSED(tensor);
+    GGML_UNUSED(path);
+    GGML_UNUSED(file_offset);
+    GGML_UNUSED(tensor_offset);
+    GGML_UNUSED(size);
+    return false;
+#else
+    // initialize the cuFile driver once and remember whether it is usable
+    static bool initialized = false;
+    static bool use_cufile = false;
+    if (!initialized) {
+        CUfileError_t err = cuFileDriverOpen();
+        initialized = true;
+        if (err.err != CU_FILE_SUCCESS) {
+            use_cufile = false;
+            return false;
+        }
+        CUfileDrvProps_t props;
+        err = cuFileDriverGetProperties(&props);
+        if (err.err != CU_FILE_SUCCESS) {
+            use_cufile = false;
+            return false;
+        }
+        if (props.nvfs.dcontrolflags & (1 << CU_FILE_ALLOW_COMPAT_MODE)) {
+            // do not use cuFile if the driver is in compatibility mode,
+            // as we have faster mechanisms in llama-model-loader
+            use_cufile = false;
+            return false;
+        }
+        use_cufile = true;
+    }
+    if (!use_cufile) {
+        return false;
+    }
+    ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
+    ggml_cuda_set_device(ctx->device);
+
+    // GPUDirect Storage requires the file to be opened with O_DIRECT
+    int fd = open(path, O_RDONLY | O_DIRECT);
+    if (fd < 0) {
+        return false;
+    }
+    CUfileDescr_t cf_descr;
+    CUfileHandle_t cf_handle;
+    memset((void *)&cf_descr, 0, sizeof(CUfileDescr_t));
+    cf_descr.handle.fd = fd;
+    cf_descr.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD;
+    CUfileError_t status = cuFileHandleRegister(&cf_handle, &cf_descr);
+    if (status.err != CU_FILE_SUCCESS) {
+        close(fd);
+        return false;
+    }
+    ssize_t ret = cuFileRead(cf_handle, (char *)tensor->data, size, file_offset, tensor_offset);
+    cuFileHandleDeregister(cf_handle);
+    close(fd);
+    // treat errors and short reads as failure so the caller can fall back to the regular load path
+    if (ret < 0 || (size_t) ret != size) {
+        return false;
+    }
+    return true;
+#endif
+}
+
 static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
     GGML_UNUSED(reg);
     if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
@@ -3424,6 +3492,9 @@ static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, con
     if (strcmp(name, "ggml_backend_get_features") == 0) {
         return (void *)ggml_backend_cuda_get_features;
     }
+    if (strcmp(name, "ggml_backend_tensor_load") == 0) {
+        return (void *)ggml_backend_cuda_buffer_load_tensor;
+    }
     return nullptr;
 }
 
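Caller-side sketch (not part of this diff): how a loader might look up the optional "ggml_backend_tensor_load" entry point through ggml_backend_reg_get_proc_address and fall back to the regular read path when the backend does not provide it or the direct load fails. The ggml_backend_tensor_load_t typedef and the try_direct_load helper are illustrative names, not part of this change.

typedef bool (*ggml_backend_tensor_load_t)(ggml_backend_buffer_t buffer, ggml_tensor * tensor,
                                           const char * path, size_t file_offset,
                                           size_t tensor_offset, size_t size);

// hypothetical helper: returns true only if the backend loaded the tensor directly from disk
static bool try_direct_load(ggml_backend_reg_t reg, ggml_backend_buffer_t buffer, ggml_tensor * tensor,
                            const char * path, size_t file_offset, size_t tensor_offset, size_t size) {
    ggml_backend_tensor_load_t load_fn =
        (ggml_backend_tensor_load_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_tensor_load");
    if (load_fn == nullptr) {
        // backend does not expose direct loading; the caller should use its mmap/read path
        // and upload with ggml_backend_tensor_set instead
        return false;
    }
    return load_fn(buffer, tensor, path, file_offset, tensor_offset, size);
}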