File tree Expand file tree Collapse file tree 2 files changed +12
-0
lines changed Expand file tree Collapse file tree 2 files changed +12
-0
lines changed Original file line number Diff line number Diff line change @@ -155,6 +155,9 @@ option(GGML_CUDA_NO_VMM                     "ggml: do not try to use CUDA VMM"
155155option (GGML_CUDA_FA                         "ggml: compile ggml FlashAttention CUDA kernels"   ON )
156156option (GGML_CUDA_FA_ALL_QUANTS              "ggml: compile all quants for FlashAttention"      OFF )
157157option (GGML_CUDA_GRAPHS                     "ggml: use CUDA graphs (llama.cpp only)"           ${GGML_CUDA_GRAPHS_DEFAULT} )
# Compression mode for the CUDA device binaries. The CUDA backend's CMake code
# turns this value into nvcc's -compress-mode=<value> flag when the toolkit is
# new enough (see the help string below for the version requirement).
#
# The entry is a free-form STRING cache variable defaulting to "size". The
# STRINGS property only populates the selection list shown by cmake-gui/ccmake;
# CMake itself does not restrict the value to these entries.
set(GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
    "ggml: cuda link binary compression mode; requires cuda 12.8+")
set_property(CACHE GGML_CUDA_COMPRESSION_MODE
             PROPERTY STRINGS none speed balance size)
158161
159162option (GGML_HIP                             "ggml: use HIP"                                    OFF )
160163option (GGML_HIP_GRAPHS                      "ggml: use HIP graph, experimental, slow"          OFF )
Original file line number Diff line number Diff line change @@ -102,6 +102,15 @@ if (CUDAToolkit_FOUND)
102102
103103    set (CUDA_FLAGS -use_fast_math)
104104
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
    # Forward GGML_CUDA_COMPRESSION_MODE to nvcc as -compress-mode=<mode>.
    # The flag is only passed for CUDA 12.8+ (hence the version gate above).
    # Documented values are:
    # - none (not recommended)
    # - speed (nvcc's default)
    # - balance
    # - size
    if ("${GGML_CUDA_COMPRESSION_MODE}" STREQUAL "")
        # An empty value would produce the malformed flag "-compress-mode=",
        # which only surfaces as an nvcc failure at build time. Leave the flag
        # off instead so nvcc uses its own default mode.
        message(STATUS "GGML_CUDA_COMPRESSION_MODE is empty; not passing -compress-mode to nvcc")
    else()
        if (NOT "${GGML_CUDA_COMPRESSION_MODE}" MATCHES "^(none|speed|balance|size)$")
            # The STRINGS cache property is only a GUI hint and does not
            # validate the value, so diagnose likely typos at configure time.
            # The value is still forwarded unchanged so that modes accepted by
            # nvcc but not listed here keep working.
            message(WARNING
                "GGML_CUDA_COMPRESSION_MODE='${GGML_CUDA_COMPRESSION_MODE}' is not one of the "
                "documented values (none, speed, balance, size); passing it to nvcc unchanged")
        endif()
        list(APPEND CUDA_FLAGS -compress-mode=${GGML_CUDA_COMPRESSION_MODE})
    endif()
endif()
113+ 
105114    if  (GGML_FATAL_WARNINGS)
106115        list (APPEND  CUDA_FLAGS -Werror all -warnings)
107116    endif ()
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments