@@ -29,7 +29,7 @@ def git_url(self) -> str:
         return "https://github.com/ggerganov/llama.cpp"
 
     def git_hash(self) -> str:
-        return "1ee9eea094fe5846c7d8d770aa7caa749d246b23"
+        return "916c83bfe7f8b08ada609c3b8e583cf5301e594b"
 
     def setup(self):
         if options.sycl is None:
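The hash bump above pins the build to a newer upstream llama.cpp commit. As a rough sketch of how a harness typically consumes these two values (the helper name and destination path are hypothetical, not from this suite):

import subprocess

def clone_pinned(url: str, git_hash: str, dest: str) -> None:
    # Clone, then detach to the pinned commit so every run builds identical sources.
    subprocess.run(["git", "clone", url, dest], check=True)
    subprocess.run(["git", "-C", dest, "checkout", git_hash], check=True)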
@@ -47,9 +47,9 @@ def setup(self):
 
         self.model = download(
             self.models_dir,
-            "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
-            "Phi-3-mini-4k-instruct-q4.gguf",
-            checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
+            "https://huggingface.co/ggml-org/DeepSeek-R1-Distill-Qwen-1.5B-Q4_0-GGUF/resolve/main/deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
+            "deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
+            checksum="791f6091059b653a24924b9f2b9c3141c8f892ae13fff15725f77a2bf7f9b1b6b71c85718f1e9c0f26c2549aba44d191",
         )
 
         self.oneapi = get_oneapi()
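Both the old and new checksums are 96 hex digits, which is consistent with SHA-384. A minimal verification sketch under that assumption (the function below is illustrative, not this suite's download() helper):

import hashlib

def verify_sha384(path: str, expected: str) -> bool:
    # Stream in 1 MiB chunks so multi-gigabyte GGUF files never sit fully in memory.
    digest = hashlib.sha384()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected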
@@ -64,10 +64,11 @@ def setup(self):
             f"-DGGML_SYCL=ON",
             f"-DCMAKE_C_COMPILER=clang",
             f"-DCMAKE_CXX_COMPILER=clang++",
-            f"-DDNNL_DIR={self.oneapi.dnn_cmake()}",
+            f"-DDNNL_GPU_VENDOR=INTEL",
             f"-DTBB_DIR={self.oneapi.tbb_cmake()}",
-            f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
-            f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
+            f"-DDNNL_DIR={self.oneapi.dnn_cmake()}",
+            f"-DSYCL_COMPILER=ON",
+            f"-DMKL_DIR={self.oneapi.mkl_cmake()}",
         ]
 
         run(configure_command, add_sycl=True)
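Taken together, the new flags delegate dependency discovery to the oneAPI CMake package configs (DNNL_DIR, MKL_DIR, TBB_DIR) instead of hand-wiring include and linker flags. A standalone approximation of the resulting configure call, assuming a conventional oneAPI layout under /opt/intel/oneapi (the paths are assumptions, not values from this suite):

import subprocess

ONEAPI = "/opt/intel/oneapi"  # assumed install root
subprocess.run(
    [
        "cmake", "-S", "llama.cpp", "-B", "build",
        "-DGGML_SYCL=ON",
        "-DCMAKE_C_COMPILER=clang",
        "-DCMAKE_CXX_COMPILER=clang++",
        "-DDNNL_GPU_VENDOR=INTEL",
        f"-DTBB_DIR={ONEAPI}/tbb/latest/lib/cmake/tbb",
        f"-DDNNL_DIR={ONEAPI}/dnnl/latest/lib/cmake/dnnl",
        "-DSYCL_COMPILER=ON",
        f"-DMKL_DIR={ONEAPI}/mkl/latest/lib/cmake/mkl",
    ],
    check=True,
)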
@@ -96,14 +97,17 @@ def __init__(self, bench):
     def setup(self):
         self.benchmark_bin = os.path.join(self.bench.build_path, "bin", "llama-bench")
 
+    def model(self):
+        return "DeepSeek-R1-Distill-Qwen-1.5B-Q4_0.gguf"
+
     def name(self):
-        return f"llama.cpp"
+        return f"llama.cpp {self.model()}"
 
     def description(self) -> str:
         return (
             "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
             "Runs both prompt processing (initial context processing) and text generation benchmarks with "
-            "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+            f"different batch sizes. Higher values indicate better performance. Uses the {self.model()} "
             "quantized model and leverages SYCL with oneDNN for acceleration."
         )
 
@@ -122,12 +126,18 @@ def run(self, env_vars) -> list[Result]:
             "128",
             "-p",
             "512",
-            "-b",
-            "128,256,512",
+            "-pg",
+            "0,0",
+            "-sm",
+            "none",
+            "-ngl",
+            "99",
             "--numa",
             "isolate",
             "-t",
-            "56",  # TODO: use only as many threads as numa node 0 has cpus
+            "8",
+            "--mmap",
+            "0",
             "--model",
             f"{self.bench.model}",
         ]
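The replaced options trade the old batch-size sweep (-b 128,256,512) for a single GPU-resident configuration: -ngl 99 offloads effectively all layers of the 1.5B model to the GPU, -sm none disables splitting across multiple devices, -pg 0,0 adds no combined prompt+generation pass, and --mmap 0 loads the model into memory rather than memory-mapping it. Assembled, the invocation comes out roughly as below (the leading "-n" sits above this hunk, and the paths are placeholders):

command = [
    "<build_path>/bin/llama-bench",  # self.benchmark_bin
    "-n", "128",                     # tokens to generate
    "-p", "512",                     # prompt tokens to process
    "-pg", "0,0",                    # no combined prompt+generation test
    "-sm", "none",                   # no multi-GPU layer splitting
    "-ngl", "99",                    # offload (all) layers to the GPU
    "--numa", "isolate",             # stay on one NUMA node
    "-t", "8",                       # CPU threads
    "--mmap", "0",                   # read the model instead of mmap'ing it
    "--model", "<models_dir>/deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
]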