@@ -72,6 +72,13 @@ func TestGetArgs(t *testing.T) {
7272 modelPath := "/path/to/model"
7373 socket := "unix:///tmp/socket"
7474
75+ // Build base expected args based on architecture
76+ baseArgs := []string {"--jinja" , "-ngl" , "999" , "--metrics" }
77+ if runtime .GOARCH == "arm64" {
78+ nThreads := min (2 , runtime .NumCPU ()/ 2 )
79+ baseArgs = append (baseArgs , "--threads" , strconv .Itoa (nThreads ))
80+ }
81+
7582 tests := []struct {
7683 name string
7784 bundle types.ModelBundle
@@ -85,30 +92,24 @@ func TestGetArgs(t *testing.T) {
8592 bundle : & fakeBundle {
8693 ggufPath : modelPath ,
8794 },
88- expected : []string {
89- "--jinja" ,
90- "-ngl" , "999" ,
91- "--metrics" ,
95+ expected : append (append ([]string {}, baseArgs ... ),
9296 "--model" , modelPath ,
9397 "--host" , socket ,
9498 "--ctx-size" , "4096" ,
95- } ,
99+ ) ,
96100 },
97101 {
98102 name : "embedding mode" ,
99103 mode : inference .BackendModeEmbedding ,
100104 bundle : & fakeBundle {
101105 ggufPath : modelPath ,
102106 },
103- expected : []string {
104- "--jinja" ,
105- "-ngl" , "999" ,
106- "--metrics" ,
107+ expected : append (append ([]string {}, baseArgs ... ),
107108 "--model" , modelPath ,
108109 "--host" , socket ,
109110 "--embeddings" ,
110111 "--ctx-size" , "4096" ,
111- } ,
112+ ) ,
112113 },
113114 {
114115 name : "context size from backend config" ,
@@ -119,15 +120,12 @@ func TestGetArgs(t *testing.T) {
119120 config : & inference.BackendConfiguration {
120121 ContextSize : 1234 ,
121122 },
122- expected : []string {
123- "--jinja" ,
124- "-ngl" , "999" ,
125- "--metrics" ,
123+ expected : append (append ([]string {}, baseArgs ... ),
126124 "--model" , modelPath ,
127125 "--host" , socket ,
128126 "--embeddings" ,
129127 "--ctx-size" , "1234" , // should add this flag
130- } ,
128+ ) ,
131129 },
132130 {
133131 name : "context size from model config" ,
@@ -141,15 +139,12 @@ func TestGetArgs(t *testing.T) {
141139 config : & inference.BackendConfiguration {
142140 ContextSize : 1234 ,
143141 },
144- expected : []string {
145- "--jinja" ,
146- "-ngl" , "999" ,
147- "--metrics" ,
142+ expected : append (append ([]string {}, baseArgs ... ),
148143 "--model" , modelPath ,
149144 "--host" , socket ,
150145 "--embeddings" ,
151146 "--ctx-size" , "2096" , // model config takes precedence
152- } ,
147+ ) ,
153148 },
154149 {
155150 name : "chat template from model artifact" ,
@@ -158,15 +153,12 @@ func TestGetArgs(t *testing.T) {
158153 ggufPath : modelPath ,
159154 templatePath : "/path/to/bundle/template.jinja" ,
160155 },
161- expected : []string {
162- "--jinja" ,
163- "-ngl" , "999" ,
164- "--metrics" ,
156+ expected : append (append ([]string {}, baseArgs ... ),
165157 "--model" , modelPath ,
166158 "--host" , socket ,
167159 "--chat-template-file" , "/path/to/bundle/template.jinja" ,
168160 "--ctx-size" , "4096" ,
169- } ,
161+ ) ,
170162 },
171163 {
172164 name : "raw flags from backend config" ,
@@ -177,16 +169,13 @@ func TestGetArgs(t *testing.T) {
177169 config : & inference.BackendConfiguration {
178170 RuntimeFlags : []string {"--some" , "flag" },
179171 },
180- expected : []string {
181- "--jinja" ,
182- "-ngl" , "999" ,
183- "--metrics" ,
172+ expected : append (append ([]string {}, baseArgs ... ),
184173 "--model" , modelPath ,
185174 "--host" , socket ,
186175 "--embeddings" ,
187176 "--ctx-size" , "4096" ,
188177 "--some" , "flag" , // raw runtime flags from backend config come last
189- } ,
178+ ) ,
190179 },
191180 }
192181
0 commit comments