@@ -2,6 +2,7 @@ package llamacpp
 
 import (
 	"runtime"
+	"slices"
 	"strconv"
 	"testing"
 
@@ -72,6 +73,13 @@ func TestGetArgs(t *testing.T) {
 	modelPath := "/path/to/model"
 	socket := "unix:///tmp/socket"
 
+	// Build base expected args based on architecture
+	baseArgs := []string{"--jinja", "-ngl", "999", "--metrics"}
+	if runtime.GOARCH == "arm64" {
+		nThreads := max(2, runtime.NumCPU()/2)
+		baseArgs = append(baseArgs, "--threads", strconv.Itoa(nThreads))
+	}
+
 	tests := []struct {
 		name     string
 		bundle   types.ModelBundle
@@ -85,30 +93,24 @@ func TestGetArgs(t *testing.T) {
 			bundle: &fakeBundle{
 				ggufPath: modelPath,
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--ctx-size", "4096",
-			},
+			),
 		},
 		{
 			name: "embedding mode",
 			mode: inference.BackendModeEmbedding,
 			bundle: &fakeBundle{
 				ggufPath: modelPath,
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
 				"--ctx-size", "4096",
-			},
+			),
 		},
 		{
 			name: "context size from backend config",
@@ -119,15 +121,12 @@ func TestGetArgs(t *testing.T) {
 			config: &inference.BackendConfiguration{
 				ContextSize: 1234,
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
 				"--ctx-size", "1234", // should add this flag
-			},
+			),
 		},
 		{
 			name: "context size from model config",
@@ -141,15 +140,12 @@ func TestGetArgs(t *testing.T) {
 			config: &inference.BackendConfiguration{
 				ContextSize: 1234,
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
 				"--ctx-size", "2096", // model config takes precedence
-			},
+			),
 		},
 		{
 			name: "chat template from model artifact",
@@ -158,15 +154,12 @@ func TestGetArgs(t *testing.T) {
 				ggufPath:     modelPath,
 				templatePath: "/path/to/bundle/template.jinja",
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--chat-template-file", "/path/to/bundle/template.jinja",
 				"--ctx-size", "4096",
-			},
+			),
 		},
 		{
 			name: "raw flags from backend config",
@@ -177,16 +170,13 @@ func TestGetArgs(t *testing.T) {
 			config: &inference.BackendConfiguration{
 				RuntimeFlags: []string{"--some", "flag"},
 			},
-			expected: []string{
-				"--jinja",
-				"-ngl", "999",
-				"--metrics",
+			expected: append(slices.Clone(baseArgs),
 				"--model", modelPath,
 				"--host", socket,
 				"--embeddings",
 				"--ctx-size", "4096",
188178 "--some" , "flag" , // model config takes precedence
-			},
+			),
 		},
 	}
 
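A note on the append(slices.Clone(baseArgs), ...) pattern this change introduces: cloning before appending gives each test case's expected slice its own backing array. Appending to the shared baseArgs directly could reuse its spare capacity, so one case's append might silently overwrite elements another case reads. A minimal standalone sketch of that pitfall (the names base, a, b, c, d are illustrative, not from this change):

	package main

	import (
		"fmt"
		"slices"
	)

	func main() {
		// A shared prefix slice with spare capacity, standing in for baseArgs.
		base := make([]string, 0, 8)
		base = append(base, "--jinja", "-ngl")

		// Appending to the shared slice directly reuses its backing array:
		a := append(base, "--model") // writes into base's spare capacity
		b := append(base, "--host")  // overwrites the element a just wrote
		fmt.Println(a[2], b[2])      // --host --host: a was silently clobbered

		// Cloning first gives each result an independent backing array:
		c := append(slices.Clone(base), "--model")
		d := append(slices.Clone(base), "--host")
		fmt.Println(c[2], d[2]) // --model --host, as intended
	}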