Skip to content

Commit 9372ac3

Browse files
author
Piotr Stankiewicz
committed
inference: Fix failing llama_config unit tests
Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
1 parent ebfa2c2 commit 9372ac3

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

pkg/gpuinfo/memory_windows.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ import (
99
"runtime"
1010
"strconv"
1111
"strings"
12+
"time"
1213
)
1314

1415
// getVRAMSize returns the system's total GPU memory (VRAM) in bytes.
15-
func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, error) {
16+
func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
1617
if runtime.GOARCH == "arm64" {
1718
// TODO(p1-0tr): For now, on windows/arm64, stick to the old behaviour. This will
1819
// require backend.GetRequiredMemoryForModel to return 1 as well.
@@ -21,6 +22,7 @@ func getVRAMSize(ctx context.Context, modelRuntimeInstallPath string) (uint64, e
2122

2223
nvGPUInfoBin := filepath.Join(modelRuntimeInstallPath, "com.docker.nv-gpu-info.exe")
2324

25+
ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
2426
cmd := exec.CommandContext(ctx, nvGPUInfoBin)
2527
out, err := cmd.CombinedOutput()
2628
if err != nil {

pkg/inference/backends/llamacpp/llamacpp_config_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ func TestGetArgs(t *testing.T) {
9191
"--metrics",
9292
"--model", modelPath,
9393
"--host", socket,
94+
"--ctx-size", "4096",
9495
},
9596
},
9697
{
@@ -106,6 +107,7 @@ func TestGetArgs(t *testing.T) {
106107
"--model", modelPath,
107108
"--host", socket,
108109
"--embeddings",
110+
"--ctx-size", "4096",
109111
},
110112
},
111113
{
@@ -165,6 +167,7 @@ func TestGetArgs(t *testing.T) {
165167
"--model", modelPath,
166168
"--host", socket,
167169
"--embeddings",
170+
"--ctx-size", "4096",
168171
"--some", "flag", // model config takes precedence
169172
},
170173
},

0 commit comments

Comments
 (0)