Commit f8a6e88

Only load supported models on new engine (ollama#11362)
* Only load supported models on new engine: verify the model is supported before trying to load
* int: testcase for all library models
1 parent 35fda7b commit f8a6e88

File tree

4 files changed: +261 -0 lines


integration/library_models_test.go

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+//go:build integration && library
+
+package integration
+
+import (
+	"context"
+	"log/slog"
+	"testing"
+	"time"
+
+	"github.com/ollama/ollama/api"
+)
+
+// First run of this scenario on a target system will take a long time to download
+// ~1.5TB of models. Set a sufficiently large -timeout for your network speed
+func TestLibraryModelsGenerate(t *testing.T) {
+	softTimeout, hardTimeout := getTimeouts(t)
+	slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
+	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	defer cancel()
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+
+	chatModels := libraryChatModels
+	for _, model := range chatModels {
+		t.Run(model, func(t *testing.T) {
+			if time.Now().Sub(started) > softTimeout {
+				t.Skip("skipping remaining tests to avoid excessive runtime")
+			}
+			if err := PullIfMissing(ctx, client, model); err != nil {
+				t.Fatalf("pull failed %s", err)
+			}
+			req := api.GenerateRequest{
+				Model:     model,
+				Prompt:    "why is the sky blue?",
+				KeepAlive: &api.Duration{Duration: 10 * time.Second},
+				Options: map[string]interface{}{
+					"temperature": 0.1,
+					"seed":        123,
+				},
+			}
+			anyResp := []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength"}
+			// Special cases
+			if model == "duckdb-nsql" {
+				anyResp = []string{"select", "from"}
+			} else if model == "granite3-guardian" || model == "shieldgemma" || model == "llama-guard3" || model == "bespoke-minicheck" {
+				anyResp = []string{"yes", "no", "safe", "unsafe"}
+			} else if model == "openthinker" || model == "nexusraven" {
+				anyResp = []string{"plugin", "im_sep", "components", "function call"}
+			} else if model == "starcoder" || model == "starcoder2" || model == "magicoder" || model == "deepseek-coder" {
+				req.Prompt = "def fibonacci():"
+				anyResp = []string{"f(n)", "sequence", "n-1", "main()", "__main__", "while"}
+			}
+			DoGenerate(ctx, t, client, req, anyResp, 120*time.Second, 30*time.Second)
+		})
+	}
+}
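
Note on running this test: the file only compiles when both the integration and library build tags are supplied, so a plausible invocation (the exact flags and timeout are an assumption; pick a -timeout that matches your download speed, since a first run fetches roughly 1.5 TB of models) is go test -tags=integration,library -run TestLibraryModelsGenerate -timeout 24h ./integration. Each subtest also checks the soft timeout before starting, so a partial run skips the remaining models rather than failing them.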

integration/utils_test.go

Lines changed: 185 additions & 0 deletions
@@ -72,6 +72,187 @@
 		"stablelm2:latest", // Predictions are off, crashes on small VRAM GPUs
 		"falcon:latest",
 	}
+
+	// Some library models are quite large - ensure large VRAM and sufficient disk space
+	// before running scenarios based on this set
+	libraryChatModels = []string{
+		"alfred",
+		"athene-v2",
+		"aya-expanse",
+		"aya",
+		"bakllava",
+		"bespoke-minicheck",
+		"codebooga",
+		"codegeex4",
+		"codegemma",
+		"codellama",
+		"codeqwen",
+		"codestral",
+		"codeup",
+		"cogito",
+		"command-a",
+		"command-r-plus",
+		"command-r",
+		"command-r7b-arabic",
+		"command-r7b",
+		"dbrx",
+		"deepcoder",
+		"deepscaler",
+		"deepseek-coder-v2",
+		"deepseek-coder",
+		"deepseek-llm",
+		"deepseek-r1",
+		// "deepseek-v2.5", // requires 155 GB VRAM
+		"deepseek-v2",
+		// "deepseek-v3", // requires 482 GB VRAM
+		"devstral",
+		"dolphin-llama3",
+		"dolphin-mistral",
+		"dolphin-mixtral",
+		"dolphin-phi",
+		"dolphin3",
+		"dolphincoder",
+		"duckdb-nsql",
+		"everythinglm",
+		"exaone-deep",
+		"exaone3.5",
+		"falcon",
+		"falcon2",
+		"falcon3",
+		"firefunction-v2",
+		"gemma",
+		"gemma2",
+		"gemma3",
+		"gemma3n",
+		"glm4",
+		"goliath",
+		"granite-code",
+		"granite3-dense",
+		"granite3-guardian",
+		"granite3-moe",
+		"granite3.1-dense",
+		"granite3.1-moe",
+		"granite3.2-vision",
+		"granite3.2",
+		"granite3.3",
+		"hermes3",
+		"internlm2",
+		"llama-guard3",
+		"llama-pro",
+		"llama2-chinese",
+		"llama2-uncensored",
+		"llama2",
+		"llama3-chatqa",
+		"llama3-gradient",
+		"llama3-groq-tool-use",
+		"llama3.1",
+		"llama3.2-vision",
+		"llama3.2",
+		"llama3.3",
+		"llama3",
+		"llama4",
+		"llava-llama3",
+		"llava-phi3",
+		"llava",
+		"magicoder",
+		"magistral",
+		"marco-o1",
+		"mathstral",
+		"meditron",
+		"medllama2",
+		"megadolphin",
+		"minicpm-v",
+		"mistral-large",
+		"mistral-nemo",
+		"mistral-openorca",
+		"mistral-small",
+		"mistral-small3.1",
+		"mistral-small3.2",
+		"mistral",
+		"mistrallite",
+		"mixtral",
+		"moondream",
+		"nemotron-mini",
+		"nemotron",
+		"neural-chat",
+		"nexusraven",
+		"notus",
+		"nous-hermes",
+		"nous-hermes2-mixtral",
+		"nous-hermes2",
+		"nuextract",
+		"olmo2",
+		"open-orca-platypus2",
+		"openchat",
+		"opencoder",
+		"openhermes",
+		"openthinker",
+		"orca-mini",
+		"orca2",
+		// "phi", // unreliable
+		"phi3.5",
+		"phi3",
+		"phi4-mini-reasoning",
+		"phi4-mini",
+		"phi4-reasoning",
+		"phi4",
+		"phind-codellama",
+		"qwen",
+		"qwen2-math",
+		"qwen2.5-coder",
+		"qwen2.5",
+		"qwen2.5vl",
+		"qwen2",
+		"qwen3:0.6b", // dense
+		"qwen3:30b",  // MOE
+		"qwq",
+		"r1-1776",
+		"reader-lm",
+		"reflection",
+		"sailor2",
+		"samantha-mistral",
+		"shieldgemma",
+		"smallthinker",
+		"smollm",
+		"smollm2",
+		"solar-pro",
+		"solar",
+		"sqlcoder",
+		"stable-beluga",
+		"stable-code",
+		"stablelm-zephyr",
+		"stablelm2",
+		"starcoder",
+		"starcoder2",
+		"starling-lm",
+		"tinydolphin",
+		"tinyllama",
+		"tulu3",
+		"vicuna",
+		"wizard-math",
+		"wizard-vicuna-uncensored",
+		"wizard-vicuna",
+		"wizardcoder",
+		"wizardlm-uncensored",
+		"wizardlm2",
+		"xwinlm",
+		"yarn-llama2",
+		"yarn-mistral",
+		"yi-coder",
+		"yi",
+		"zephyr",
+	}
+	libraryEmbedModels = []string{
+		"all-minilm",
+		"bge-large",
+		"bge-m3",
+		"granite-embedding",
+		"mxbai-embed-large",
+		"nomic-embed-text",
+		"paraphrase-multilingual",
+		"snowflake-arctic-embed",
+		"snowflake-arctic-embed2",
+	}
 )
 
 func Init() {
@@ -313,6 +494,10 @@ func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq ap
 			t.Errorf("generate stalled. Response so far:%s", buf.String())
 		}
 	case <-done:
+		if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
+			slog.Warn("model is too large for the target test system", "model", genReq.Model, "error", genErr)
+			return
+		}
 		require.NoError(t, genErr, "failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
 		// Verify the response contains the expected data
 		response := buf.String()
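
The new branch in DoGenerate downgrades an out-of-memory load failure to a soft pass, so library models that simply do not fit the test machine's RAM are logged as warnings instead of failing the suite. A minimal sketch of the same idea as a standalone helper (the helper name and the use of t.Skipf are assumptions for illustration; the committed code logs via slog and returns, as shown above):

package integration

import (
	"strings"
	"testing"
)

// skipIfInsufficientMemory is a hypothetical helper: treat the server's
// "model requires more system memory" error as an environment limitation
// and skip the subtest instead of failing it.
func skipIfInsufficientMemory(t *testing.T, model string, err error) {
	t.Helper()
	if err != nil && strings.Contains(err.Error(), "model requires more system memory") {
		t.Skipf("model %s is too large for this test system: %v", model, err)
	}
}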

model/models/llama/model.go

Lines changed: 9 additions & 0 deletions
@@ -2,6 +2,7 @@ package llama
 
 import (
 	"cmp"
+	"fmt"
 	"math"
 
 	"github.com/ollama/ollama/fs"
@@ -33,6 +34,14 @@ type Model struct {
 }
 
 func New(c fs.Config) (model.Model, error) {
+	// This model currently only supports the gpt2 tokenizer
+	if c.String("tokenizer.ggml.model") == "llama" {
+		return nil, fmt.Errorf("unsupported tokenizer: llama")
+	}
+	// Best effort detection of library/deepseek-coder model(s) which are incompatible
+	if c.String("general.name") == "deepseek-ai" {
+		return nil, fmt.Errorf("unsupported model: %s", c.String("general.name"))
+	}
 	m := Model{
 		BytePairEncoding: model.NewBytePairEncoding(
 			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),

model/models/qwen2/model.go

Lines changed: 10 additions & 0 deletions
@@ -2,7 +2,9 @@ package qwen2
 
 import (
 	"cmp"
+	"fmt"
 	"math"
+	"strings"
 
 	"github.com/ollama/ollama/fs"
 	"github.com/ollama/ollama/kvcache"
@@ -126,6 +128,14 @@ func (m Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor
 }
 
 func New(c fs.Config) (model.Model, error) {
+	// This model currently only supports the gpt2 tokenizer
+	if c.String("tokenizer.ggml.model") == "llama" {
+		return nil, fmt.Errorf("unsupported tokenizer: llama")
+	}
+	// detect library/qwen model(s) which are incompatible
+	if strings.HasPrefix(c.String("general.name"), "Qwen2-beta") {
+		return nil, fmt.Errorf("unsupported model: %s", c.String("general.name"))
+	}
 	m := Model{
 		Layers:           make([]DecoderLayer, c.Uint("block_count")),
 		BytePairEncoding: model.NewBytePairEncoding(
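
Both constructors apply the same guard: inspect the GGUF metadata exposed through fs.Config and fail fast, before any weights are loaded, whenever the configuration is something these implementations cannot run. A minimal sketch of that check factored into one helper (the package, helper name, and combined condition are assumptions for illustration; the committed code inlines the checks per model, as shown in the two diffs above):

package modelcompat // hypothetical package, for illustration only

import (
	"fmt"
	"strings"

	"github.com/ollama/ollama/fs"
)

// verifySupported rejects configurations the new engine's llama/qwen2
// implementations cannot run, using the same metadata keys as the diffs above.
func verifySupported(c fs.Config) error {
	// Only the gpt2 tokenizer is supported by these implementations.
	if c.String("tokenizer.ggml.model") == "llama" {
		return fmt.Errorf("unsupported tokenizer: llama")
	}
	// Best-effort detection of known-incompatible library models by name.
	name := c.String("general.name")
	if name == "deepseek-ai" || strings.HasPrefix(name, "Qwen2-beta") {
		return fmt.Errorf("unsupported model: %s", name)
	}
	return nil
}

In this sketch, New would call the helper first and return its error, exactly as the inlined checks do, so unsupported models are rejected before the new engine tries to load them, which is the behavior the commit message describes.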
