Skip to content

Commit ec07367

Browse files
committed
Enhance Ollama model auto-pull feature
* Fix configuration inheritance issue when default value is not specified.
* Make it possible to enable the auto-pull feature only for specific model types (e.g. for chat models only).
* Add the possibility to list explicit models to auto-pull at startup time.

Signed-off-by: Thomas Vitale <[email protected]>
1 parent d17c072 commit ec07367

File tree

11 files changed

+163
-12
lines changed

11 files changed

+163
-12
lines changed

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ public ChatOptions getDefaultOptions() {
379379
* Pull the given model into Ollama based on the specified strategy.
380380
*/
381381
private void initializeModelIfEnabled(String model, PullModelStrategy pullModelStrategy) {
382-
if (!PullModelStrategy.NEVER.equals(pullModelStrategy)) {
382+
if (pullModelStrategy != null && !PullModelStrategy.NEVER.equals(pullModelStrategy)) {
383383
this.modelManager.pullModel(model, pullModelStrategy);
384384
}
385385
}

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaEmbeddingModel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ private EmbeddingOptions buildRequestOptions(OllamaApi.EmbeddingsRequest request
163163
* Pull the given model into Ollama based on the specified strategy.
164164
*/
165165
private void initializeModelIfEnabled(String model, PullModelStrategy pullModelStrategy) {
166-
if (!PullModelStrategy.NEVER.equals(pullModelStrategy)) {
166+
if (pullModelStrategy != null && !PullModelStrategy.NEVER.equals(pullModelStrategy)) {
167167
this.modelManager.pullModel(model, pullModelStrategy);
168168
}
169169
}

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
309309
* Strategy for pulling models at run-time.
310310
*/
311311
@JsonIgnore
312-
private PullModelStrategy pullModelStrategy = PullModelStrategy.NEVER;
312+
private PullModelStrategy pullModelStrategy;
313313

314314
public static OllamaOptions builder() {
315315
return new OllamaOptions();

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/ModelManagementOptions.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,17 @@
1616
package org.springframework.ai.ollama.management;
1717

1818
import java.time.Duration;
19+
import java.util.List;
1920

2021
/**
2122
* Options for managing models in Ollama.
2223
*
2324
* @author Thomas Vitale
2425
* @since 1.0.0
2526
*/
26-
public record ModelManagementOptions(PullModelStrategy pullModelStrategy, Duration timeout, Integer maxRetries) {
27+
public record ModelManagementOptions(PullModelStrategy pullModelStrategy, List<String> additionalModels,
28+
Duration timeout, Integer maxRetries) {
2729
public static ModelManagementOptions defaults() {
28-
return new ModelManagementOptions(PullModelStrategy.NEVER, Duration.ofMinutes(5), 0);
30+
return new ModelManagementOptions(PullModelStrategy.NEVER, List.of(), Duration.ofMinutes(5), 0);
2931
}
3032
}

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/OllamaModelManager.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ public OllamaModelManager(OllamaApi ollamaApi) {
4848
public OllamaModelManager(OllamaApi ollamaApi, ModelManagementOptions options) {
4949
this.ollamaApi = ollamaApi;
5050
this.options = options;
51+
52+
if (!CollectionUtils.isEmpty(options.additionalModels())) {
53+
options.additionalModels().forEach(this::pullModel);
54+
}
5155
}
5256

5357
public boolean isModelAvailable(String modelName) {

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/BaseOllamaIT.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import org.springframework.ai.ollama.api.OllamaApi;
44
import org.springframework.ai.ollama.management.OllamaModelManager;
55
import org.springframework.ai.ollama.management.PullModelStrategy;
6+
import org.springframework.util.StringUtils;
67
import org.testcontainers.ollama.OllamaContainer;
78

89
public class BaseOllamaIT {
@@ -31,14 +32,20 @@ public static boolean isDisabled() {
3132
return false;
3233
}
3334

35+
public static OllamaApi buildOllamaApi() {
36+
return buildOllamaApiWithModel(null);
37+
}
38+
3439
public static OllamaApi buildOllamaApiWithModel(String model) {
3540
var baseUrl = "http://localhost:11434";
3641
if (useTestcontainers) {
3742
baseUrl = ollamaContainer.getEndpoint();
3843
}
3944
var ollamaApi = new OllamaApi(baseUrl);
4045

41-
ensureModelIsPresent(ollamaApi, model);
46+
if (StringUtils.hasText(model)) {
47+
ensureModelIsPresent(ollamaApi, model);
48+
}
4249

4350
return ollamaApi;
4451
}

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/management/OllamaModelManagerIT.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import org.testcontainers.junit.jupiter.Testcontainers;
2424

2525
import java.io.IOException;
26+
import java.time.Duration;
27+
import java.util.List;
2628

2729
import static org.assertj.core.api.Assertions.assertThat;
2830

@@ -81,4 +83,21 @@ public void pullAndDeleteModel() {
8183
assertThat(isModelAvailable).isFalse();
8284
}
8385

86+
@Test
87+
public void pullAdditionalModels() {
88+
var model = "all-minilm";
89+
var isModelAvailable = modelManager.isModelAvailable(model);
90+
assertThat(isModelAvailable).isFalse();
91+
92+
new OllamaModelManager(buildOllamaApi(),
93+
new ModelManagementOptions(PullModelStrategy.WHEN_MISSING, List.of(model), Duration.ofMinutes(5), 0));
94+
95+
isModelAvailable = modelManager.isModelAvailable(model);
96+
assertThat(isModelAvailable).isTrue();
97+
98+
modelManager.deleteModel(model);
99+
isModelAvailable = modelManager.isModelAvailable(model);
100+
assertThat(isModelAvailable).isFalse();
101+
}
102+
84103
}

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ Here are the properties for initializing the Ollama integration and xref:auto-pu
7272
| spring.ai.ollama.init.pull-model-strategy | Whether to pull models at startup-time and how. | `never`
7373
| spring.ai.ollama.init.timeout | How long to wait for a model to be pulled. | `5m`
7474
| spring.ai.ollama.init.max-retries | Maximum number of retries for the model pull operation. | `0`
75+
| spring.ai.ollama.init.chat.include | Whether to include chat models in the initialization task. | `true`
76+
| spring.ai.ollama.init.chat.additional-models | Additional models to initialize besides the ones configured via default properties. | `[]`
7577
|====
7678

7779
=== Chat Properties
@@ -188,6 +190,34 @@ spring:
188190

189191
CAUTION: The application will not complete its initialization until all the models become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at starting up.
190192

193+
You can also initialize additional models at startup time, which is useful for models that are only selected dynamically at runtime.
194+
195+
[source,yaml]
196+
----
197+
spring:
198+
ai:
199+
ollama:
200+
init:
201+
pull-model-strategy: always
202+
chat:
203+
additional-models:
204+
- llama3.2
205+
- qwen2.5
206+
----
207+
208+
If you want the pulling strategy to apply only to other types of models (for example, embedding models), you can exclude the chat models from the initialization task.
209+
210+
[source,yaml]
211+
----
212+
spring:
213+
ai:
214+
ollama:
215+
init:
216+
pull-model-strategy: always
217+
chat:
218+
include: false
219+
----
220+
191221
=== Pulling models at runtime
192222

193223
To enable auto-pulling of models at runtime, you can configure the `pullModelStrategy` option in your `OllamaOptions`:
@@ -205,7 +235,7 @@ ChatResponse response = chatModel.call(new Prompt(
205235

206236
You can also configure this option using the following property: `spring.ai.ollama.chat.options.pull-model-strategy=always`.
207237

208-
CAUTION: The time to process an incoming request might incur unexpected delays, waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests.
238+
CAUTION: Processing an incoming request might incur unexpected delays while waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests. You might want to initialize these models at startup time instead, using the `spring.ai.ollama.init.chat.additional-models` property.
209239

210240
== Function Calling
211241

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Here are the properties for initializing the Ollama integration and xref:auto-pu
7676
| spring.ai.ollama.init.pull-model-strategy | Whether to pull models at startup-time and how. | `never`
7777
| spring.ai.ollama.init.timeout | How long to wait for a model to be pulled. | `5m`
7878
| spring.ai.ollama.init.max-retries | Maximum number of retries for the model pull operation. | `0`
79+
| spring.ai.ollama.init.embedding.include | Whether to include embedding models in the initialization task. | `true`
80+
| spring.ai.ollama.init.embedding.additional-models | Additional models to initialize besides the ones configured via default properties. | `[]`
7981
|====
8082

8183
=== Embedding Properties
@@ -190,6 +192,34 @@ spring:
190192

191193
CAUTION: The application will not complete its initialization until all the models become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at starting up.
192194

195+
You can also initialize additional models at startup time, which is useful for models that are only selected dynamically at runtime.
196+
197+
[source,yaml]
198+
----
199+
spring:
200+
ai:
201+
ollama:
202+
init:
203+
pull-model-strategy: always
204+
embedding:
205+
additional-models:
206+
- mxbai-embed-large
207+
- nomic-embed-text
208+
----
209+
210+
If you want the pulling strategy to apply only to other types of models (for example, chat models), you can exclude the embedding models from the initialization task.
211+
212+
[source,yaml]
213+
----
214+
spring:
215+
ai:
216+
ollama:
217+
init:
218+
pull-model-strategy: always
219+
embedding:
220+
include: false
221+
----
222+
193223
=== Pulling models at runtime
194224

195225
To enable auto-pulling of models at runtime, you can configure the `pullModelStrategy` option in your `OllamaOptions`:
@@ -206,7 +236,7 @@ EmbeddingResponse embeddingResponse = embeddingModel
206236

207237
You can also configure this option using the following property: `spring.ai.ollama.embedding.options.pull-model-strategy=always`.
208238

209-
CAUTION: The time to process an incoming request might incur unexpected delays, waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests.
239+
CAUTION: Processing an incoming request might incur unexpected delays while waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests. You might want to initialize these models at startup time instead, using the `spring.ai.ollama.init.embedding.additional-models` property.
210240

211241
== Sample Controller
212242

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/ollama/OllamaAutoConfiguration.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.springframework.ai.ollama.OllamaEmbeddingModel;
2626
import org.springframework.ai.ollama.api.OllamaApi;
2727
import org.springframework.ai.ollama.management.ModelManagementOptions;
28+
import org.springframework.ai.ollama.management.PullModelStrategy;
2829
import org.springframework.beans.factory.ObjectProvider;
2930
import org.springframework.boot.autoconfigure.AutoConfiguration;
3031
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
@@ -80,14 +81,18 @@ public OllamaChatModel ollamaChatModel(OllamaApi ollamaApi, OllamaChatProperties
8081
OllamaInitializationProperties initProperties, List<FunctionCallback> toolFunctionCallbacks,
8182
FunctionCallbackContext functionCallbackContext, ObjectProvider<ObservationRegistry> observationRegistry,
8283
ObjectProvider<ChatModelObservationConvention> observationConvention) {
84+
var chatModelPullStrategy = initProperties.getChat().isInclude() ? initProperties.getPullModelStrategy()
85+
: PullModelStrategy.NEVER;
86+
8387
var chatModel = OllamaChatModel.builder()
8488
.withOllamaApi(ollamaApi)
8589
.withDefaultOptions(properties.getOptions())
8690
.withFunctionCallbackContext(functionCallbackContext)
8791
.withToolFunctionCallbacks(toolFunctionCallbacks)
8892
.withObservationRegistry(observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP))
89-
.withModelManagementOptions(new ModelManagementOptions(initProperties.getPullModelStrategy(),
90-
initProperties.getTimeout(), initProperties.getMaxRetries()))
93+
.withModelManagementOptions(
94+
new ModelManagementOptions(chatModelPullStrategy, initProperties.getChat().getAdditionalModels(),
95+
initProperties.getTimeout(), initProperties.getMaxRetries()))
9196
.build();
9297

9398
observationConvention.ifAvailable(chatModel::setObservationConvention);
@@ -102,12 +107,16 @@ public OllamaChatModel ollamaChatModel(OllamaApi ollamaApi, OllamaChatProperties
102107
public OllamaEmbeddingModel ollamaEmbeddingModel(OllamaApi ollamaApi, OllamaEmbeddingProperties properties,
103108
OllamaInitializationProperties initProperties, ObjectProvider<ObservationRegistry> observationRegistry,
104109
ObjectProvider<EmbeddingModelObservationConvention> observationConvention) {
110+
var embeddingModelPullStrategy = initProperties.getEmbedding().isInclude()
111+
? initProperties.getPullModelStrategy() : PullModelStrategy.NEVER;
112+
105113
var embeddingModel = OllamaEmbeddingModel.builder()
106114
.withOllamaApi(ollamaApi)
107115
.withDefaultOptions(properties.getOptions())
108116
.withObservationRegistry(observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP))
109-
.withModelManagementOptions(new ModelManagementOptions(initProperties.getPullModelStrategy(),
110-
initProperties.getTimeout(), initProperties.getMaxRetries()))
117+
.withModelManagementOptions(new ModelManagementOptions(embeddingModelPullStrategy,
118+
initProperties.getEmbedding().getAdditionalModels(), initProperties.getTimeout(),
119+
initProperties.getMaxRetries()))
111120
.build();
112121

113122
observationConvention.ifAvailable(embeddingModel::setObservationConvention);

0 commit comments

Comments
 (0)