diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java index 2e953246184..93edabe8c95 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java @@ -97,7 +97,7 @@ public OllamaChatModel(OllamaApi ollamaApi, OllamaOptions defaultOptions, this.defaultOptions = defaultOptions; this.observationRegistry = observationRegistry; this.modelManager = new OllamaModelManager(chatApi, modelManagementOptions); - initializeModelIfEnabled(defaultOptions.getModel(), modelManagementOptions.pullModelStrategy()); + initializeModel(defaultOptions.getModel(), modelManagementOptions.pullModelStrategy()); } public static Builder builder() { @@ -302,11 +302,6 @@ else if (message instanceof ToolResponseMessage toolMessage) { } OllamaOptions mergedOptions = ModelOptionsUtils.merge(runtimeOptions, this.defaultOptions, OllamaOptions.class); - mergedOptions.setPullModelStrategy(this.defaultOptions.getPullModelStrategy()); - if (runtimeOptions != null && runtimeOptions.getPullModelStrategy() != null) { - mergedOptions.setPullModelStrategy(runtimeOptions.getPullModelStrategy()); - } - // Override the model. if (!StringUtils.hasText(mergedOptions.getModel())) { throw new IllegalArgumentException("Model is not set!"); @@ -331,8 +326,6 @@ else if (message instanceof ToolResponseMessage toolMessage) { requestBuilder.withTools(this.getFunctionTools(functionsForThisRequest)); } - initializeModelIfEnabled(mergedOptions.getModel(), mergedOptions.getPullModelStrategy()); - return requestBuilder.build(); } @@ -379,7 +372,7 @@ public ChatOptions getDefaultOptions() { /** * Pull the given model into Ollama based on the specified strategy. 
*/ - private void initializeModelIfEnabled(String model, PullModelStrategy pullModelStrategy) { + private void initializeModel(String model, PullModelStrategy pullModelStrategy) { if (pullModelStrategy != null && !PullModelStrategy.NEVER.equals(pullModelStrategy)) { this.modelManager.pullModel(model, pullModelStrategy); } diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaEmbeddingModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaEmbeddingModel.java index 534d1ba33bf..7034a9c035f 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaEmbeddingModel.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaEmbeddingModel.java @@ -77,7 +77,7 @@ public OllamaEmbeddingModel(OllamaApi ollamaApi, OllamaOptions defaultOptions, this.observationRegistry = observationRegistry; this.modelManager = new OllamaModelManager(ollamaApi, modelManagementOptions); - initializeModelIfEnabled(defaultOptions.getModel(), modelManagementOptions.pullModelStrategy()); + initializeModel(defaultOptions.getModel(), modelManagementOptions.pullModelStrategy()); } public static Builder builder() { @@ -139,19 +139,12 @@ OllamaApi.EmbeddingsRequest ollamaEmbeddingRequest(List inputContent, Em OllamaOptions mergedOptions = ModelOptionsUtils.merge(runtimeOptions, this.defaultOptions, OllamaOptions.class); - mergedOptions.setPullModelStrategy(this.defaultOptions.getPullModelStrategy()); - if (runtimeOptions != null && runtimeOptions.getPullModelStrategy() != null) { - mergedOptions.setPullModelStrategy(runtimeOptions.getPullModelStrategy()); - } - // Override the model. 
if (!StringUtils.hasText(mergedOptions.getModel())) { throw new IllegalArgumentException("Model is not set!"); } String model = mergedOptions.getModel(); - initializeModelIfEnabled(mergedOptions.getModel(), mergedOptions.getPullModelStrategy()); - return new OllamaApi.EmbeddingsRequest(model, inputContent, DurationParser.parse(mergedOptions.getKeepAlive()), OllamaOptions.filterNonSupportedFields(mergedOptions.toMap()), mergedOptions.getTruncate()); } @@ -163,7 +156,7 @@ private EmbeddingOptions buildRequestOptions(OllamaApi.EmbeddingsRequest request /** * Pull the given model into Ollama based on the specified strategy. */ - private void initializeModelIfEnabled(String model, PullModelStrategy pullModelStrategy) { + private void initializeModel(String model, PullModelStrategy pullModelStrategy) { if (pullModelStrategy != null && !PullModelStrategy.NEVER.equals(pullModelStrategy)) { this.modelManager.pullModel(model, pullModelStrategy); } diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java index f7a780668d0..a0dad31a4b0 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java @@ -28,7 +28,6 @@ import org.springframework.ai.model.ModelOptionsUtils; import org.springframework.ai.model.function.FunctionCallback; import org.springframework.ai.model.function.FunctionCallingOptions; -import org.springframework.ai.ollama.management.PullModelStrategy; import org.springframework.boot.context.properties.NestedConfigurationProperty; import org.springframework.util.Assert; @@ -303,12 +302,6 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed @JsonIgnore private Map toolContext; - /** - * Strategy for pulling models at run-time. 
- */ - @JsonIgnore - private PullModelStrategy pullModelStrategy; - public static OllamaOptions builder() { return new OllamaOptions(); } @@ -521,11 +514,6 @@ public OllamaOptions withToolContext(Map toolContext) { return this; } - public OllamaOptions withPullModelStrategy(PullModelStrategy pullModelStrategy) { - this.pullModelStrategy = pullModelStrategy; - return this; - } - // ------------------- // Getters and Setters // ------------------- @@ -866,14 +854,6 @@ public void setToolContext(Map toolContext) { this.toolContext = toolContext; } - public PullModelStrategy getPullModelStrategy() { - return this.pullModelStrategy; - } - - public void setPullModelStrategy(PullModelStrategy pullModelStrategy) { - this.pullModelStrategy = pullModelStrategy; - } - /** * Convert the {@link OllamaOptions} object to a {@link Map} of key/value pairs. * @return The {@link Map} of key/value pairs. @@ -944,8 +924,7 @@ public static OllamaOptions fromOptions(OllamaOptions fromOptions) { .withFunctions(fromOptions.getFunctions()) .withProxyToolCalls(fromOptions.getProxyToolCalls()) .withFunctionCallbacks(fromOptions.getFunctionCallbacks()) - .withToolContext(fromOptions.getToolContext()) - .withPullModelStrategy(fromOptions.getPullModelStrategy()); + .withToolContext(fromOptions.getToolContext()); } // @formatter:on @@ -975,8 +954,7 @@ public boolean equals(Object o) { && Objects.equals(penalizeNewline, that.penalizeNewline) && Objects.equals(stop, that.stop) && Objects.equals(functionCallbacks, that.functionCallbacks) && Objects.equals(proxyToolCalls, that.proxyToolCalls) && Objects.equals(functions, that.functions) - && Objects.equals(toolContext, that.toolContext) - && Objects.equals(pullModelStrategy, that.pullModelStrategy); + && Objects.equals(toolContext, that.toolContext); } @Override @@ -987,7 +965,7 @@ public int hashCode() { this.topP, tfsZ, this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, 
this.mirostatTau, this.mirostatEta, this.penalizeNewline, this.stop, this.functionCallbacks, this.functions, this.proxyToolCalls, - this.toolContext, this.pullModelStrategy); + this.toolContext); } } diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/ModelManagementOptions.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/ModelManagementOptions.java index b49d0978cd5..5d600b14ed3 100644 --- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/ModelManagementOptions.java +++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/management/ModelManagementOptions.java @@ -26,7 +26,49 @@ */ public record ModelManagementOptions(PullModelStrategy pullModelStrategy, List additionalModels, Duration timeout, Integer maxRetries) { + public static ModelManagementOptions defaults() { return new ModelManagementOptions(PullModelStrategy.NEVER, List.of(), Duration.ofMinutes(5), 0); } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private PullModelStrategy pullModelStrategy = PullModelStrategy.NEVER; + + private List additionalModels = List.of(); + + private Duration timeout = Duration.ofMinutes(5); + + private Integer maxRetries = 0; + + public Builder withPullModelStrategy(PullModelStrategy pullModelStrategy) { + this.pullModelStrategy = pullModelStrategy; + return this; + } + + public Builder withAdditionalModels(List additionalModels) { + this.additionalModels = additionalModels; + return this; + } + + public Builder withTimeout(Duration timeout) { + this.timeout = timeout; + return this; + } + + public Builder withMaxRetries(Integer maxRetries) { + this.maxRetries = maxRetries; + return this; + } + + public ModelManagementOptions build() { + return new ModelManagementOptions(pullModelStrategy, additionalModels, timeout, maxRetries); + } + + } + } diff --git 
a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelIT.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelIT.java index 3b23f39b123..4b2fac29e55 100644 --- a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelIT.java +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelIT.java @@ -33,6 +33,7 @@ import org.springframework.ai.converter.MapOutputConverter; import org.springframework.ai.ollama.api.OllamaApi; import org.springframework.ai.ollama.api.OllamaModel; +import org.springframework.ai.ollama.management.ModelManagementOptions; import org.springframework.ai.ollama.management.OllamaModelManager; import org.springframework.ai.ollama.api.OllamaOptions; import org.springframework.ai.ollama.management.PullModelStrategy; @@ -56,6 +57,8 @@ class OllamaChatModelIT extends BaseOllamaIT { private static final String MODEL = OllamaModel.LLAMA3_2.getName(); + private static final String ADDITIONAL_MODEL = "tinyllama"; + @Autowired private OllamaChatModel chatModel; @@ -65,23 +68,17 @@ class OllamaChatModelIT extends BaseOllamaIT { @Test void autoPullModelTest() { var modelManager = new OllamaModelManager(ollamaApi); - var model = "tinyllama"; - modelManager.deleteModel(model); - assertThat(modelManager.isModelAvailable(model)).isFalse(); + assertThat(modelManager.isModelAvailable(ADDITIONAL_MODEL)).isTrue(); String joke = ChatClient.create(chatModel) .prompt("Tell me a joke") - .options(OllamaOptions.builder() - .withModel(model) - .withPullModelStrategy(PullModelStrategy.WHEN_MISSING) - .build()) + .options(OllamaOptions.builder().withModel(ADDITIONAL_MODEL).build()) .call() .content(); assertThat(joke).isNotEmpty(); - assertThat(modelManager.isModelAvailable(model)).isTrue(); - modelManager.deleteModel(model); + modelManager.deleteModel(ADDITIONAL_MODEL); } @Test @@ -249,6 +246,10 @@ public OllamaChatModel ollamaChat(OllamaApi 
ollamaApi) { return OllamaChatModel.builder() .withOllamaApi(ollamaApi) .withDefaultOptions(OllamaOptions.create().withModel(MODEL).withTemperature(0.9)) + .withModelManagementOptions(ModelManagementOptions.builder() + .withPullModelStrategy(PullModelStrategy.WHEN_MISSING) + .withAdditionalModels(List.of(ADDITIONAL_MODEL)) + .build()) .build(); } diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaEmbeddingModelIT.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaEmbeddingModelIT.java index f7ce804e0e9..ae0612ac339 100644 --- a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaEmbeddingModelIT.java +++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaEmbeddingModelIT.java @@ -21,6 +21,7 @@ import org.springframework.ai.embedding.EmbeddingResponse; import org.springframework.ai.ollama.api.OllamaApi; import org.springframework.ai.ollama.api.OllamaModel; +import org.springframework.ai.ollama.management.ModelManagementOptions; import org.springframework.ai.ollama.management.OllamaModelManager; import org.springframework.ai.ollama.api.OllamaOptions; import org.springframework.ai.ollama.management.PullModelStrategy; @@ -41,6 +42,8 @@ class OllamaEmbeddingModelIT extends BaseOllamaIT { private static final String MODEL = OllamaModel.NOMIC_EMBED_TEXT.getName(); + private static final String ADDITIONAL_MODEL = "all-minilm"; + @Autowired private OllamaEmbeddingModel embeddingModel; @@ -65,36 +68,29 @@ void embeddings() { } @Test - void autoPullModel() { + void autoPullModelAtStartupTime() { var model = "all-minilm"; assertThat(embeddingModel).isNotNull(); var modelManager = new OllamaModelManager(ollamaApi); - modelManager.deleteModel(model); - assertThat(modelManager.isModelAvailable(model)).isFalse(); + assertThat(modelManager.isModelAvailable(ADDITIONAL_MODEL)).isTrue(); EmbeddingResponse embeddingResponse = embeddingModel .call(new 
EmbeddingRequest(List.of("Hello World", "Something else"), - OllamaOptions.builder() - .withModel(model) - .withPullModelStrategy(PullModelStrategy.WHEN_MISSING) - .withTruncate(false) - .build())); - - assertThat(modelManager.isModelAvailable(model)).isTrue(); + OllamaOptions.builder().withModel(model).withTruncate(false).build())); assertThat(embeddingResponse.getResults()).hasSize(2); assertThat(embeddingResponse.getResults().get(0).getIndex()).isEqualTo(0); assertThat(embeddingResponse.getResults().get(0).getOutput()).isNotEmpty(); assertThat(embeddingResponse.getResults().get(1).getIndex()).isEqualTo(1); assertThat(embeddingResponse.getResults().get(1).getOutput()).isNotEmpty(); - assertThat(embeddingResponse.getMetadata().getModel()).contains(model); + assertThat(embeddingResponse.getMetadata().getModel()).contains(ADDITIONAL_MODEL); assertThat(embeddingResponse.getMetadata().getUsage().getPromptTokens()).isEqualTo(4); assertThat(embeddingResponse.getMetadata().getUsage().getTotalTokens()).isEqualTo(4); assertThat(embeddingModel.dimensions()).isEqualTo(768); - modelManager.deleteModel(model); + modelManager.deleteModel(ADDITIONAL_MODEL); } @SpringBootConfiguration @@ -110,6 +106,10 @@ public OllamaEmbeddingModel ollamaEmbedding(OllamaApi ollamaApi) { return OllamaEmbeddingModel.builder() .withOllamaApi(ollamaApi) .withDefaultOptions(OllamaOptions.create().withModel(MODEL)) + .withModelManagementOptions(ModelManagementOptions.builder() + .withPullModelStrategy(PullModelStrategy.WHEN_MISSING) + .withAdditionalModels(List.of(ADDITIONAL_MODEL)) + .build()) .build(); } diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java index 0b57cb16e0b..8a13c29b5ca 100644 --- a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java +++ 
b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaImage.java @@ -22,6 +22,6 @@ */ public class OllamaImage { - public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.3.13"); + public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.3.14"); } diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc index 76f4d188eec..d992e77defc 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc @@ -95,7 +95,6 @@ Here are the advanced request parameter for the Ollama chat model: | spring.ai.ollama.chat.options.model | The name of the https://github.com/ollama/ollama?tab=readme-ov-file#model-library[supported model] to use. | mistral | spring.ai.ollama.chat.options.format | The format to return a response in. Currently, the only accepted value is `json` | - | spring.ai.ollama.chat.options.keep_alive | Controls how long the model will stay loaded into memory following the request | 5m -| spring.ai.ollama.chat.options.pull-model-strategy | Strategy for pulling models at run-time. | `never` |==== The remaining `options` properties are based on the link:https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values[Ollama Valid Parameters and Values] and link:https://github.com/ollama/ollama/blob/main/api/types.go[Ollama Types]. The default values are based on the link:https://github.com/ollama/ollama/blob/b538dc3858014f94b099730a592751a5454cab0a/api/types.go#L364[Ollama Types Defaults]. @@ -164,23 +163,21 @@ TIP: In addition to the model specific link:https://github.com/spring-projects/s [[auto-pulling-models]] == Auto-pulling Models -Spring AI Ollama can automatically pull models when not available in your Ollama instance. 
-This feature is particularly useful when working with different models or when deploying your application to new environments. +Spring AI Ollama can automatically pull models when they are not available in your Ollama instance. +This feature is particularly useful for development and testing as well as for deploying your applications to new environments. -TIP: you can also pull, by name, any of the thousands, free, xref:https://huggingface.co/models?library=gguf&sort=trending[GGUF Hugging Face Models]. +TIP: You can also pull, by name, any of the thousands of free link:https://huggingface.co/models?library=gguf&sort=trending[GGUF Hugging Face Models]. There are three strategies for pulling models: -* `always` (defined in `PullModelStrategy.ALWAYS`). Always pull the model, even if it's already available. Useful to ensure you're using the latest version of that model. -* `when_missing` (defined in `PullModelStrategy.WHEN_MISSING`). Only pull the model if it's not already available. It might be an older version of the model. -* `never` (defined in `PullModelStrategy.NEVER`). Never pull the model. +* `always` (defined in `PullModelStrategy.ALWAYS`): Always pull the model, even if it's already available. Useful to ensure you're using the latest version of the model. +* `when_missing` (defined in `PullModelStrategy.WHEN_MISSING`): Only pull the model if it's not already available. This may result in using an older version of the model. +* `never` (defined in `PullModelStrategy.NEVER`): Never pull the model automatically. -CAUTION: Due to the unexpected delays while downloading models, this feature is not recommended for production environments. Instead, consider to assess and pre-download the necessary models in advance. - -=== Pulling models at startup time +CAUTION: Due to potential delays while downloading models, automatic pulling is not recommended for production environments. Instead, consider assessing and pre-downloading the necessary models in advance. 
All models defined via configuration properties and default options can be automatically pulled at startup time. -You can configure strategy, timeout, and max number of retries via configuration properties. +You can configure the pull strategy, timeout, and maximum number of retries using configuration properties: [source,yaml] ---- @@ -193,9 +190,9 @@ spring: max-retries: 1 ---- -CAUTION: The application will not complete its initialization until all the models become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at starting up. +CAUTION: The application will not complete its initialization until all specified models are available in Ollama. Depending on the model size and internet connection speed, this may significantly slow down your application's startup time. -You can also initialize additional models at startup time, useful for those models used dynamically at runtime. +You can initialize additional models at startup, which is useful for models used dynamically at runtime: [source,yaml] ---- @@ -210,7 +207,7 @@ spring: - qwen2.5 ---- -If you want to apply the pulling strategy only to other types of models, you can exclude the chat models from the initialization task. +If you want to apply the pulling strategy only to specific types of models, you can exclude chat models from the initialization task: [source,yaml] ---- @@ -223,24 +220,7 @@ spring: include: false ---- -=== Pulling models at runtime - -To enable auto-pulling of models at runtime, you can configure the `pullModelStrategy` option in your `OllamaOptions`: - -[source,java] ----- -ChatResponse response = chatModel.call(new Prompt( - "Generate the names of 5 famous pirates.", - OllamaOptions.builder() - .withModel("llama3.2") - .withPullModelStrategy(PullModelStrategy.ALWAYS) - .build() - )); ----- - -You can also configure this option using the following property: `spring.ai.ollama.chat.options.pull-model-strategy=always`. 
- -CAUTION: The time to process an incoming request might incur unexpected delays, waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests. You might want to initialize these models at startup time instead, using the `spring.ai.ollama.init.chat.additional-models` property. +This configuration will apply the pulling strategy to all models except chat models. == Function Calling diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc index e414df62589..67cbc3f4460 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc @@ -7,8 +7,6 @@ Small distances suggest high relatedness and large distances suggest low related The `OllamaEmbeddingModel` implementation leverages the Ollama https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings[Embeddings API] endpoint. -TIP: you can also pull, by name, any of the thousands, free, xref:https://huggingface.co/models?library=gguf&sort=trending[GGUF HuggingFace Models] - == Prerequisites You first need access to an Ollama instance. There are a few options, including the following: @@ -95,7 +93,6 @@ Here are the advanced request parameter for the Ollama embedding model: You can use dedicated https://ollama.com/search?c=embedding[Embedding Model] types | mistral | spring.ai.ollama.embedding.options.keep_alive | Controls how long the model will stay loaded into memory following the request | 5m | spring.ai.ollama.embedding.options.truncate | Truncates the end of each input to fit within context length. Returns error if false and context length is exceeded. 
| true -| spring.ai.ollama.embedding.options.pull-model-strategy | Strategy for pulling models at run-time. | `never` |==== The remaining `options` properties are based on the link:https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values[Ollama Valid Parameters and Values] and link:https://github.com/ollama/ollama/blob/main/api/types.go[Ollama Types]. The default values are based on: link:https://github.com/ollama/ollama/blob/b538dc3858014f94b099730a592751a5454cab0a/api/types.go#L364[Ollama type defaults]. @@ -161,23 +158,21 @@ EmbeddingResponse embeddingResponse = embeddingModel.call( [[auto-pulling-models]] == Auto-pulling Models -Spring AI Ollama can automatically pull models when not available in your Ollama instance. -This feature is particularly useful when working with different models or when deploying your application to new environments. +Spring AI Ollama can automatically pull models when they are not available in your Ollama instance. +This feature is particularly useful for development and testing as well as for deploying your applications to new environments. -TIP: you can also pull, by name, any of the thousands, free, xref:https://huggingface.co/models?library=gguf&sort=trending[GGUF Hugging Face Models]. +TIP: You can also pull, by name, any of the thousands of free link:https://huggingface.co/models?library=gguf&sort=trending[GGUF Hugging Face Models]. There are three strategies for pulling models: -* `always` (defined in `PullModelStrategy.ALWAYS`). Always pull the model, even if it's already available. Useful to ensure you're using the latest version of that model. -* `when_missing` (defined in `PullModelStrategy.WHEN_MISSING`). Only pull the model if it's not already available. It might be an older version of the model. -* `never` (defined in `PullModelStrategy.NEVER`). Never pull the model. 
- -CAUTION: Due to the unexpected delays while downloading models, this feature is not recommended for production environments. Instead, consider to assess and pre-download the necessary models in advance. +* `always` (defined in `PullModelStrategy.ALWAYS`): Always pull the model, even if it's already available. Useful to ensure you're using the latest version of the model. +* `when_missing` (defined in `PullModelStrategy.WHEN_MISSING`): Only pull the model if it's not already available. This may result in using an older version of the model. +* `never` (defined in `PullModelStrategy.NEVER`): Never pull the model automatically. -=== Pulling models at startup time +CAUTION: Due to potential delays while downloading models, automatic pulling is not recommended for production environments. Instead, consider assessing and pre-downloading the necessary models in advance. All models defined via configuration properties and default options can be automatically pulled at startup time. -You can configure strategy, timeout, and max number of retries via configuration properties. +You can configure the pull strategy, timeout, and maximum number of retries using configuration properties: [source,yaml] ---- @@ -190,9 +185,9 @@ spring: max-retries: 1 ---- -CAUTION: The application will not complete its initialization until all the models become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at starting up. +CAUTION: The application will not complete its initialization until all specified models are available in Ollama. Depending on the model size and internet connection speed, this may significantly slow down your application's startup time. -You can also initialize additional models at startup time, useful for those models used dynamically at runtime. 
+You can initialize additional models at startup, which is useful for models used dynamically at runtime: [source,yaml] ---- @@ -207,7 +202,7 @@ spring: - nomic-embed-text ---- -If you want to apply the pulling strategy only to other types of models, you can exclude the embedding models from the initialization task. +If you want to apply the pulling strategy only to specific types of models, you can exclude embedding models from the initialization task: [source,yaml] ---- @@ -220,23 +215,7 @@ spring: include: false ---- -=== Pulling models at runtime - -To enable auto-pulling of models at runtime, you can configure the `pullModelStrategy` option in your `OllamaOptions`: - -[source,java] ----- -EmbeddingResponse embeddingResponse = embeddingModel - .call(new EmbeddingRequest(List.of("Hello World", "Something else"), - OllamaOptions.builder() - .withModel("all-minilm") - .withPullModelStrategy(PullModelStrategy.ALWAYS) - .build())); ----- - -You can also configure this option using the following property: `spring.ai.ollama.embedding.options.pull-model-strategy=always`. - -CAUTION: The time to process an incoming request might incur unexpected delays, waiting for the needed model to become available in Ollama. Depending on the model size and the speed of the Internet connection, your application might be slow at processing requests. You might want to initialize these models at startup time instead, using the `spring.ai.ollama.init.embedding.additional-models` property. +This configuration will apply the pulling strategy to all models except embedding models. 
== Sample Controller diff --git a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/ollama/OllamaImage.java b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/ollama/OllamaImage.java index d4b621ee768..fb9db0f36d7 100644 --- a/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/ollama/OllamaImage.java +++ b/spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/ollama/OllamaImage.java @@ -17,6 +17,6 @@ public class OllamaImage { - public static final String IMAGE = "ollama/ollama:0.3.13"; + public static final String IMAGE = "ollama/ollama:0.3.14"; } diff --git a/spring-ai-spring-boot-testcontainers/src/test/java/org/springframework/ai/testcontainers/service/connection/ollama/OllamaImage.java b/spring-ai-spring-boot-testcontainers/src/test/java/org/springframework/ai/testcontainers/service/connection/ollama/OllamaImage.java index ed9a1aaab8a..59cf4940360 100644 --- a/spring-ai-spring-boot-testcontainers/src/test/java/org/springframework/ai/testcontainers/service/connection/ollama/OllamaImage.java +++ b/spring-ai-spring-boot-testcontainers/src/test/java/org/springframework/ai/testcontainers/service/connection/ollama/OllamaImage.java @@ -22,6 +22,6 @@ */ public class OllamaImage { - public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.3.13"); + public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("ollama/ollama:0.3.14"); }