Commit 98cfb16

Update Ollama APIs and fix multimodality test
* Add doneReason to ChatResponse and update OllamaChatModel accordingly
* Add missing descriptions to Ollama options
* Consolidate Ollama Testcontainers image setup
* Fix multimodality Ollama test

Signed-off-by: Thomas Vitale <[email protected]>
1 parent 554fbcd commit 98cfb16

11 files changed: +145 additions, -68 deletions

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java

Lines changed: 4 additions & 6 deletions
@@ -59,6 +59,7 @@
  *
  * @author Christian Tzolov
  * @author luocongqiu
+ * @author Thomas Vitale
  * @since 1.0.0
  */
 public class OllamaChatModel extends AbstractToolCallSupport implements ChatModel {
@@ -125,13 +126,13 @@ public ChatResponse call(Prompt prompt) {

         ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.NULL;
         if (response.promptEvalCount() != null && response.evalCount() != null) {
-            generationMetadata = ChatGenerationMetadata.from("DONE", null);
+            generationMetadata = ChatGenerationMetadata.from(response.doneReason(), null);
         }

         var generator = new Generation(assistantMessage, generationMetadata);
         var chatResponse = new ChatResponse(List.of(generator), from(response));

-        if (isToolCall(chatResponse, Set.of("DONE"))) {
+        if (isToolCall(chatResponse, Set.of("stop"))) {
             var toolCallConversation = handleToolCalls(prompt, chatResponse);
             // Recursively call the call method with the tool call message
             // conversation that contains the call responses.
@@ -176,7 +177,7 @@ public Flux<ChatResponse> stream(Prompt prompt) {

         ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.NULL;
         if (chunk.promptEvalCount() != null && chunk.evalCount() != null) {
-            generationMetadata = ChatGenerationMetadata.from("DONE", null);
+            generationMetadata = ChatGenerationMetadata.from(chunk.doneReason(), null);
         }

         var generator = new Generation(assistantMessage, generationMetadata);
@@ -203,9 +204,6 @@ OllamaApi.ChatRequest ollamaChatRequest(Prompt prompt, boolean stream) {

         List<OllamaApi.Message> ollamaMessages = prompt.getInstructions()
             .stream()
-            .filter(message -> message.getMessageType() == MessageType.USER
-                    || message.getMessageType() == MessageType.ASSISTANT
-                    || message.getMessageType() == MessageType.SYSTEM || message.getMessageType() == MessageType.TOOL)
             .map(message -> {
                 if (message instanceof UserMessage userMessage) {
                     var messageBuilder = OllamaApi.Message.builder(Role.USER).withContent(message.getContent());
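
With this change, the finish reason surfaced through ChatGenerationMetadata mirrors Ollama's done_reason field (typically "stop") instead of the hard-coded "DONE" marker, and the tool-call check matches on "stop" accordingly. A minimal usage sketch, assuming an already configured OllamaChatModel named chatModel and the 1.0 package layout for the imports:

import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;

// Call the model and read back the finish reason.
ChatResponse response = chatModel.call(new Prompt("Why is the sky blue?"));

// Now populated from Ollama's done_reason (e.g. "stop"), not "DONE".
String finishReason = response.getResult().getMetadata().getFinishReason();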

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java

Lines changed: 36 additions & 26 deletions
@@ -47,6 +47,7 @@
  * Java Client for the Ollama API. <a href="https://ollama.ai/">https://ollama.ai</a>
  *
  * @author Christian Tzolov
+ * @author Thomas Vitale
  * @since 0.8.0
  */
 // @formatter:off
@@ -454,15 +455,20 @@ public Message build() {
     /**
      * Chat request object.
      *
-     * @param model The model to use for completion.
-     * @param messages The list of messages to chat with.
-     * @param stream Whether to stream the response.
-     * @param format The format to return the response in. Currently, the only accepted
-     * value is "json".
-     * @param keepAlive The duration to keep the model loaded in ollama while idle.
-     * @param options Additional model parameters. You can use the {@link OllamaOptions} builder
-     * to create the options then {@link OllamaOptions#toMap()} to convert the options into a
-     * map.
+     * @param model The model to use for completion. It should be a name familiar to Ollama from the <a href="https://ollama.com/library">Library</a>.
+     * @param messages The list of messages in the chat. This can be used to keep a chat memory.
+     * @param stream Whether to stream the response. If false, the response will be returned as a single response object rather than a stream of objects.
+     * @param format The format to return the response in. Currently, the only accepted value is "json".
+     * @param keepAlive Controls how long the model will stay loaded into memory following this request (default: 5m).
+     * @param tools List of tools the model has access to.
+     * @param options Model-specific options. For example, "temperature" can be set through this field, if the model supports it.
+     * You can use the {@link OllamaOptions} builder to create the options then {@link OllamaOptions#toMap()} to convert the options into a map.
+     *
+     * @see <a href=
+     * "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
+     * Completion API</a>
+     * @see <a href="https://github.com/ollama/ollama/blob/main/api/types.go">Ollama
+     * Types</a>
     */
    @JsonInclude(Include.NON_NULL)
    public record ChatRequest(
@@ -471,9 +477,9 @@ public record ChatRequest(
            @JsonProperty("stream") Boolean stream,
            @JsonProperty("format") String format,
            @JsonProperty("keep_alive") String keepAlive,
-           @JsonProperty("options") Map<String, Object> options,
-           @JsonProperty("tools") List<Tool> tools) {
-
+           @JsonProperty("tools") List<Tool> tools,
+           @JsonProperty("options") Map<String, Object> options
+   ) {

        /**
         * Represents a tool the model may call. Currently, only functions are supported as a tool.
@@ -544,8 +550,8 @@ public static class Builder {
        private boolean stream = false;
        private String format;
        private String keepAlive;
-       private Map<String, Object> options = Map.of();
        private List<Tool> tools = List.of();
+       private Map<String, Object> options = Map.of();

        public Builder(String model) {
            Assert.notNull(model, "The model can not be null.");
@@ -572,6 +578,11 @@ public Builder withKeepAlive(String keepAlive) {
            return this;
        }

+       public Builder withTools(List<Tool> tools) {
+           this.tools = tools;
+           return this;
+       }
+
        public Builder withOptions(Map<String, Object> options) {
            Objects.requireNonNull(options, "The options can not be null.");

@@ -585,33 +596,30 @@ public Builder withOptions(OllamaOptions options) {
            return this;
        }

-       public Builder withTools(List<Tool> tools) {
-           this.tools = tools;
-           return this;
-       }
-
        public ChatRequest build() {
-           return new ChatRequest(model, messages, stream, format, keepAlive, options, tools);
+           return new ChatRequest(model, messages, stream, format, keepAlive, tools, options);
        }
    }
 }

 /**
  * Ollama chat response object.
  *
- * @param model The model name used for completion.
- * @param createdAt When the request was made.
+ * @param model The model used for generating the response.
+ * @param createdAt The timestamp of the response generation.
  * @param message The response {@link Message} with {@link Message.Role#ASSISTANT}.
+ * @param doneReason The reason the model stopped generating text.
  * @param done Whether this is the final response. For streaming response only the
  * last message is marked as done. If true, this response may be followed by another
  * response with the following, additional fields: context, prompt_eval_count,
  * prompt_eval_duration, eval_count, eval_duration.
  * @param totalDuration Time spent generating the response.
  * @param loadDuration Time spent loading the model.
- * @param promptEvalCount number of tokens in the prompt.(*)
- * @param promptEvalDuration time spent evaluating the prompt.
- * @param evalCount number of tokens in the response.
- * @param evalDuration time spent generating the response.
+ * @param promptEvalCount Number of tokens in the prompt.
+ * @param promptEvalDuration Time spent evaluating the prompt.
+ * @param evalCount Number of tokens in the response.
+ * @param evalDuration Time spent generating the response.
+ *
  * @see <a href=
  * "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
  * Completion API</a>
@@ -623,13 +631,15 @@ public record ChatResponse(
        @JsonProperty("model") String model,
        @JsonProperty("created_at") Instant createdAt,
        @JsonProperty("message") Message message,
+       @JsonProperty("done_reason") String doneReason,
        @JsonProperty("done") Boolean done,
        @JsonProperty("total_duration") Duration totalDuration,
        @JsonProperty("load_duration") Duration loadDuration,
        @JsonProperty("prompt_eval_count") Integer promptEvalCount,
        @JsonProperty("prompt_eval_duration") Duration promptEvalDuration,
        @JsonProperty("eval_count") Integer evalCount,
-       @JsonProperty("eval_duration") Duration evalDuration) {
+       @JsonProperty("eval_duration") Duration evalDuration
+   ) {
 }

 /**
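
Note that tools now precedes options in both the ChatRequest record and the builder's field order, and withTools sits with the other fluent setters. A sketch of composing a request against the reordered API; the builder(String) factory and the withMessages/withStream setters are assumed from the parts of the Builder class this excerpt does not show:

import java.util.List;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaOptions;

var request = OllamaApi.ChatRequest.builder("mistral")
    .withMessages(List.of(OllamaApi.Message.builder(OllamaApi.Message.Role.USER)
        .withContent("What is the capital of Bulgaria?")
        .build()))
    .withStream(false)      // return a single response object
    .withKeepAlive("5m")    // keep the model loaded for five minutes
    .withTools(List.of())   // tools the model may call (none here)
    .withOptions(OllamaOptions.create().withTemperature(0.7f))
    .build();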

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java

Lines changed: 17 additions & 0 deletions
@@ -21,6 +21,7 @@
  * Helper class for common Ollama models.
  *
  * @author Siarhei Blashuk
+ * @author Thomas Vitale
  * @since 0.8.1
  */
 public enum OllamaModel implements ChatModelDescription {
@@ -35,11 +36,27 @@ public enum OllamaModel implements ChatModelDescription {
     */
    LLAMA3("llama3"),

+   /**
+    * The 8B language model from Meta.
+    */
+   LLAMA3_1("llama3.1"),
+
    /**
     * The 7B parameters model
     */
    MISTRAL("mistral"),

+   /**
+    * A 12B model with 128k context length, built by Mistral AI in collaboration with
+    * NVIDIA.
+    */
+   MISTRAL_NEMO("mistral-nemo"),
+
+   /**
+    * A small vision language model designed to run efficiently on edge devices.
+    */
+   MOONDREAM("moondream"),
+
    /**
     * The 2.7B uncensored Dolphin model
     */
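
Each constant carries the Ollama library tag, so it can replace raw model strings in configuration. A short sketch; OllamaOptions.create() and withModel are assumed from the existing options API:

import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.ai.ollama.api.OllamaOptions;

// Resolve the library tags of the newly added models.
String visionModel = OllamaModel.MOONDREAM.getName();   // "moondream"

OllamaOptions options = OllamaOptions.create()
    .withModel(OllamaModel.MISTRAL_NEMO.getName());     // "mistral-nemo"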

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 38 additions & 20 deletions
@@ -40,6 +40,7 @@
  * Helper class for creating strongly-typed Ollama options.
  *
  * @author Christian Tzolov
+ * @author Thomas Vitale
  * @since 0.8.0
  * @see <a href=
  * "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
@@ -53,11 +54,13 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed

    private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive");

-   // Following fields are ptions which must be set when the model is loaded into memory.
+   // Following fields are options which must be set when the model is loaded into memory.
+   // See: https://github.com/ggerganov/llama.cpp/blob/master/examples/main/README.md

    // @formatter:off
+
    /**
-    * useNUMA Whether to use NUMA.
+    * Whether to use NUMA. (Default: false)
     */
    @JsonProperty("numa") private Boolean useNUMA;

@@ -67,63 +70,78 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
    @JsonProperty("num_ctx") private Integer numCtx;

    /**
-    * ???
+    * Prompt processing maximum batch size. (Default: 512)
     */
    @JsonProperty("num_batch") private Integer numBatch;

    /**
     * The number of layers to send to the GPU(s). On macOS, it defaults to 1
     * to enable metal support, 0 to disable.
-    */
+    * (Default: -1, which indicates that numGPU should be set dynamically)
+    */
    @JsonProperty("num_gpu") private Integer numGPU;

    /**
-    * ???
+    * When using multiple GPUs this option controls which GPU is used
+    * for small tensors for which the overhead of splitting the computation
+    * across all GPUs is not worthwhile. The GPU in question will use slightly
+    * more VRAM to store a scratch buffer for temporary results.
+    * By default, GPU 0 is used.
     */
    @JsonProperty("main_gpu")private Integer mainGPU;

    /**
-    * ???
+    * (Default: false)
     */
    @JsonProperty("low_vram") private Boolean lowVRAM;

    /**
-    * ???
+    * (Default: true)
     */
    @JsonProperty("f16_kv") private Boolean f16KV;

    /**
-    * ???
+    * Return logits for all the tokens, not just the last one.
+    * To enable completions to return logprobs, this must be true.
     */
    @JsonProperty("logits_all") private Boolean logitsAll;

    /**
-    * ???
+    * Load only the vocabulary, not the weights.
     */
    @JsonProperty("vocab_only") private Boolean vocabOnly;

    /**
-    * ???
+    * By default, models are mapped into memory, which allows the system to load only the necessary parts
+    * of the model as needed. However, if the model is larger than your total amount of RAM or if your system is low
+    * on available memory, using mmap might increase the risk of pageouts, negatively impacting performance.
+    * Disabling mmap results in slower load times but may reduce pageouts if you're not using mlock.
+    * Note that if the model is larger than the total amount of RAM, turning off mmap would prevent
+    * the model from loading at all.
+    * (Default: null)
     */
    @JsonProperty("use_mmap") private Boolean useMMap;

    /**
-    * ???
+    * Lock the model in memory, preventing it from being swapped out when memory-mapped.
+    * This can improve performance but trades away some of the advantages of memory-mapping
+    * by requiring more RAM to run and potentially slowing down load times as the model loads into RAM.
+    * (Default: false)
     */
    @JsonProperty("use_mlock") private Boolean useMLock;

    /**
-    * Sets the number of threads to use during computation. By default,
-    * Ollama will detect this for optimal performance. It is recommended to set this
-    * value to the number of physical CPU cores your system has (as opposed to the
-    * logical number of cores).
+    * Set the number of threads to use during generation. For optimal performance, it is recommended to set this value
+    * to the number of physical CPU cores your system has (as opposed to the logical number of cores).
+    * Using the correct number of threads can greatly improve performance.
+    * By default, Ollama will detect this value for optimal performance.
     */
    @JsonProperty("num_thread") private Integer numThread;

    // Following fields are predict options used at runtime.

    /**
-    * ???
+    * (Default: 4)
     */
    @JsonProperty("num_keep") private Integer numKeep;

@@ -162,7 +180,7 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
    @JsonProperty("tfs_z") private Float tfsZ;

    /**
-    * ???
+    * (Default: 1.0)
     */
    @JsonProperty("typical_p") private Float typicalP;

@@ -186,12 +204,12 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
    @JsonProperty("repeat_penalty") private Float repeatPenalty;

    /**
-    * ???
+    * (Default: 0.0)
     */
    @JsonProperty("presence_penalty") private Float presencePenalty;

    /**
-    * ???
+    * (Default: 0.0)
     */
    @JsonProperty("frequency_penalty") private Float frequencyPenalty;

@@ -215,7 +233,7 @@ public class OllamaOptions implements FunctionCallingOptions, ChatOptions, Embed
    @JsonProperty("mirostat_eta") private Float mirostatEta;

    /**
-    * ???
+    * (Default: true)
     */
    @JsonProperty("penalize_newline") private Boolean penalizeNewline;
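
Most of the newly documented fields are llama.cpp load-time options, so they only take effect when the model is (re)loaded; the predict options below them apply per request. A hedged sketch combining both kinds, assuming the class's usual fluent with* setters (withUseMMap, withNumThread, ...), which this hunk does not show:

import java.util.Map;
import org.springframework.ai.ollama.api.OllamaOptions;

OllamaOptions options = OllamaOptions.create()
    .withUseMMap(true)      // load-time: map the model file into memory
    .withNumThread(8)       // load-time: physical core count, per the guidance above
    .withTemperature(0.7f); // runtime (predict) option

// toMap() produces the map accepted by ChatRequest.Builder#withOptions.
Map<String, Object> optionsMap = options.toMap();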

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelFunctionCallingIT.java

Lines changed: 3 additions & 2 deletions
@@ -36,6 +36,7 @@
 import org.springframework.ai.chat.prompt.Prompt;
 import org.springframework.ai.model.function.FunctionCallbackWrapper;
 import org.springframework.ai.ollama.api.OllamaApi;
+import org.springframework.ai.ollama.api.OllamaModel;
 import org.springframework.ai.ollama.api.OllamaOptions;
 import org.springframework.ai.ollama.api.tool.MockWeatherService;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -55,10 +56,10 @@ class OllamaChatModelFunctionCallingIT {

    private static final Logger logger = LoggerFactory.getLogger(OllamaChatModelFunctionCallingIT.class);

-   private static String MODEL = "mistral";
+   private static final String MODEL = OllamaModel.MISTRAL.getName();

    @Container
-   static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.2.8");
+   static OllamaContainer ollamaContainer = new OllamaContainer(OllamaImage.DEFAULT_IMAGE);

    static String baseUrl = "http://localhost:11434";

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelIT.java

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ class OllamaChatModelIT {
    private static final Log logger = LogFactory.getLog(OllamaChatModelIT.class);

    @Container
-   static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.2.8");
+   static OllamaContainer ollamaContainer = new OllamaContainer(OllamaImage.DEFAULT_IMAGE);

    static String baseUrl = "http://localhost:11434";
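
Both integration tests now take the container tag from a shared OllamaImage constant instead of repeating "ollama/ollama:0.2.8". The OllamaImage class itself is not part of this excerpt; a minimal sketch of what such a holder could look like, where only the DEFAULT_IMAGE name comes from the test code and the tag value is an assumption:

// Hypothetical shape of the shared Testcontainers image holder.
public final class OllamaImage {

    // Assumed tag; the real value lives in the actual OllamaImage class.
    public static final String DEFAULT_IMAGE = "ollama/ollama:0.3.6";

    private OllamaImage() {
    }

}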
