spring-projects · ThomasVitale · Jul 26, 2024 · ThomasVitale · Jul 25, 2024 · ThomasVitale
diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java
@@ -59,6 +59,7 @@
  *
  * @author Christian Tzolov
  * @author luocongqiu
+ * @author Thomas Vitale
  * @since 1.0.0
  */
 public class OllamaChatModel extends AbstractToolCallSupport implements ChatModel {
@@ -125,13 +126,13 @@ public ChatResponse call(Prompt prompt) {
 
 		ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.NULL;
 		if (response.promptEvalCount() != null && response.evalCount() != null) {
-			generationMetadata = ChatGenerationMetadata.from("DONE", null);
+			generationMetadata = ChatGenerationMetadata.from(response.doneReason(), null);
 		}
 
 		var generator = new Generation(assistantMessage, generationMetadata);
 		var chatResponse = new ChatResponse(List.of(generator), from(response));
 
-		if (isToolCall(chatResponse, Set.of("DONE"))) {
+		if (isToolCall(chatResponse, Set.of("stop"))) {
 			var toolCallConversation = handleToolCalls(prompt, chatResponse);
 			// Recursively call the call method with the tool call message
 			// conversation that contains the call responses.
@@ -176,15 +177,15 @@ public Flux<ChatResponse> stream(Prompt prompt) {
 
 			ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.NULL;
 			if (chunk.promptEvalCount() != null && chunk.evalCount() != null) {
-				generationMetadata = ChatGenerationMetadata.from("DONE", null);
+				generationMetadata = ChatGenerationMetadata.from(chunk.doneReason(), null);
 			}
 
 			var generator = new Generation(assistantMessage, generationMetadata);
 			return new ChatResponse(List.of(generator), from(chunk));
 		});
 
 		return chatResponse.flatMap(response -> {
-			if (isToolCall(response, Set.of("DONE"))) {
+			if (isToolCall(response, Set.of("stop"))) {
 				var toolCallConversation = handleToolCalls(prompt, response);
 				// Recursively call the stream method with the tool call message
 				// conversation that contains the call responses.
@@ -201,53 +202,43 @@ public Flux<ChatResponse> stream(Prompt prompt) {
 	 */
 	OllamaApi.ChatRequest ollamaChatRequest(Prompt prompt, boolean stream) {
 
-		List<OllamaApi.Message> ollamaMessages = prompt.getInstructions()
-			.stream()
-			.filter(message -> message.getMessageType() == MessageType.USER
-					|| message.getMessageType() == MessageType.ASSISTANT
-					|| message.getMessageType() == MessageType.SYSTEM || message.getMessageType() == MessageType.TOOL)
-			.map(message -> {
-				if (message instanceof UserMessage userMessage) {
-					var messageBuilder = OllamaApi.Message.builder(Role.USER).withContent(message.getContent());
-					if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
-						messageBuilder.withImages(userMessage.getMedia()
-							.stream()
-							.map(media -> this.fromMediaData(media.getData()))
-							.toList());
-					}
-					return List.of(messageBuilder.build());
+		List<OllamaApi.Message> ollamaMessages = prompt.getInstructions().stream().map(message -> {
+			if (message instanceof UserMessage userMessage) {
+				var messageBuilder = OllamaApi.Message.builder(Role.USER).withContent(message.getContent());
+				if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
+					messageBuilder.withImages(
+							userMessage.getMedia().stream().map(media -> this.fromMediaData(media.getData())).toList());
 				}
-				else if (message instanceof SystemMessage systemMessage) {
-					return List
-						.of(OllamaApi.Message.builder(Role.SYSTEM).withContent(systemMessage.getContent()).build());
-				}
-				else if (message instanceof AssistantMessage assistantMessage) {
-					List<ToolCall> toolCalls = null;
-					if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) {
-						toolCalls = assistantMessage.getToolCalls().stream().map(toolCall -> {
-							var function = new ToolCallFunction(toolCall.name(),
-									ModelOptionsUtils.jsonToMap(toolCall.arguments()));
-							return new ToolCall(function);
-						}).toList();
-					}
-					return List.of(OllamaApi.Message.builder(Role.ASSISTANT)
-						.withContent(assistantMessage.getContent())
-						.withToolCalls(toolCalls)
-						.build());
+				return List.of(messageBuilder.build());
+			}
+			else if (message instanceof SystemMessage systemMessage) {
+				return List.of(OllamaApi.Message.builder(Role.SYSTEM).withContent(systemMessage.getContent()).build());
+			}
+			else if (message instanceof AssistantMessage assistantMessage) {
+				List<ToolCall> toolCalls = null;
+				if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) {
+					toolCalls = assistantMessage.getToolCalls().stream().map(toolCall -> {
+						var function = new ToolCallFunction(toolCall.name(),
+								ModelOptionsUtils.jsonToMap(toolCall.arguments()));
+						return new ToolCall(function);
+					}).toList();
 				}
-				else if (message instanceof ToolResponseMessage toolMessage) {
+				return List.of(OllamaApi.Message.builder(Role.ASSISTANT)
+					.withContent(assistantMessage.getContent())
+					.withToolCalls(toolCalls)
+					.build());
+			}
+			else if (message instanceof ToolResponseMessage toolMessage) {
 
-					List<OllamaApi.Message> responseMessages = toolMessage.getResponses()
-						.stream()
-						.map(tr -> OllamaApi.Message.builder(Role.TOOL).withContent(tr.responseData()).build())
-						.toList();
+				List<OllamaApi.Message> responseMessages = toolMessage.getResponses()
+					.stream()
+					.map(tr -> OllamaApi.Message.builder(Role.TOOL).withContent(tr.responseData()).build())
+					.toList();
 
-					return responseMessages;
-				}
-				throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType());
-			})
-			.flatMap(List::stream)
-			.toList();
+				return responseMessages;
+			}
+			throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType());
+		}).flatMap(List::stream).toList();
 
 		Set<String> functionsForThisRequest = new HashSet<>();
 

diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java
@@ -47,6 +47,7 @@
  * Java Client for the Ollama API. <a href="https://ollama.ai/">https://ollama.ai</a>
  *
  * @author Christian Tzolov
+ * @author Thomas Vitale
  * @since 0.8.0
  */
 // @formatter:off
@@ -454,15 +455,20 @@ public Message build() {
 	/**
 	 * Chat request object.
 	 *
-	 * @param model The model to use for completion.
-	 * @param messages The list of messages to chat with.
-	 * @param stream Whether to stream the response.
-	 * @param format The format to return the response in. Currently, the only accepted
-	 * value is "json".
-	 * @param keepAlive The duration to keep the model loaded in ollama while idle.
-	 * @param options Additional model parameters. You can use the {@link OllamaOptions} builder
-	 * to create the options then {@link OllamaOptions#toMap()} to convert the options into a
-	 * map.
+	 * @param model The model to use for completion. It should be a name familiar to Ollama from the <a href="https://ollama.com/library">Library</a>.
+	 * @param messages The list of messages in the chat. This can be used to keep a chat memory.
+	 * @param stream Whether to stream the response. If false, the response will be returned as a single response object rather than a stream of objects.
+	 * @param format The format to return the response in. Currently, the only accepted value is "json".
+	 * @param keepAlive Controls how long the model will stay loaded into memory following this request (default: 5m).
+	 * @param tools List of tools the model has access to.
+	 * @param options Model-specific options. For example, "temperature" can be set through this field, if the model supports it.
+	 * Use the {@link OllamaOptions} builder to create the options, then call {@link OllamaOptions#toMap()} to convert them into a map.
+	 *
+	 * @see <a href=
+	 * "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
+	 * Completion API</a>
+	 * @see <a href="https://github.com/ollama/ollama/blob/main/api/types.go">Ollama
+	 * Types</a>
 	 */
 	@JsonInclude(Include.NON_NULL)
 	public record ChatRequest(
@@ -471,9 +477,9 @@ public record ChatRequest(
 			@JsonProperty("stream") Boolean stream,
 			@JsonProperty("format") String format,
 			@JsonProperty("keep_alive") String keepAlive,
-			@JsonProperty("options") Map<String, Object> options,
-			@JsonProperty("tools") List<Tool> tools) {
-
+			@JsonProperty("tools") List<Tool> tools,
+			@JsonProperty("options") Map<String, Object> options
+	) {
 
 		/**
 		 * Represents a tool the model may call. Currently, only functions are supported as a tool.
@@ -544,8 +550,8 @@ public static class Builder {
 			private boolean stream = false;
 			private String format;
 			private String keepAlive;
-			private Map<String, Object> options = Map.of();
 			private List<Tool> tools = List.of();
+			private Map<String, Object> options = Map.of();
 
 			public Builder(String model) {
 				Assert.notNull(model, "The model can not be null.");
@@ -572,6 +578,11 @@ public Builder withKeepAlive(String keepAlive) {
 				return this;
 			}
 
+			public Builder withTools(List<Tool> tools) {
+				this.tools = tools;
+				return this;
+			}
+
 			public Builder withOptions(Map<String, Object> options) {
 				Objects.requireNonNull(options, "The options can not be null.");
 
@@ -585,33 +596,30 @@ public Builder withOptions(OllamaOptions options) {
 				return this;
 			}
 
-			public Builder withTools(List<Tool> tools) {
-				this.tools = tools;
-				return this;
-			}
-
 			public ChatRequest build() {
-				return new ChatRequest(model, messages, stream, format, keepAlive, options, tools);
+				return new ChatRequest(model, messages, stream, format, keepAlive, tools, options);
 			}
 		}
 	}
 
 	/**
 	 * Ollama chat response object.
 	 *
-	 * @param model The model name used for completion.
-	 * @param createdAt When the request was made.
+	 * @param model The model used for generating the response.
+	 * @param createdAt The timestamp of the response generation.
 	 * @param message The response {@link Message} with {@link Message.Role#ASSISTANT}.
+	 * @param doneReason The reason the model stopped generating text.
 	 * @param done Whether this is the final response. For streaming response only the
 	 * last message is marked as done. If true, this response may be followed by another
 	 * response with the following, additional fields: context, prompt_eval_count,
 	 * prompt_eval_duration, eval_count, eval_duration.
 	 * @param totalDuration Time spent generating the response.
 	 * @param loadDuration Time spent loading the model.
-	 * @param promptEvalCount number of tokens in the prompt.(*)
-	 * @param promptEvalDuration time spent evaluating the prompt.
-	 * @param evalCount number of tokens in the response.
-	 * @param evalDuration time spent generating the response.
+	 * @param promptEvalCount Number of tokens in the prompt.
+	 * @param promptEvalDuration Time spent evaluating the prompt.
+	 * @param evalCount Number of tokens in the response.
+	 * @param evalDuration Time spent generating the response.
+	 *
 	 * @see <a href=
 	 * "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
 	 * Completion API</a>
@@ -623,13 +631,15 @@ public record ChatResponse(
 			@JsonProperty("model") String model,
 			@JsonProperty("created_at") Instant createdAt,
 			@JsonProperty("message") Message message,
+			@JsonProperty("done_reason") String doneReason,
 			@JsonProperty("done") Boolean done,
 			@JsonProperty("total_duration") Duration totalDuration,
 			@JsonProperty("load_duration") Duration loadDuration,
 			@JsonProperty("prompt_eval_count") Integer promptEvalCount,
 			@JsonProperty("prompt_eval_duration") Duration promptEvalDuration,
 			@JsonProperty("eval_count") Integer evalCount,
-			@JsonProperty("eval_duration") Duration evalDuration) {
+			@JsonProperty("eval_duration") Duration evalDuration
+	) {
 	}
 
 	/**

diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java
@@ -21,6 +21,7 @@
  * Helper class for common Ollama models.
  *
  * @author Siarhei Blashuk
+ * @author Thomas Vitale
  * @since 0.8.1
  */
 public enum OllamaModel implements ChatModelDescription {
@@ -35,11 +36,27 @@ public enum OllamaModel implements ChatModelDescription {
 	 */
 	LLAMA3("llama3"),
 
+	/**
+	 * The 8B language model from Meta.
+	 */
+	LLAMA3_1("llama3.1"),
+
 	/**
 	 * The 7B parameters model
 	 */
 	MISTRAL("mistral"),
 
+	/**
+	 * A 12B model with 128k context length, built by Mistral AI in collaboration with
+	 * NVIDIA.
+	 */
+	MISTRAL_NEMO("mistral-nemo"),
+
+	/**
+	 * A small vision language model designed to run efficiently on edge devices.
+	 */
+	MOONDREAM("moondream"),
+
 	/**
 	 * The 2.7B uncensored Dolphin model
 	 */