Closed

56 commits
ecff1b9
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
bc3c73c
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
952276c
Merge branch 'spring-projects:main' into main
10veU Feb 16, 2025
e121eb5
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
a40d292
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
1bd53eb
fix sample code in perplexity-chat.adoc (#2205)
birariro Feb 10, 2025
7fb752e
Refactor: MCP Autoconfig Modularization
tzolov Feb 7, 2025
30b5ac7
docs: enhance MCP documentation with customization details and examples
tzolov Feb 11, 2025
7886494
docs: consolidate MCP documentation into Antora structure
tzolov Feb 11, 2025
bbed00b
OpenAI - Adopt ToolCallingManager API
ThomasVitale Feb 8, 2025
bde26eb
fix(integration-tests): improve the robustness of the test fixture
tzolov Feb 11, 2025
dcc7a7b
fix(mistral) Added index of tool call in the list of tool calls
ricken07 Jan 19, 2025
2b9fde4
Add AzureOpenAIClientBuilderCustomizer interface
magware-dev Jan 21, 2025
234f233
Vector store impls: code cleanup and checkstyle fixes
sobychacko Feb 11, 2025
14e14e6
Add vector store delete API ref docs with examples
sobychacko Feb 7, 2025
d622cea
Update Pinecone vector store docs for the builder pattern
sobychacko Feb 11, 2025
eb8bffa
Possible fix for issue #2218, allowing to disable setting the Authori…
paulbakker Feb 11, 2025
9ce8d99
Mistral AI - Adopt ToolCallingManager API
ThomasVitale Feb 11, 2025
36b55b6
Minor doc link fix
tzolov Feb 12, 2025
e940ef4
AzureOpenAI - Adopt ToolCallingManager API
ilayaperumalg Feb 11, 2025
a9cff9c
feat(test): improve logging in authorsByBooks function tests
tzolov Feb 12, 2025
2e2c84c
Fixes #1543 Inaccurate warning for VertexAI Gemini Function Calling
ddobrin Dec 30, 2024
374a80a
document AzureOpenAIClientBuilderCustomizer (#2224)
magware-dev Feb 12, 2025
faffc19
refactor(bedrock): Migrate from function calling to tool calling
tzolov Feb 11, 2025
eb19102
Add missing integration tests for delete by ID API in vector store im…
sobychacko Feb 10, 2025
e24f866
feat(vertex-ai): Refactor Gemini for new tool calling API
tzolov Feb 12, 2025
a466ab7
minor vertex doc fix
tzolov Feb 13, 2025
faca279
refactor: Use centralized JsonParser utility instead of local ObjectM…
tzolov Feb 13, 2025
d87e46e
Deprecate redundant field in ModelObservationContext
ThomasVitale Feb 12, 2025
b82e07a
docs: enhance Vertex AI Gemini documentation and code organization
tzolov Feb 13, 2025
b6bbec5
Anthropic - Adopt ToolCallingManager API
ThomasVitale Feb 12, 2025
2a0faa1
feat(bedrock): Add StopReason check for tool execution
tzolov Feb 13, 2025
5734204
Use ApiKey for OpenAI Image/Audio/Moderation APIs
ilayaperumalg Feb 12, 2025
4faadd0
fix(mcp webmvc server starter) Add missing dependency
tzolov Feb 13, 2025
23bfac5
Update docs to show artifacts for M6 and later are in Maven Central
markpollack Feb 13, 2025
bd41c1f
Update Maven Central reference
ThomasVitale Feb 13, 2025
bc2a84b
feat(autoconfigure): Support both FunctionCallback and ToolCallback i…
tzolov Feb 13, 2025
f78d231
Fix for FunctionToolCallbackTests
ilayaperumalg Feb 13, 2025
ea2963d
fix(vertex-ai): Invert proxyToolCalls flag to match internal tool exe…
tzolov Feb 13, 2025
98cf668
fix(integration-tests) fix test fixture
tzolov Feb 14, 2025
b545208
fix(spring-ai-azure-openai): last chat completions can have null deltas
ogirardot Feb 13, 2025
33538c4
Update MCP SDK to 0.7.0
tzolov Feb 14, 2025
c5e008d
Update mcp sdk version for spring boot autoconfigure POM
ilayaperumalg Feb 14, 2025
6e4dd79
chore(openai): update default chat model to gpt-4o-mini
tzolov Feb 14, 2025
d49693e
refactor: replace wildcard imports with explicit class imports
apappascs Feb 14, 2025
8e26a7d
fix blroken doc link
tzolov Feb 14, 2025
05599ba
Merge remote-tracking branch 'origin/main'
10veU Feb 16, 2025
43b5942
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
811e852
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
9b8debd
Merge remote-tracking branch 'origin/main'
10veU Feb 16, 2025
0b8328c
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
e84ecb4
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
43cf436
Merge remote-tracking branch 'origin/main'
10veU Feb 16, 2025
1046015
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
f54123b
Add available models for Moonshot Chat Completion Models.
10veU Feb 9, 2025
35507d0
Merge branch 'refs/heads/fix-commits'
10veU Feb 16, 2025
@@ -48,6 +48,7 @@
*
* @author Geng Rong
* @author Thomas Vitale
* @author Wang Xiaojie
*/
public class MoonshotApi {

@@ -207,14 +208,47 @@ public enum ChatCompletionFinishReason {
* Moonshot Chat Completion Models:
*
* <ul>
* <li><b>MOONSHOT_V1_AUTO</b> - moonshot-v1-auto</li>
* <li><b>MOONSHOT_V1_8K</b> - moonshot-v1-8k</li>
* <li><b>MOONSHOT_V1_32K</b> - moonshot-v1-32k</li>
* <li><b>MOONSHOT_V1_128K</b> - moonshot-v1-128k</li>
* </ul>
*
 * {@code moonshot-v1-auto} selects an appropriate model based on the number of
 * tokens occupied by the current context. The models available for selection are:
 * <ul>
 * <li>{@code moonshot-v1-8k}</li>
 * <li>{@code moonshot-v1-32k}</li>
 * <li>{@code moonshot-v1-128k}</li>
 * </ul>
 * <p>
 * {@code moonshot-v1-auto} can be regarded as a model router: in terms of
 * performance and output, it is indistinguishable from the models listed above.
 * </p>
* The routing rules for the model selected by {@code moonshot-v1-auto} are as
* follows:
* <ul>
* <li>If {@code total_tokens ≤ 8 * 1024}, choose {@code moonshot-v1-8k}.</li>
* <li>If {@code 8 * 1024 < total_tokens ≤ 32 * 1024}, choose
* {@code moonshot-v1-32k}.</li>
* <li>If {@code total_tokens > 32 * 1024}, choose {@code moonshot-v1-128k}.</li>
* </ul>
 * The calculation formula is: {@code total_tokens = prompt_tokens + max_tokens}
 * <p>
 * The total number of tokens is composed of two parts:
 * <ul>
 * <li>{@code prompt_tokens}: the number of tokens occupied by the input
 * prompt.</li>
 * <li>{@code max_tokens}: the maximum number of tokens expected to be generated
 * as output.</li>
 * </ul>
*/
public enum ChatModel implements ChatModelDescription {

// @formatter:off
MOONSHOT_V1_AUTO("moonshot-v1-auto"),
MOONSHOT_V1_8K("moonshot-v1-8k"),
MOONSHOT_V1_32K("moonshot-v1-32k"),
MOONSHOT_V1_128K("moonshot-v1-128k");
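The routing rules documented in the Javadoc above can be sketched as a small standalone method. This is a hypothetical illustration of the threshold logic only (the class and method names are invented for the example, not part of `MoonshotApi`):

```java
// Hypothetical sketch of the moonshot-v1-auto routing rule described above.
public class MoonshotAutoRouting {

	// total_tokens = prompt_tokens + max_tokens
	static String pickModel(int promptTokens, int maxTokens) {
		int totalTokens = promptTokens + maxTokens;
		if (totalTokens <= 8 * 1024) {
			return "moonshot-v1-8k";
		}
		if (totalTokens <= 32 * 1024) {
			return "moonshot-v1-32k";
		}
		return "moonshot-v1-128k";
	}

	public static void main(String[] args) {
		// 4000 + 2000 = 6000 tokens, below the 8k threshold
		System.out.println(pickModel(4_000, 2_000)); // moonshot-v1-8k
		System.out.println(pickModel(20_000, 5_000)); // moonshot-v1-32k
		System.out.println(pickModel(40_000, 8_000)); // moonshot-v1-128k
	}
}
```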
@@ -89,7 +89,7 @@ The prefix `spring.ai.moonshot.chat` is the property prefix that lets you config
| spring.ai.moonshot.chat.enabled | Enable Moonshot chat model. | true
| spring.ai.moonshot.chat.base-url | Optionally overrides spring.ai.moonshot.base-url to provide a chat-specific URL | -
| spring.ai.moonshot.chat.api-key | Optionally overrides spring.ai.moonshot.api-key to provide a chat-specific API key | -
| spring.ai.moonshot.chat.options.model | This is the Moonshot Chat model to use | `moonshot-v1-8k` (the `moonshot-v1-8k`, `moonshot-v1-32k`, and `moonshot-v1-128k` point to the latest model versions)
| spring.ai.moonshot.chat.options.model | This is the Moonshot Chat model to use | `moonshot-v1-8k` (the `moonshot-v1-auto`, `moonshot-v1-8k`, `moonshot-v1-32k`, and `moonshot-v1-128k` point to the latest model versions)
| spring.ai.moonshot.chat.options.maxTokens | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. | -
| spring.ai.moonshot.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.7
| spring.ai.moonshot.chat.options.topP | An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both. | 1.0
@@ -103,6 +103,10 @@ NOTE: You can override the common `spring.ai.moonshot.base-url` and `spring.ai.m
The `spring.ai.moonshot.chat.base-url` and `spring.ai.moonshot.chat.api-key` properties if set take precedence over the common properties.
This is useful if you want to use different Moonshot accounts for different models and different model endpoints.

NOTE: When `spring.ai.moonshot.chat.options.model` is set to `moonshot-v1-auto`, Moonshot selects an appropriate model based on the number of tokens occupied by the current context.
The models available for selection are `moonshot-v1-8k`, `moonshot-v1-32k`, and `moonshot-v1-128k`.
`moonshot-v1-auto` can be regarded as a model router: in terms of performance and output, it is indistinguishable from the models it routes to.
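For example, automatic model routing can be enabled via a single property (a minimal configuration sketch):

[source,properties]
----
# Let Moonshot route to moonshot-v1-8k / 32k / 128k based on context size
spring.ai.moonshot.chat.options.model=moonshot-v1-auto
----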

TIP: All properties prefixed with `spring.ai.moonshot.chat.options` can be overridden at runtime by adding a request specific <<chat-options>> to the `Prompt` call.

== Runtime Options [[chat-options]]