|
48 | 48 | * |
49 | 49 | * @author Geng Rong |
50 | 50 | * @author Thomas Vitale |
| 51 | + * @author Wang Xiaojie |
51 | 52 | */ |
52 | 53 | public class MoonshotApi { |
53 | 54 |
|
@@ -207,14 +208,37 @@ public enum ChatCompletionFinishReason { |
207 | 208 | * Moonshot Chat Completion Models: |
208 | 209 | * |
209 | 210 | * <ul> |
| 211 | + * <li><b>MOONSHOT_V1_AUTO</b> - moonshot-v1-auto</li> |
210 | 212 | * <li><b>MOONSHOT_V1_8K</b> - moonshot-v1-8k</li> |
211 | 213 | * <li><b>MOONSHOT_V1_32K</b> - moonshot-v1-32k</li> |
212 | 214 | * <li><b>MOONSHOT_V1_128K</b> - moonshot-v1-128k</li> |
213 | 215 | * </ul> |
| 216 | + * |
| 217 | + * {@code moonshot-v1-auto} selects the appropriate model based on the number of tokens occupied by the current context. The models available for selection are: |
| 218 | + * <ul> |
| 219 | + * <li>{@code moonshot-v1-8k}</li> |
| 220 | + * <li>{@code moonshot-v1-32k}</li> |
| 221 | + * <li>{@code moonshot-v1-128k}</li> |
| 222 | + * </ul> |
| 223 | + * <p>{@code moonshot-v1-auto} acts as a model router: it decides which specific model to use based on the number of tokens occupied by the current context. In terms of performance and output, {@code moonshot-v1-auto} is indistinguishable from the models listed above.</p> |
| 224 | + * The routing rules for the model selected by {@code moonshot-v1-auto} are as follows: |
| 225 | + * <ul> |
| 226 | + * <li>If {@code total_tokens ≤ 8 * 1024}, choose {@code moonshot-v1-8k}.</li> |
| 227 | + * <li>If {@code 8 * 1024 < total_tokens ≤ 32 * 1024}, choose {@code moonshot-v1-32k}.</li> |
| 228 | + * <li>If {@code total_tokens > 32 * 1024}, choose {@code moonshot-v1-128k}.</li> |
| 229 | + * </ul> |
| 230 | + * The calculation formula is: |
| 231 | + * {@code total_tokens = prompt_tokens + max_tokens} |
| 232 | + * <p>The total number of Tokens is composed of two parts: |
| 233 | + * <ul> |
| 234 | + * <li>{@code prompt_tokens}: The number of tokens occupied by the input prompt.</li> |
| 235 | + * <li>{@code max_tokens}: The maximum number of Tokens expected to be generated as output.</li> |
| 236 | + * </ul> |
214 | 237 | */ |
215 | 238 | public enum ChatModel implements ChatModelDescription { |
216 | 239 |
|
217 | 240 | // @formatter:off |
| 241 | + MOONSHOT_V1_AUTO("moonshot-v1-auto"), |
218 | 242 | MOONSHOT_V1_8K("moonshot-v1-8k"), |
219 | 243 | MOONSHOT_V1_32K("moonshot-v1-32k"), |
220 | 244 | MOONSHOT_V1_128K("moonshot-v1-128k"); |
|
0 commit comments