We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 6de8dea + 8e6071f, commit f3d6e99 (Copy full SHA for f3d6e99)
relay/channel/aws/constants.go
@@ -3,7 +3,7 @@ package aws
3
import "strings"
4
5
var awsModelIDMap = map[string]string{
6
- "claude-3-sonnet-20240229": "anthropic.claude-3-sonnet-20240229-v1:0",
+ "claude-3-sonnet-20240229": "anthropic.claude-3-sonnet-20240229-v1:0",
7
"claude-3-opus-20240229": "anthropic.claude-3-opus-20240229-v1:0",
8
"claude-3-haiku-20240307": "anthropic.claude-3-haiku-20240307-v1:0",
9
"claude-3-5-sonnet-20240620": "anthropic.claude-3-5-sonnet-20240620-v1:0",
@@ -16,6 +16,7 @@ var awsModelIDMap = map[string]string{
16
"claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
17
"claude-haiku-4-5-20251001": "anthropic.claude-haiku-4-5-20251001-v1:0",
18
"claude-opus-4-5-20251101": "anthropic.claude-opus-4-5-20251101-v1:0",
19
+ "claude-opus-4-6": "anthropic.claude-opus-4-6-v1",
20
// Nova models
21
"nova-micro-v1:0": "amazon.nova-micro-v1:0",
22
"nova-lite-v1:0": "amazon.nova-lite-v1:0",
@@ -79,6 +80,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
79
80
"ap": true,
81
"eu": true,
82
},
83
+ "anthropic.claude-opus-4-6-v1": {
84
+ "us": true,
85
+ "ap": true,
86
+ "eu": true,
87
+ },
88
"anthropic.claude-haiku-4-5-20251001-v1:0": {
89
"us": true,
90
relay/channel/aws/dto.go
@@ -26,6 +26,7 @@ type AwsClaudeRequest struct {
26
Tools any `json:"tools,omitempty"`
27
ToolChoice any `json:"tool_choice,omitempty"`
28
Thinking *dto.Thinking `json:"thinking,omitempty"`
29
+ OutputConfig json.RawMessage `json:"output_config,omitempty"`
30
}
31
32
func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {
relay/channel/claude/constants.go
@@ -20,6 +20,11 @@ var ModelList = []string{
"claude-sonnet-4-5-20250929-thinking",
"claude-opus-4-5-20251101",
"claude-opus-4-5-20251101-thinking",
23
+ "claude-opus-4-6",
24
+ "claude-opus-4-6-max",
25
+ "claude-opus-4-6-high",
+ "claude-opus-4-6-medium",
+ "claude-opus-4-6-low",
var ChannelName = "claude"
relay/channel/claude/relay-claude.go
@@ -17,6 +17,7 @@ import (
"github.com/QuantumNous/new-api/relay/reasonmap"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
+ "github.com/QuantumNous/new-api/setting/reasoning"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
@@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
141
142
claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
143
144
- if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
145
+ if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
146
+ strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
147
+ claudeRequest.Model = baseModel
148
+ claudeRequest.Thinking = &dto.Thinking{
149
+ Type: "adaptive",
150
+ }
151
+ claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
152
+ claudeRequest.TopP = 0
153
+ claudeRequest.Temperature = common.GetPointer[float64](1.0)
154
+ } else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
155
strings.HasSuffix(textRequest.Model, "-thinking") {
156
157
// 因为BudgetTokens 必须大于1024
relay/channel/vertex/adaptor.go
@@ -42,6 +42,7 @@ var claudeModelMap = map[string]string{
42
"claude-sonnet-4-5-20250929": "claude-sonnet-4-5@20250929",
43
"claude-haiku-4-5-20251001": "claude-haiku-4-5@20251001",
44
"claude-opus-4-5-20251101": "claude-opus-4-5@20251101",
45
+ "claude-opus-4-6": "claude-opus-4-6",
46
47
48
const anthropicVersion = "vertex-2023-10-16"
relay/channel/vertex/dto.go
@@ -1,6 +1,8 @@
1
package vertex
2
import (
+ "encoding/json"
+
"github.com/QuantumNous/new-api/dto"
)
@@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct {
func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
@@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest
33
36
Tools: req.Tools,
34
37
ToolChoice: req.ToolChoice,
35
38
Thinking: req.Thinking,
39
+ OutputConfig: req.OutputConfig,
40
41
relay/claude_handler.go
@@ -2,6 +2,7 @@ package relay
"bytes"
"fmt"
"io"
"net/http"
@@ -14,6 +15,7 @@ import (
14
15
"github.com/QuantumNous/new-api/relay/helper"
@@ -49,7 +51,17 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
49
51
request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
50
52
53
54
+ if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
55
+ strings.HasPrefix(request.Model, "claude-opus-4-6") {
56
+ request.Model = baseModel
57
+ request.Thinking = &dto.Thinking{
58
59
60
+ request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
61
+ request.TopP = 0
62
+ request.Temperature = common.GetPointer[float64](1.0)
63
+ info.UpstreamModelName = request.Model
64
65
strings.HasSuffix(request.Model, "-thinking") {
66
if request.Thinking == nil {
67
setting/ratio_setting/cache_ratio.go
@@ -60,6 +60,12 @@ var defaultCacheRatio = map[string]float64{
"claude-sonnet-4-5-20250929-thinking": 0.1,
"claude-opus-4-5-20251101": 0.1,
"claude-opus-4-5-20251101-thinking": 0.1,
+ "claude-opus-4-6": 0.1,
+ "claude-opus-4-6-thinking": 0.1,
+ "claude-opus-4-6-max": 0.1,
+ "claude-opus-4-6-high": 0.1,
+ "claude-opus-4-6-medium": 0.1,
68
+ "claude-opus-4-6-low": 0.1,
69
70
71
var defaultCreateCacheRatio = map[string]float64{
@@ -82,6 +88,12 @@ var defaultCreateCacheRatio = map[string]float64{
"claude-sonnet-4-5-20250929-thinking": 1.25,
"claude-opus-4-5-20251101": 1.25,
"claude-opus-4-5-20251101-thinking": 1.25,
91
+ "claude-opus-4-6": 1.25,
92
+ "claude-opus-4-6-thinking": 1.25,
93
+ "claude-opus-4-6-max": 1.25,
94
+ "claude-opus-4-6-high": 1.25,
95
+ "claude-opus-4-6-medium": 1.25,
96
+ "claude-opus-4-6-low": 1.25,
97
98
99
//var defaultCreateCacheRatio = map[string]float64{}
setting/ratio_setting/model_ratio.go
@@ -142,6 +142,11 @@ var defaultModelRatio = map[string]float64{
"claude-sonnet-4-20250514": 1.5,
"claude-sonnet-4-5-20250929": 1.5,
"claude-opus-4-5-20251101": 2.5,
+ "claude-opus-4-6": 2.5,
+ "claude-opus-4-6-max": 2.5,
+ "claude-opus-4-6-high": 2.5,
+ "claude-opus-4-6-medium": 2.5,
+ "claude-opus-4-6-low": 2.5,
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
"claude-opus-4-20250514": 7.5,
"claude-opus-4-1-20250805": 7.5,
setting/reasoning/suffix.go
@@ -6,7 +6,7 @@ import (
"github.com/samber/lo"
-var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
+var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
10
11
// TrimEffortSuffix -> modelName level(low) exists
12
func TrimEffortSuffix(modelName string) (string, string, bool) {
0 commit comments