@@ -7,12 +7,19 @@ import {
	type MainlandZAiModelId,
	ZAI_DEFAULT_TEMPERATURE,
} from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"

import type { ApiHandlerOptions } from "../../shared/api"
+import { ApiStream } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import type { ApiHandlerCreateMessageMetadata } from "../index"

import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"

export class ZAiHandler extends BaseOpenAiCompatibleProvider<InternationalZAiModelId | MainlandZAiModelId> {
+	private readonly isGLM45: boolean
+
	constructor(options: ApiHandlerOptions) {
		const isChina = options.zaiApiLine === "china"
		const models = isChina ? mainlandZAiModels : internationalZAiModels
@@ -27,5 +34,187 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider<InternationalZAiMod
			providerModels: models,
			defaultTemperature: ZAI_DEFAULT_TEMPERATURE,
		})
+
+		// Check if the model is GLM-4.5 or GLM-4.5-Air
+		const modelId = options.apiModelId || defaultModelId
+		this.isGLM45 = modelId.includes("glm-4.5")
+	}
+
+	/**
+	 * Override createMessage to add GLM-specific handling
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		// For GLM-4.5 models, enhance the system prompt with clearer instructions
+		let enhancedSystemPrompt = systemPrompt
+
+		if (this.isGLM45) {
+			// Add GLM-specific instructions to prevent hallucination and improve tool understanding
+			const glmInstructions = `
+
+# CRITICAL INSTRUCTIONS FOR GLM MODEL
+
+## File and Code Awareness
+- NEVER assume or hallucinate files that don't exist. Always verify file existence using the provided tools.
+- When exploring code, ALWAYS use the available tools (read_file, list_files, search_files) to examine actual files.
+- If you're unsure about a file's existence or location, use list_files to explore the directory structure first.
+- Base all code analysis and modifications on actual file contents retrieved through tools, not assumptions.
+
+## Tool Usage Protocol
+- Tools are invoked using XML-style tags as shown in the examples.
+- Each tool invocation must be properly formatted with the exact tool name as the XML tag.
+- Wait for tool execution results before proceeding to the next step.
+- Never simulate or imagine tool outputs - always use actual results.
+
+## Content Management
+- When working with large files or responses, focus on the specific sections relevant to the task.
+- Use partial reads when available to efficiently handle large files.
+- Condense and summarize appropriately while maintaining accuracy.
+- Keep responses concise and within token limits by focusing on essential information.
+
+## Code Indexing Integration
+- The code index provides semantic understanding of the codebase.
+- Use codebase_search for initial exploration when available.
+- Combine index results with actual file reading for complete understanding.
+- Trust the index for finding relevant code patterns and implementations.`
+
+			enhancedSystemPrompt = systemPrompt + glmInstructions
+		}
+
+		const {
+			id: model,
+			info: { maxTokens: max_tokens },
+		} = this.getModel()
+
+		const temperature = this.options.modelTemperature ?? this.defaultTemperature
+
+		// For GLM models, we may need to adjust max_tokens to leave room for proper responses
+		// GLM models sometimes struggle with very high token limits
+		const adjustedMaxTokens = this.isGLM45 && max_tokens ? Math.min(max_tokens, 32768) : max_tokens
+
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model,
+			max_tokens: adjustedMaxTokens || 32768,
+			temperature,
+			messages: [
+				{ role: "system", content: enhancedSystemPrompt },
+				...this.preprocessMessages(convertToOpenAiMessages(messages)),
+			],
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		// Add additional parameters for GLM models to improve response quality
+		if (this.isGLM45) {
+			// GLM models benefit from explicit top_p and frequency_penalty settings
+			Object.assign(params, {
+				top_p: 0.95,
+				frequency_penalty: 0.1,
+				presence_penalty: 0.1,
+			})
+		}
+
+		const stream = await this.client.chat.completions.create(params)
+
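+		// Relay streamed text deltas and reported token usage to the caller as ApiStream chunks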
+		for await (const chunk of stream) {
+			const delta = chunk.choices[0]?.delta
+
+			if (delta?.content) {
+				yield {
+					type: "text",
+					text: delta.content,
+				}
+			}
+
+			if (chunk.usage) {
+				yield {
+					type: "usage",
+					inputTokens: chunk.usage.prompt_tokens || 0,
+					outputTokens: chunk.usage.completion_tokens || 0,
+				}
+			}
+		}
+	}
+
+	/**
+	 * Preprocess messages for GLM models to ensure better understanding
+	 */
+	private preprocessMessages(
+		messages: OpenAI.Chat.ChatCompletionMessageParam[],
+	): OpenAI.Chat.ChatCompletionMessageParam[] {
+		if (!this.isGLM45) {
+			return messages
+		}
+
+		// For GLM models, ensure tool-related messages are clearly formatted
+		return messages.map((msg) => {
+			if (msg.role === "assistant" && typeof msg.content === "string") {
+				// Ensure XML tags in assistant messages are properly formatted
+				// GLM models sometimes struggle with complex XML structures
+				const content = msg.content
+					.replace(/(<\/?[^>]+>)/g, "\n$1\n") // Add newlines around XML tags
+					.replace(/\n\n+/g, "\n") // Remove excessive newlines
+					.trim()
+
+				return { ...msg, content }
+			}
+
+			if (msg.role === "user" && Array.isArray(msg.content)) {
+				// For user messages with multiple content blocks, ensure text is clear
+				const processedContent = msg.content.map((block: any) => {
+					if (block.type === "text") {
+						// Add clear markers for tool results to help GLM understand context
+						if (block.text.includes("[ERROR]") || block.text.includes("Error:")) {
+							return {
+								...block,
+								text: `[TOOL EXECUTION RESULT - ERROR]\n${block.text}\n[END TOOL RESULT]`,
+							}
+						} else if (block.text.includes("Success:") || block.text.includes("successfully")) {
+							return {
+								...block,
+								text: `[TOOL EXECUTION RESULT - SUCCESS]\n${block.text}\n[END TOOL RESULT]`,
+							}
+						}
+					}
+					return block
+				})
+
+				return { ...msg, content: processedContent }
+			}
+
+			return msg
+		})
+	}
+
+	/**
+	 * Override completePrompt for better GLM handling
+	 */
+	override async completePrompt(prompt: string): Promise<string> {
+		const { id: modelId } = this.getModel()
+
+		try {
+			// For GLM models, add a clear instruction prefix
+			const enhancedPrompt = this.isGLM45
+				? `[INSTRUCTION] Please provide a direct and accurate response based on facts. Do not hallucinate or make assumptions.\n\n${prompt}`
+				: prompt
+
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages: [{ role: "user", content: enhancedPrompt }],
+				temperature: this.defaultTemperature,
+				max_tokens: 4096,
+			})
+
+			return response.choices[0]?.message.content || ""
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`${this.providerName} completion error: ${error.message}`)
+			}
+
+			throw error
+		}
	}
}