
Commit 0ce4e89

Revert PR #7188 - Restore temperature parameter to fix TabbyApi/ExLlamaV2 crashes (#7594)
1 parent 2571781 commit 0ce4e89

File tree

9 files changed: +17 -160 lines changed


src/api/providers/__tests__/chutes.spec.ts

Lines changed: 5 additions & 0 deletions
@@ -409,6 +409,10 @@ describe("ChutesHandler", () => {
 					content: `${systemPrompt}\n${messages[0].content}`,
 				},
 			],
+			max_tokens: 32768,
+			temperature: 0.6,
+			stream: true,
+			stream_options: { include_usage: true },
 		}),
 	)
 })
@@ -438,6 +442,7 @@ describe("ChutesHandler", () => {
 			expect.objectContaining({
 				model: modelId,
 				max_tokens: modelInfo.maxTokens,
+				temperature: 0.5,
 				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
 				stream: true,
 				stream_options: { include_usage: true },

src/api/providers/__tests__/fireworks.spec.ts

Lines changed: 1 addition & 0 deletions
@@ -373,6 +373,7 @@ describe("FireworksHandler", () => {
 			expect.objectContaining({
 				model: modelId,
 				max_tokens: modelInfo.maxTokens,
+				temperature: 0.5,
 				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
 				stream: true,
 				stream_options: { include_usage: true },

src/api/providers/__tests__/groq.spec.ts

Lines changed: 1 addition & 78 deletions
@@ -160,11 +160,7 @@ describe("GroqHandler", () => {
 	it("createMessage should pass correct parameters to Groq client", async () => {
 		const modelId: GroqModelId = "llama-3.1-8b-instant"
 		const modelInfo = groqModels[modelId]
-		const handlerWithModel = new GroqHandler({
-			apiModelId: modelId,
-			groqApiKey: "test-groq-api-key",
-			modelTemperature: 0.5, // Explicitly set temperature for this test
-		})
+		const handlerWithModel = new GroqHandler({ apiModelId: modelId, groqApiKey: "test-groq-api-key" })
 
 		mockCreate.mockImplementationOnce(() => {
 			return {
@@ -194,77 +190,4 @@ describe("GroqHandler", () => {
 			undefined,
 		)
 	})
-
-	it("should omit temperature when modelTemperature is undefined", async () => {
-		const modelId: GroqModelId = "llama-3.1-8b-instant"
-		const handlerWithoutTemp = new GroqHandler({
-			apiModelId: modelId,
-			groqApiKey: "test-groq-api-key",
-			// modelTemperature is not set
-		})
-
-		mockCreate.mockImplementationOnce(() => {
-			return {
-				[Symbol.asyncIterator]: () => ({
-					async next() {
-						return { done: true }
-					},
-				}),
-			}
-		})
-
-		const systemPrompt = "Test system prompt"
-		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
-
-		const messageGenerator = handlerWithoutTemp.createMessage(systemPrompt, messages)
-		await messageGenerator.next()
-
-		expect(mockCreate).toHaveBeenCalledWith(
-			expect.objectContaining({
-				model: modelId,
-				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
-				stream: true,
-			}),
-			undefined,
-		)
-
-		// Verify temperature is NOT included
-		const callArgs = mockCreate.mock.calls[0][0]
-		expect(callArgs).not.toHaveProperty("temperature")
-	})
-
-	it("should include temperature when modelTemperature is explicitly set", async () => {
-		const modelId: GroqModelId = "llama-3.1-8b-instant"
-		const handlerWithTemp = new GroqHandler({
-			apiModelId: modelId,
-			groqApiKey: "test-groq-api-key",
-			modelTemperature: 0.7,
-		})
-
-		mockCreate.mockImplementationOnce(() => {
-			return {
-				[Symbol.asyncIterator]: () => ({
-					async next() {
-						return { done: true }
-					},
-				}),
-			}
-		})
-
-		const systemPrompt = "Test system prompt"
-		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
-
-		const messageGenerator = handlerWithTemp.createMessage(systemPrompt, messages)
-		await messageGenerator.next()
-
-		expect(mockCreate).toHaveBeenCalledWith(
-			expect.objectContaining({
-				model: modelId,
-				temperature: 0.7,
-				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
-				stream: true,
-			}),
-			undefined,
-		)
-	})
 })

src/api/providers/__tests__/openai.spec.ts

Lines changed: 1 addition & 66 deletions
@@ -315,71 +315,6 @@ describe("OpenAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
-
-		it("should omit temperature when modelTemperature is undefined", async () => {
-			const optionsWithoutTemperature: ApiHandlerOptions = {
-				...mockOptions,
-				// modelTemperature is not set, should not include temperature
-			}
-			const handlerWithoutTemperature = new OpenAiHandler(optionsWithoutTemperature)
-			const stream = handlerWithoutTemperature.createMessage(systemPrompt, messages)
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-			}
-			// Assert the mockCreate was called without temperature
-			expect(mockCreate).toHaveBeenCalled()
-			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs).not.toHaveProperty("temperature")
-		})
-
-		it("should include temperature when modelTemperature is explicitly set to 0", async () => {
-			const optionsWithZeroTemperature: ApiHandlerOptions = {
-				...mockOptions,
-				modelTemperature: 0,
-			}
-			const handlerWithZeroTemperature = new OpenAiHandler(optionsWithZeroTemperature)
-			const stream = handlerWithZeroTemperature.createMessage(systemPrompt, messages)
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-			}
-			// Assert the mockCreate was called with temperature: 0
-			expect(mockCreate).toHaveBeenCalled()
-			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.temperature).toBe(0)
-		})
-
-		it("should include temperature when modelTemperature is set to a non-zero value", async () => {
-			const optionsWithCustomTemperature: ApiHandlerOptions = {
-				...mockOptions,
-				modelTemperature: 0.7,
-			}
-			const handlerWithCustomTemperature = new OpenAiHandler(optionsWithCustomTemperature)
-			const stream = handlerWithCustomTemperature.createMessage(systemPrompt, messages)
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-			}
-			// Assert the mockCreate was called with temperature: 0.7
-			expect(mockCreate).toHaveBeenCalled()
-			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.temperature).toBe(0.7)
-		})
-
-		it("should include DEEP_SEEK_DEFAULT_TEMPERATURE for deepseek-reasoner models when temperature is not set", async () => {
-			const deepseekOptions: ApiHandlerOptions = {
-				...mockOptions,
-				openAiModelId: "deepseek-reasoner",
-				// modelTemperature is not set
-			}
-			const deepseekHandler = new OpenAiHandler(deepseekOptions)
-			const stream = deepseekHandler.createMessage(systemPrompt, messages)
-			// Consume the stream to trigger the API call
-			for await (const _chunk of stream) {
-			}
-			// Assert the mockCreate was called with DEEP_SEEK_DEFAULT_TEMPERATURE (0.6)
-			expect(mockCreate).toHaveBeenCalled()
-			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.temperature).toBe(0.6)
-		})
 	})
 
 	describe("error handling", () => {
@@ -515,7 +450,7 @@ describe("OpenAiHandler", () => {
 			],
 			stream: true,
 			stream_options: { include_usage: true },
-			// temperature should be omitted when not set
+			temperature: 0,
 		},
 		{ path: "/models/chat/completions" },
 	)

src/api/providers/__tests__/roo.spec.ts

Lines changed: 3 additions & 3 deletions
@@ -354,16 +354,16 @@ describe("RooHandler", () => {
 	})
 
 	describe("temperature and model configuration", () => {
-		it("should omit temperature when not explicitly set", async () => {
+		it("should use default temperature of 0.7", async () => {
 			handler = new RooHandler(mockOptions)
 			const stream = handler.createMessage(systemPrompt, messages)
 			for await (const _chunk of stream) {
 				// Consume stream
 			}
 
 			expect(mockCreate).toHaveBeenCalledWith(
-				expect.not.objectContaining({
-					temperature: expect.anything(),
+				expect.objectContaining({
+					temperature: 0.7,
 				}),
 				undefined,
 			)

src/api/providers/__tests__/sambanova.spec.ts

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ describe("SambaNovaHandler", () => {
 			expect.objectContaining({
 				model: modelId,
 				max_tokens: modelInfo.maxTokens,
+				temperature: 0.7,
 				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
 				stream: true,
 				stream_options: { include_usage: true },

src/api/providers/__tests__/zai.spec.ts

Lines changed: 1 addition & 0 deletions
@@ -220,6 +220,7 @@ describe("ZAiHandler", () => {
 			expect.objectContaining({
 				model: modelId,
 				max_tokens: modelInfo.maxTokens,
+				temperature: ZAI_DEFAULT_TEMPERATURE,
 				messages: expect.arrayContaining([{ role: "system", content: systemPrompt }]),
 				stream: true,
 				stream_options: { include_usage: true },

src/api/providers/base-openai-compatible-provider.ts

Lines changed: 3 additions & 5 deletions
@@ -74,19 +74,17 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
 			info: { maxTokens: max_tokens },
 		} = this.getModel()
 
+		const temperature = this.options.modelTemperature ?? this.defaultTemperature
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
+			temperature,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
 		}
 
-		// Only include temperature if explicitly set
-		if (this.options.modelTemperature !== undefined) {
-			params.temperature = this.options.modelTemperature
-		}
-
 		try {
 			return this.client.chat.completions.create(params, requestOptions)
 		} catch (error) {
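
With this revert, the base OpenAI-compatible provider always sends a temperature, falling back to the provider's default when the user has not set one. A minimal sketch of that resolution step, assuming only a modelTemperature option and a per-provider default as in the diff above (not the full provider class):

	// Sketch: resolve the request temperature the way the restored code does.
	interface ProviderOptions {
		modelTemperature?: number
	}

	function resolveTemperature(options: ProviderOptions, defaultTemperature: number): number {
		// Nullish coalescing only falls back on undefined/null, so an explicit 0 is kept.
		return options.modelTemperature ?? defaultTemperature
	}

	console.log(resolveTemperature({}, 0.5)) // 0.5 — unset option uses the provider default
	console.log(resolveTemperature({ modelTemperature: 0 }, 0.5)) // 0 — explicit zero is preserved

The practical effect is that backends which, per the commit message, crash when temperature is omitted (TabbyApi/ExLlamaV2) always receive a concrete value again.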

src/api/providers/openai.ts

Lines changed: 1 addition & 8 deletions
@@ -159,20 +159,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
+			temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
 			messages: convertedMessages,
 			stream: true as const,
 			...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
 			...(reasoning && reasoning),
 		}
 
-		// Only include temperature if explicitly set
-		if (this.options.modelTemperature !== undefined) {
-			requestOptions.temperature = this.options.modelTemperature
-		} else if (deepseekReasoner) {
-			// DeepSeek Reasoner has a specific default temperature
-			requestOptions.temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
-		}
-
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
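The OpenAI handler applies the same pattern, except that deepseek-reasoner models get their own default. A rough sketch of just the selection logic, with the 0.6 constant taken from the removed test that expected DEEP_SEEK_DEFAULT_TEMPERATURE for deepseek-reasoner:

	// Sketch: pick the temperature the way the restored request construction does.
	const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6

	function selectTemperature(modelTemperature: number | undefined, deepseekReasoner: boolean): number {
		// An explicit user setting always wins; otherwise reasoner models default to 0.6, everything else to 0.
		return modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0)
	}

	console.log(selectTemperature(undefined, true)) // 0.6
	console.log(selectTemperature(undefined, false)) // 0
	console.log(selectTemperature(0.7, true)) // 0.7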