actions
diff --git a/‎__tests__/inference.test.ts‎
Lines changed: 230 additions & 0 deletions b/‎__tests__/inference.test.ts‎
Lines changed: 230 additions & 0 deletions
diff --git a/‎dist/index.js‎
Lines changed: 24 additions & 4 deletions b/‎dist/index.js‎
Lines changed: 24 additions & 4 deletions
diff --git a/‎dist/index.js.map‎
Lines changed: 1 addition & 1 deletion b/‎dist/index.js.map‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/inference.ts‎
Lines changed: 26 additions & 4 deletions b/‎src/inference.ts‎
Lines changed: 26 additions & 4 deletions
@@ -95,6 +95,49 @@ describe('inference.ts', () => {
       expect(result).toBeNull()
       expect(core.info).toHaveBeenCalledWith('Model response: No response content')
     })
+
+    it('includes response format when specified', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"result": "success"}',
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValue(mockResponse)
+
+      const result = await simpleInference(requestWithResponseFormat)
+
+      expect(result).toBe('{"result": "success"}')
+
+      // Verify the request carried responseFormat as response_format, next to messages, max_tokens and model
+      expect(mockCreate).toHaveBeenCalledWith({
+        messages: [
+          {
+            role: 'system',
+            content: 'You are a test assistant',
+          },
+          {
+            role: 'user',
+            content: 'Hello, AI!',
+          },
+        ],
+        max_tokens: 100,
+        model: 'gpt-4',
+        response_format: requestWithResponseFormat.responseFormat,
+      })
+    })
   })
 
   describe('mcpInference', () => {
@@ -140,6 +183,7 @@ describe('inference.ts', () => {
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
       const callArgs = mockCreate.mock.calls[0][0] as any
       expect(callArgs.tools).toEqual(mockMcpClient.tools)
+      expect(callArgs.response_format).toBeUndefined()
       expect(callArgs.model).toBe('gpt-4')
       expect(callArgs.max_tokens).toBe(100)
     })
@@ -315,5 +359,191 @@ describe('inference.ts', () => {
 
       expect(result).toBe('Second message')
     })
+
+    it('makes additional loop with response format when no tool calls are made', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      // First response: plain content with tool_calls set to null
+      const firstResponse = {
+        choices: [
+          {
+            message: {
+              content: 'First response',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      // Second response: the JSON string expected back from the extra, formatted call
+      const secondResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"result": "formatted response"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValueOnce(firstResponse).mockResolvedValueOnce(secondResponse)
+
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      expect(result).toBe('{"result": "formatted response"}')
+      expect(mockCreate).toHaveBeenCalledTimes(2)
+      expect(core.info).toHaveBeenCalledWith('Making one more MCP loop with the requested response format...')
+
+      // First call should have tools but no response format
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const firstCall = mockCreate.mock.calls[0][0] as any
+      expect(firstCall.tools).toEqual(mockMcpClient.tools)
+      expect(firstCall.response_format).toBeUndefined()
+
+      // Second call should have response format but no tools
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const secondCall = mockCreate.mock.calls[1][0] as any
+      expect(secondCall.tools).toBeUndefined()
+      expect(secondCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
+
+      // Second call should include the follow-up user message asking for the requested format
+      expect(secondCall.messages).toHaveLength(5) // system, user, assistant, user, assistant
+      expect(secondCall.messages[3].role).toBe('user')
+      expect(secondCall.messages[3].content).toContain('Please provide your response in the exact')
+    })
+
+    it('uses response format only on final iteration after tool calls', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      const toolCalls = [
+        {
+          id: 'call-123',
+          function: {
+            name: 'test-tool',
+            arguments: '{"param": "value"}',
+          },
+        },
+      ]
+
+      const toolResults = [
+        {
+          tool_call_id: 'call-123',
+          role: 'tool',
+          name: 'test-tool',
+          content: 'Tool result',
+        },
+      ]
+
+      // First response: the model asks for one tool call
+      const firstResponse = {
+        choices: [
+          {
+            message: {
+              content: 'Using tool',
+              tool_calls: toolCalls,
+            },
+          },
+        ],
+      }
+
+      // Second response: no tool calls; with a response format set this triggers the extra formatted call
+      const secondResponse = {
+        choices: [
+          {
+            message: {
+              content: 'Intermediate result',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      // Third response: the JSON string expected back from the formatted call
+      const thirdResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"final": "result"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate
+        .mockResolvedValueOnce(firstResponse)
+        .mockResolvedValueOnce(secondResponse)
+        .mockResolvedValueOnce(thirdResponse)
+
+      mockExecuteToolCalls.mockResolvedValue(toolResults)
+
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      expect(result).toBe('{"final": "result"}')
+      expect(mockCreate).toHaveBeenCalledTimes(3)
+
+      // First call: tools but no response format
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const firstCall = mockCreate.mock.calls[0][0] as any
+      expect(firstCall.tools).toEqual(mockMcpClient.tools)
+      expect(firstCall.response_format).toBeUndefined()
+
+      // Second call: tools but no response format (after tool execution)
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const secondCall = mockCreate.mock.calls[1][0] as any
+      expect(secondCall.tools).toEqual(mockMcpClient.tools)
+      expect(secondCall.response_format).toBeUndefined()
+
+      // Third call: response format but no tools (final message)
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const thirdCall = mockCreate.mock.calls[2][0] as any
+      expect(thirdCall.tools).toBeUndefined()
+      expect(thirdCall.response_format).toEqual(requestWithResponseFormat.responseFormat)
+    })
+
+    it('returns immediately when response format is set and finalMessage is already true', async () => {
+      const requestWithResponseFormat = {
+        ...mockRequest,
+        responseFormat: {
+          type: 'json_schema' as const,
+          json_schema: {type: 'object'},
+        },
+      }
+
+      // Response without tool calls; mockResolvedValue hands back this same object on every call
+      const mockResponse = {
+        choices: [
+          {
+            message: {
+              content: '{"immediate": "result"}',
+              tool_calls: null,
+            },
+          },
+        ],
+      }
+
+      mockCreate.mockResolvedValue(mockResponse)
+
+      // finalMessage is a local of mcpInference that starts as false, so it cannot be preset from here:
+      // the first call (no tool calls) flips it, and the second call is the one whose content is returned
+      const result = await mcpInference(requestWithResponseFormat, mockMcpClient)
+
+      // The function should make two calls: one normal, then one with response format
+      expect(mockCreate).toHaveBeenCalledTimes(2)
+      expect(result).toBe('{"immediate": "result"}')
+    })
   })
 })
@@ -89,6 +89,9 @@ export async function mcpInference(
 
   let iterationCount = 0
   const maxIterations = 5 // Prevent infinite loops
+  // We want to use response_format (e.g. JSON) on the last iteration only, so the model can output
+  // the final result in the expected format without interfering with tool calls
+  let finalMessage = false
 
   while (iterationCount < maxIterations) {
     iterationCount++
@@ -98,13 +101,14 @@ export async function mcpInference(
       messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
       max_tokens: request.maxTokens,
       model: request.modelName,
-      tools: githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[],
     }
 
-    // Add response format if specified (only on first iteration to avoid conflicts)
-    if (iterationCount === 1 && request.responseFormat) {
+    // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
+    if (finalMessage && request.responseFormat) {
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
       chatCompletionRequest.response_format = request.responseFormat as any
+    } else {
+      chatCompletionRequest.tools = githubMcpClient.tools as OpenAI.Chat.Completions.ChatCompletionTool[]
     }
 
     try {
@@ -128,7 +132,25 @@ export async function mcpInference(
 
       if (!toolCalls || toolCalls.length === 0) {
         core.info('No tool calls requested, ending GitHub MCP inference loop')
-        return modelResponse || null
+
+        // If we have a response format set and we haven't explicitly run one final message iteration,
+        // do another loop with the response format set
+        if (request.responseFormat && !finalMessage) {
+          core.info('Making one more MCP loop with the requested response format...')
+
+          // Add a user message asking for the requested response format type and try again
+          messages.push({
+            role: 'user',
+            content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
+          })
+
+          finalMessage = true
+
+          // Loop again for the formatted reply. NOTE(review): if iterationCount === maxIterations here, the loop exits first — confirm
+          continue
+        } else {
+          return modelResponse || null
+        }
       }
 
       core.info(`Model requested ${toolCalls.length} tool calls`)