Commit 6ab5a05

Merge pull request #288 from shivasurya/shiva/grok-integ
feat: add Grok-4 AI model support with 2M context window and OpenAI-compatible API client
2 parents b12961f + d86c6e9 commit 6ab5a05

File tree

extension/secureflow/packages/secureflow-cli/config/model-context-limits.json
extension/secureflow/packages/secureflow-cli/lib/ai-client-factory.js
extension/secureflow/packages/secureflow-cli/lib/grok-client.d.ts
extension/secureflow/packages/secureflow-cli/lib/grok-client.js
extension/secureflow/packages/secureflow-cli/lib/token-tracker.js
extension/secureflow/packages/secureflow-cli/lib/types.d.ts
extension/secureflow/packages/secureflow-cli/lib/types.js

7 files changed, +151 -6 lines changed

extension/secureflow/packages/secureflow-cli/config/model-context-limits.json

Lines changed: 14 additions & 3 deletions
@@ -82,10 +82,17 @@
         "maxOutput": 8192,
         "description": "Claude 3.5 Haiku with 200K context window, 8K max output tokens"
       }
+    },
+    "xai": {
+      "grok-4-fast-reasoning": {
+        "contextWindow": 2000000,
+        "maxOutput": 65536,
+        "description": "Grok-4 fast reasoning model with 2M context window, 65K max output tokens"
+      }
     }
   },
   "metadata": {
-    "lastUpdated": "2025-09-07",
+    "lastUpdated": "2025-09-28",
     "source": "Verified against official API documentation and provider websites",
     "notes": [
       "Context windows are measured in tokens",
@@ -98,10 +105,13 @@
       "All specifications verified as of September 2025",
       "Claude Opus models have 64K max output, not 32K as previously listed",
       "O3-mini has significantly higher limits than O1-mini",
-      "Gemini models use 65,535 max output tokens, not 65,536"
+      "Gemini models use 65,535 max output tokens, not 65,536",
+      "Grok-4-fast-reasoning has 2M context window with 4M tokens/minute rate limit (verified from xAI docs)",
+      "Grok max output tokens estimated at 65K (not specified in official docs)"
     ],
     "recommendations": {
       "largeContext": [
+        "grok-4-fast-reasoning",
        "gpt-4.1-2025-04-14",
        "gemini-2.5-pro",
        "gemini-2.5-flash"
@@ -119,7 +129,8 @@
       "reasoning": [
         "o1",
         "o1-mini",
-        "o3-mini-2025-01-31"
+        "o3-mini-2025-01-31",
+        "grok-4-fast-reasoning"
       ],
       "highOutput": [
         "o3-mini-2025-01-31",

extension/secureflow/packages/secureflow-cli/lib/ai-client-factory.js

Lines changed: 5 additions & 0 deletions
@@ -3,6 +3,7 @@ const { ClaudeClient } = require('./claude-client');
 const { GeminiClient } = require('./gemini-client');
 const { OpenAIClient } = require('./openai-client');
 const { OllamaClient } = require('./ollama-client');
+const { GrokClient } = require('./grok-client');

 /**
  * Factory class for creating AI clients
@@ -38,6 +39,10 @@ class AIClientFactory {
       case 'claude-3-5-haiku-20241022':
         return new ClaudeClient();

+      // Grok (xAI) models
+      case 'grok-4-fast-reasoning':
+        return new GrokClient();
+
       // Ollama models
       case 'qwen3:4b':
         return new OllamaClient();
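
A hedged sketch of the call site this switch serves. The factory method's actual name, and whether it is static, are not visible in the hunk, so `createClient` below is hypothetical.

const { AIClientFactory } = require('./lib/ai-client-factory');

// Hypothetical method name: only the case labels appear in the diff.
const client = AIClientFactory.createClient('grok-4-fast-reasoning');
console.log(client.constructor.name); // "GrokClient" once this commit is applied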

extension/secureflow/packages/secureflow-cli/lib/grok-client.d.ts

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+import { AIClient, AIClientOptions, AIResponse, AIResponseChunk } from './ai-client';
+import { HttpClient } from './http-client';
+
+export declare class GrokClient extends HttpClient implements AIClient {
+  constructor();
+  sendRequest(prompt?: string, options?: AIClientOptions, messages?: any): Promise<AIResponse>;
+  sendStreamingRequest(
+    prompt: string,
+    callback: (chunk: AIResponseChunk) => void,
+    options?: AIClientOptions,
+    messages?: any
+  ): Promise<void>;
+}

extension/secureflow/packages/secureflow-cli/lib/grok-client.js

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+const { HttpClient } = require('./http-client');
+
+class GrokClient extends HttpClient {
+  constructor() {
+    super();
+    this.API_URL = 'https://api.x.ai/v1/chat/completions';
+    this.defaultModel = 'grok-4-fast-reasoning';
+  }
+
+  /**
+   * Send a request to the xAI Grok API (OpenAI-compatible)
+   * @param {string} prompt The prompt to send
+   * @param {import('./ai-client').AIClientOptions} options Options
+   * @param {import('./ai-client').AIMessage[]} [messages] Optional messages array
+   * @returns {Promise<import('./ai-client').AIResponse>} The AI response
+   */
+  async sendRequest(prompt, options, messages) {
+    if (!options?.apiKey) {
+      throw new Error('xAI Grok API key is required');
+    }
+
+    const response = await this.post(
+      this.API_URL,
+      {
+        model: options.model || this.defaultModel,
+        messages: messages || [{ role: 'user', content: prompt }],
+        temperature: options.temperature ?? 0,
+        max_tokens: options.maxTokens ?? 2000,
+        stream: false
+      },
+      {
+        Authorization: `Bearer ${options.apiKey}`,
+        'Content-Type': 'application/json'
+      }
+    );
+
+    return {
+      content: response?.choices?.[0]?.message?.content ?? '',
+      model: response.model,
+      provider: 'grok',
+      usage: response.usage
+    };
+  }
+
+  /**
+   * Send a streaming request to the xAI Grok API (SSE, OpenAI-compatible)
+   * @param {string} prompt The prompt to send
+   * @param {function(import('./ai-client').AIResponseChunk): void} callback Callback for each chunk
+   * @param {import('./ai-client').AIClientOptions} options Options
+   * @param {import('./ai-client').AIMessage[]} [messages] Optional messages array
+   * @returns {Promise<void>}
+   */
+  async sendStreamingRequest(prompt, callback, options, messages) {
+    if (!options?.apiKey) {
+      throw new Error('xAI Grok API key is required');
+    }
+
+    let contentSoFar = '';
+
+    await this.streamingPost(
+      this.API_URL,
+      {
+        model: options.model || this.defaultModel,
+        messages: messages || [{ role: 'user', content: prompt }],
+        temperature: options.temperature ?? 0,
+        max_tokens: options.maxTokens ?? 2000,
+        stream: true
+      },
+      (chunk) => {
+        try {
+          const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+          for (const line of lines) {
+            if (line.startsWith('data: ')) {
+              const data = line.slice(6);
+              if (data === '[DONE]') {
+                callback({ content: contentSoFar, isComplete: true });
+                return;
+              }
+              try {
+                const parsed = JSON.parse(data);
+                if (parsed.choices && parsed.choices[0]?.delta?.content) {
+                  const piece = parsed.choices[0].delta.content;
+                  contentSoFar += piece;
+                  callback({ content: contentSoFar, isComplete: false });
+                }
+              } catch (e) {
+                console.error('Error parsing SSE data:', e);
+              }
+            }
+          }
+        } catch (error) {
+          console.error('Error processing chunk:', error);
+        }
+      },
+      () => {
+        callback({ content: contentSoFar, isComplete: true });
+      },
+      {
+        Authorization: `Bearer ${options.apiKey}`,
+        'Content-Type': 'application/json',
+        Accept: 'text/event-stream'
+      }
+    );
+  }
+}
+
+module.exports = {
+  GrokClient
+};
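
A minimal usage sketch of the client above, assuming an XAI_API_KEY environment variable (not part of the diff). Note that the streaming callback receives the accumulated content so far, not just the new delta.

const { GrokClient } = require('./lib/grok-client');

async function main() {
  const client = new GrokClient();

  // One-shot completion
  const res = await client.sendRequest('Say hello in one sentence.', {
    apiKey: process.env.XAI_API_KEY,
    maxTokens: 256
  });
  console.log(res.content, res.usage);

  // Streaming: chunk.content is cumulative, chunk.isComplete marks the end
  await client.sendStreamingRequest(
    'Stream a short haiku.',
    (chunk) => {
      if (chunk.isComplete) console.log('final:', chunk.content);
    },
    { apiKey: process.env.XAI_API_KEY }
  );
}

main().catch(console.error);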

extension/secureflow/packages/secureflow-cli/lib/token-tracker.js

Lines changed: 8 additions & 2 deletions
@@ -137,10 +137,16 @@ class TokenTracker {
       inputTokens = usage.prompt_tokens || 0;
       outputTokens = usage.completion_tokens || 0;
     }
+    // xAI Grok format (OpenAI-compatible but may have specific fields)
+    else if (usage.prompt_tokens !== undefined && usage.completion_tokens !== undefined) {
+      inputTokens = usage.prompt_tokens || 0;
+      outputTokens = usage.completion_tokens || 0;
+      reasoningTokens = usage.reasoning_tokens || 0; // Grok reasoning tokens
+    }
     // Fallback - try both naming conventions
     else {
-      inputTokens = usage.input_tokens || usage.promptTokenCount || 0;
-      outputTokens = usage.output_tokens || usage.candidatesTokenCount || 0;
+      inputTokens = usage.input_tokens || usage.promptTokenCount || usage.prompt_tokens || 0;
+      outputTokens = usage.output_tokens || usage.candidatesTokenCount || usage.completion_tokens || 0;
       reasoningTokens = usage.reasoning_tokens || usage.thoughtsTokenCount || 0;
     }
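
To illustrate the new branch, here is a made-up usage payload in the OpenAI-compatible shape; the values are invented, and the guard of the earlier OpenAI branch is not visible in the hunk.

// Invented example payload; field names follow the OpenAI-compatible convention.
const usage = {
  prompt_tokens: 1200,
  completion_tokens: 340,
  reasoning_tokens: 96
};
// With the diff applied, prompt/completion tokens map to inputTokens/outputTokens,
// and the Grok branch additionally picks up reasoning_tokens (96 here).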

extension/secureflow/packages/secureflow-cli/lib/types.d.ts

Lines changed: 1 addition & 0 deletions
@@ -17,4 +17,5 @@ export type AIModel =
   | 'claude-3-7-sonnet-20250219'
   | 'claude-3-5-sonnet-20241022'
   | 'claude-3-5-haiku-20241022'
+  | 'grok-4-fast-reasoning'
   | 'qwen3:4b';

extension/secureflow/packages/secureflow-cli/lib/types.js

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
  */

 /**
- * @typedef {'gpt-4o' | 'gpt-4o-mini' | 'o1-mini' | 'o1' | 'gpt-4.1-2025-04-14' | 'o3-mini-2025-01-31' | 'gemini-2.5-pro' | 'gemini-2.5-flash' | 'claude-opus-4-1-20250805' | 'claude-opus-4-20250514' | 'claude-sonnet-4-20250514' | 'claude-3-7-sonnet-20250219' | 'claude-3-5-sonnet-20241022' | 'claude-3-5-haiku-20241022' | 'qwen3:4b'} AIModel
+ * @typedef {'gpt-4o' | 'gpt-4o-mini' | 'o1-mini' | 'o1' | 'gpt-4.1-2025-04-14' | 'o3-mini-2025-01-31' | 'gemini-2.5-pro' | 'gemini-2.5-flash' | 'claude-opus-4-1-20250805' | 'claude-opus-4-20250514' | 'claude-sonnet-4-20250514' | 'claude-3-7-sonnet-20250219' | 'claude-3-5-sonnet-20241022' | 'claude-3-5-haiku-20241022' | 'grok-4-fast-reasoning' | 'qwen3:4b'} AIModel
 */

 module.exports = {
