Skip to content

Commit 2cf4b58

Browse files
committed
updated docs
1 parent 9e27d70 commit 2cf4b58

File tree

2 files changed

+1
-16
lines changed

2 files changed

+1
-16
lines changed

apps/docs/content/docs/en/tools/stt.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
1010
color="#181C1E"
1111
/>
1212

13-
1413
{/* MANUAL-CONTENT-START:intro */}
1514
Transcribe speech to text using state-of-the-art AI models from leading providers. The Sim Speech-to-Text (STT) tools allow you to convert audio and video files into accurate transcripts, supporting multiple languages, timestamps, and optional translation.
1615

@@ -23,6 +22,7 @@ Supported providers:
2322
Choose the provider and model best suited to your task—whether fast, production-grade transcription (Deepgram), highly accurate multi-language capability (Whisper), or advanced understanding and language coverage (ElevenLabs).
2423
{/* MANUAL-CONTENT-END */}
2524

25+
2626
## Usage Instructions
2727

2828
Transcribe audio and video files to text using leading AI providers. Supports multiple languages, timestamps, and speaker diarization.

apps/sim/app/api/proxy/stt/route.ts

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ export async function POST(request: NextRequest) {
3232
logger.info(`[${requestId}] STT transcription request started`)
3333

3434
try {
35-
// Authenticate
3635
const authResult = await checkHybridAuth(request, { requireWorkflowId: false })
3736
if (!authResult.success) {
3837
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
@@ -41,15 +40,13 @@ export async function POST(request: NextRequest) {
4140
const body: SttRequestBody = await request.json()
4241
const { provider, apiKey, model, language, timestamps, diarization, translateToEnglish } = body
4342

44-
// Validate required fields
4543
if (!provider || !apiKey) {
4644
return NextResponse.json(
4745
{ error: 'Missing required fields: provider and apiKey' },
4846
{ status: 400 }
4947
)
5048
}
5149

52-
// Get audio file - from upload, reference, or URL
5350
let audioBuffer: Buffer
5451
let audioFileName: string
5552
let audioMimeType: string
@@ -73,7 +70,6 @@ export async function POST(request: NextRequest) {
7370
} else if (body.audioUrl) {
7471
logger.info(`[${requestId}] Downloading from URL: ${body.audioUrl}`)
7572

76-
// Download from external URL
7773
const response = await fetch(body.audioUrl)
7874
if (!response.ok) {
7975
throw new Error(`Failed to download audio from URL: ${response.statusText}`)
@@ -90,7 +86,6 @@ export async function POST(request: NextRequest) {
9086
)
9187
}
9288

93-
// Extract audio from video if needed
9489
if (isVideoFile(audioMimeType)) {
9590
logger.info(`[${requestId}] Extracting audio from video file`)
9691
try {
@@ -115,7 +110,6 @@ export async function POST(request: NextRequest) {
115110

116111
logger.info(`[${requestId}] Transcribing with ${provider}, file: ${audioFileName}`)
117112

118-
// Route to appropriate provider
119113
let transcript: string
120114
let segments: TranscriptSegment[] | undefined
121115
let detectedLanguage: string | undefined
@@ -173,7 +167,6 @@ export async function POST(request: NextRequest) {
173167

174168
logger.info(`[${requestId}] Transcription completed successfully`)
175169

176-
// Return response
177170
return NextResponse.json({
178171
transcript,
179172
segments,
@@ -188,8 +181,6 @@ export async function POST(request: NextRequest) {
188181
}
189182
}
190183

191-
// Provider-specific transcription functions
192-
193184
async function transcribeWithWhisper(
194185
audioBuffer: Buffer,
195186
apiKey: string,
@@ -205,7 +196,6 @@ async function transcribeWithWhisper(
205196
}> {
206197
const formData = new FormData()
207198

208-
// Create a blob from the buffer
209199
const blob = new Blob([new Uint8Array(audioBuffer)], { type: 'audio/mpeg' })
210200
formData.append('file', blob, 'audio.mp3')
211201
formData.append('model', model || 'whisper-1')
@@ -239,7 +229,6 @@ async function transcribeWithWhisper(
239229

240230
const data = await response.json()
241231

242-
// Process response based on format
243232
if (timestamps === 'none') {
244233
return {
245234
transcript: data.text,
@@ -274,7 +263,6 @@ async function transcribeWithDeepgram(
274263
duration?: number
275264
confidence?: number
276265
}> {
277-
// Build query parameters
278266
const params = new URLSearchParams({
279267
model: model || 'nova-3',
280268
smart_format: 'true',
@@ -319,7 +307,6 @@ async function transcribeWithDeepgram(
319307
const detectedLanguage = data.results?.channels?.[0]?.detected_language
320308
const confidence = result.confidence
321309

322-
// Process segments if timestamps requested
323310
let segments: TranscriptSegment[] | undefined
324311
if (timestamps !== 'none' && result.words) {
325312
segments = result.words.map((word: any) => ({
@@ -352,7 +339,6 @@ async function transcribeWithElevenLabs(
352339
language?: string
353340
duration?: number
354341
}> {
355-
// ElevenLabs STT API endpoint
356342
const formData = new FormData()
357343
const blob = new Blob([new Uint8Array(audioBuffer)], { type: 'audio/mpeg' })
358344
formData.append('file', blob, 'audio.mp3')
@@ -381,7 +367,6 @@ async function transcribeWithElevenLabs(
381367

382368
const data = await response.json()
383369

384-
// Process response
385370
return {
386371
transcript: data.text || '',
387372
language: data.language,

0 commit comments

Comments (0)