Skip to content

Commit 2cf4b58

Browse files
committed
updated docs
1 parent 9e27d70 commit 2cf4b58

File tree

2 files changed

+1
-16
lines changed

2 files changed

+1
-16
lines changed

apps/docs/content/docs/en/tools/stt.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
1010
color="#181C1E"
1111
/>
1212

13-
1413
{/* MANUAL-CONTENT-START:intro */}
1514
Transcribe speech to text using state-of-the-art AI models from leading providers. The Sim Speech-to-Text (STT) tools allow you to convert audio and video files into accurate transcripts, supporting multiple languages, timestamps, and optional translation.
1615

@@ -23,6 +22,7 @@ Supported providers:
2322
Choose the provider and model best suited to your task—whether fast, production-grade transcription (Deepgram), highly accurate multi-language capability (Whisper), or advanced understanding and language coverage (ElevenLabs).
2423
{/* MANUAL-CONTENT-END */}
2524

25+
2626
## Usage Instructions
2727

2828
Transcribe audio and video files to text using leading AI providers. Supports multiple languages, timestamps, and speaker diarization.

apps/sim/app/api/proxy/stt/route.ts

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ export async function POST(request: NextRequest) {
3232
logger.info(`[${requestId}] STT transcription request started`)
3333

3434
try {
35-
// Authenticate
3635
const authResult = await checkHybridAuth(request, { requireWorkflowId: false })
3736
if (!authResult.success) {
3837
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
@@ -41,15 +40,13 @@ export async function POST(request: NextRequest) {
4140
const body: SttRequestBody = await request.json()
4241
const { provider, apiKey, model, language, timestamps, diarization, translateToEnglish } = body
4342

44-
// Validate required fields
4543
if (!provider || !apiKey) {
4644
return NextResponse.json(
4745
{ error: 'Missing required fields: provider and apiKey' },
4846
{ status: 400 }
4947
)
5048
}
5149

52-
// Get audio file - from upload, reference, or URL
5350
let audioBuffer: Buffer
5451
let audioFileName: string
5552
let audioMimeType: string
@@ -73,7 +70,6 @@ export async function POST(request: NextRequest) {
7370
} else if (body.audioUrl) {
7471
logger.info(`[${requestId}] Downloading from URL: ${body.audioUrl}`)
7572

76-
// Download from external URL
7773
const response = await fetch(body.audioUrl)
7874
if (!response.ok) {
7975
throw new Error(`Failed to download audio from URL: ${response.statusText}`)
@@ -90,7 +86,6 @@ export async function POST(request: NextRequest) {
9086
)
9187
}
9288

93-
// Extract audio from video if needed
9489
if (isVideoFile(audioMimeType)) {
9590
logger.info(`[${requestId}] Extracting audio from video file`)
9691
try {
@@ -115,7 +110,6 @@ export async function POST(request: NextRequest) {
115110

116111
logger.info(`[${requestId}] Transcribing with ${provider}, file: ${audioFileName}`)
117112

118-
// Route to appropriate provider
119113
let transcript: string
120114
let segments: TranscriptSegment[] | undefined
121115
let detectedLanguage: string | undefined
@@ -173,7 +167,6 @@ export async function POST(request: NextRequest) {
173167

174168
logger.info(`[${requestId}] Transcription completed successfully`)
175169

176-
// Return response
177170
return NextResponse.json({
178171
transcript,
179172
segments,
@@ -188,8 +181,6 @@ export async function POST(request: NextRequest) {
188181
}
189182
}
190183

191-
// Provider-specific transcription functions
192-
193184
async function transcribeWithWhisper(
194185
audioBuffer: Buffer,
195186
apiKey: string,
@@ -205,7 +196,6 @@ async function transcribeWithWhisper(
205196
}> {
206197
const formData = new FormData()
207198

208-
// Create a blob from the buffer
209199
const blob = new Blob([new Uint8Array(audioBuffer)], { type: 'audio/mpeg' })
210200
formData.append('file', blob, 'audio.mp3')
211201
formData.append('model', model || 'whisper-1')
@@ -239,7 +229,6 @@ async function transcribeWithWhisper(
239229

240230
const data = await response.json()
241231

242-
// Process response based on format
243232
if (timestamps === 'none') {
244233
return {
245234
transcript: data.text,
@@ -274,7 +263,6 @@ async function transcribeWithDeepgram(
274263
duration?: number
275264
confidence?: number
276265
}> {
277-
// Build query parameters
278266
const params = new URLSearchParams({
279267
model: model || 'nova-3',
280268
smart_format: 'true',
@@ -319,7 +307,6 @@ async function transcribeWithDeepgram(
319307
const detectedLanguage = data.results?.channels?.[0]?.detected_language
320308
const confidence = result.confidence
321309

322-
// Process segments if timestamps requested
323310
let segments: TranscriptSegment[] | undefined
324311
if (timestamps !== 'none' && result.words) {
325312
segments = result.words.map((word: any) => ({
@@ -352,7 +339,6 @@ async function transcribeWithElevenLabs(
352339
language?: string
353340
duration?: number
354341
}> {
355-
// ElevenLabs STT API endpoint
356342
const formData = new FormData()
357343
const blob = new Blob([new Uint8Array(audioBuffer)], { type: 'audio/mpeg' })
358344
formData.append('file', blob, 'audio.mp3')
@@ -381,7 +367,6 @@ async function transcribeWithElevenLabs(
381367

382368
const data = await response.json()
383369

384-
// Process response
385370
return {
386371
transcript: data.text || '',
387372
language: data.language,

0 commit comments

Comments (0)