richardr1126
diff --git a/‎Dockerfile‎
Lines changed: 3 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/app/api/audio/convert/route.ts‎
Lines changed: 239 additions & 0 deletions b/‎src/app/api/audio/convert/route.ts‎
Lines changed: 239 additions & 0 deletions
diff --git a/‎src/app/api/tts/route.ts‎
Lines changed: 10 additions & 6 deletions b/‎src/app/api/tts/route.ts‎
Lines changed: 10 additions & 6 deletions
@@ -1,6 +1,9 @@
 # Use Node.js slim image
 FROM node:slim
 
+# Add ffmpeg (provides both the ffmpeg and ffprobe binaries); skip recommended packages and drop the apt lists to keep the layer small
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg && rm -rf /var/lib/apt/lists/*
+
 # Create app directory
 WORKDIR /app
 
 
@@ -103,7 +103,7 @@ services:
 
 3. Configure the environment:
    ```bash
-   cp .env.template .env
+   cp template.env .env
    # Edit .env with your configuration settings
    ```
    > Note: The base URL for the TTS API should be accessible and relative to the Next.js server
 
@@ -0,0 +1,239 @@
+import { NextRequest, NextResponse } from 'next/server';
+import { spawn } from 'child_process';
+import { writeFile, mkdir, unlink, rmdir } from 'fs/promises';
+import { createReadStream } from 'fs';
+import { existsSync } from 'fs';
+import { join } from 'path';
+import { randomUUID } from 'crypto';
+
+/**
+ * One chapter of the audiobook as submitted by the client.
+ */
+interface Chapter {
+  /** Chapter title; written into the chapter metadata of the generated file. */
+  title: string;
+  /** Chapter audio as an array of byte values; written to disk as the `.mp3` input for ffmpeg. */
+  buffer: Array<number>;
+}
+
+/**
+ * JSON body parsed by the POST handler of this route.
+ */
+interface ConversionRequest {
+  /** Chapters to convert; they are processed and concatenated in array order. */
+  chapters: Array<Chapter>;
+}
+
+/**
+ * Measures the duration of an audio file by running `ffprobe` on it.
+ *
+ * @param filePath - Path of the audio file to inspect.
+ * @returns The duration in seconds, as printed by ffprobe for `format=duration`.
+ * @throws Rejects when ffprobe cannot be spawned, exits with a non-zero code,
+ *   or prints something that is not a finite, non-negative number.
+ */
+async function getAudioDuration(filePath: string): Promise<number> {
+  return new Promise<number>((resolve, reject) => {
+    const ffprobe = spawn('ffprobe', [
+      '-i', filePath,
+      '-show_entries', 'format=duration',
+      '-v', 'quiet',
+      '-of', 'csv=p=0'
+    ]);
+
+    // Collect everything ffprobe prints; the duration arrives as plain text.
+    let output = '';
+    ffprobe.stdout.on('data', (data) => {
+      output += data.toString();
+    });
+
+    ffprobe.on('close', (code) => {
+      if (code !== 0) {
+        reject(new Error(`ffprobe process exited with code ${code}`));
+        return;
+      }
+
+      // ffprobe can exit with 0 and still print nothing usable (empty output, "N/A").
+      // parseFloat would turn that into NaN, so reject instead of resolving a bogus value.
+      const text = output.trim();
+      const duration = parseFloat(text);
+      if (!Number.isFinite(duration) || duration < 0) {
+        reject(new Error(`ffprobe returned an invalid duration for ${filePath}: "${text}"`));
+        return;
+      }
+
+      resolve(duration);
+    });
+
+    // Emitted when the process could not be spawned (e.g. ffprobe is not installed).
+    ffprobe.on('error', (err) => {
+      reject(err);
+    });
+  });
+}
+
+/**
+ * Runs `ffmpeg` with the given arguments and waits for it to finish.
+ *
+ * Everything ffmpeg writes to stderr is forwarded to `console.error`.
+ *
+ * @param args - Command-line arguments passed to ffmpeg as-is.
+ * @throws Rejects when the process cannot be spawned or exits with a non-zero code.
+ */
+async function runFFmpeg(args: string[]): Promise<void> {
+  const child = spawn('ffmpeg', args);
+
+  child.stderr.on('data', (chunk) => {
+    console.error(`ffmpeg stderr: ${chunk}`);
+  });
+
+  await new Promise<void>((done, fail) => {
+    child.on('error', (spawnError) => {
+      fail(spawnError);
+    });
+
+    child.on('close', (exitCode) => {
+      if (exitCode !== 0) {
+        fail(new Error(`FFmpeg process exited with code ${exitCode}`));
+        return;
+      }
+      done();
+    });
+  });
+}
+
+/**
+ * Best-effort removal of temporary files and directories.
+ *
+ * Failures are logged with `console.error` and never rejected, so this can be
+ * called from error paths without masking the original error.
+ *
+ * @param files - Paths of files to delete.
+ * @param directories - Paths of directories to remove; they must be empty once `files` are gone.
+ */
+async function cleanup(files: string[], directories: string[]): Promise<void> {
+  // Delete the files first: rmdir fails with ENOTEMPTY if it runs while files
+  // inside the directory are still being unlinked.
+  await Promise.all(files.map(f => unlink(f).catch(console.error)));
+  await Promise.all(directories.map(d => rmdir(d).catch(console.error)));
+}
+
+/**
+ * Escapes a value for an FFMETADATA file: `=`, `;`, `#`, `\` and newlines
+ * must be preceded by a backslash or they are parsed as metadata syntax.
+ */
+function escapeFFMetadata(value: string): string {
+  return value.replace(/[=;#\\\n]/g, '\\$&');
+}
+
+/**
+ * Wraps a path in single quotes for a `file` line of an ffmpeg concat list,
+ * escaping any single quote contained in the path itself.
+ */
+function quoteConcatPath(path: string): string {
+  return `'${path.replace(/'/g, "'\\''")}'`;
+}
+
+/**
+ * Converts a list of chapters into a single M4B file with chapter markers.
+ *
+ * Each chapter's bytes are written to a temp file, converted to 44.1 kHz stereo
+ * WAV with ffmpeg and measured with ffprobe. The WAV files are then concatenated
+ * and encoded to AAC together with an FFMETADATA chapter list, and the result is
+ * streamed back as `audio/mp4`. Temporary files are removed once the response
+ * stream ends, errors, or is cancelled.
+ *
+ * @param request - Request whose JSON body matches `ConversionRequest`.
+ * @returns The streamed M4B on success, a 400 JSON error for a malformed body,
+ *   or a 500 JSON error when the conversion fails.
+ */
+export async function POST(request: NextRequest) {
+  const tempFiles: string[] = [];
+  const tempDirs: string[] = [];
+
+  try {
+    const data: ConversionRequest = await request.json();
+
+    // Reject malformed payloads up front instead of failing deep inside ffmpeg.
+    if (
+      !Array.isArray(data?.chapters) ||
+      data.chapters.length === 0 ||
+      data.chapters.some(chapter => !Array.isArray(chapter?.buffer))
+    ) {
+      return NextResponse.json(
+        { error: 'Request body must contain a non-empty "chapters" array with audio buffers' },
+        { status: 400 }
+      );
+    }
+
+    // Generate unique paths for this request
+    const tempDir = join(process.cwd(), 'temp');
+    const id = randomUUID();
+    const outputPath = join(tempDir, `${id}.m4b`);
+    const metadataPath = join(tempDir, `${id}.txt`);
+    const listPath = join(tempDir, `${id}-list.txt`);
+    const intermediateDir = join(tempDir, `${id}-intermediate`);
+
+    tempFiles.push(outputPath, metadataPath);
+    tempDirs.push(intermediateDir);
+
+    // A recursive mkdir also creates the shared temp directory and does not fail
+    // when a concurrent request created it between the check and the call.
+    if (!existsSync(intermediateDir)) {
+      await mkdir(intermediateDir, { recursive: true });
+    }
+
+    // Process chapters sequentially to avoid memory issues
+    const chapterFiles: { path: string; title: string; duration: number }[] = [];
+
+    for (let i = 0; i < data.chapters.length; i++) {
+      const chapter = data.chapters[i];
+      const inputPath = join(intermediateDir, `${i}-input.mp3`);
+      const wavPath = join(intermediateDir, `${i}.wav`);
+
+      tempFiles.push(inputPath, wavPath);
+
+      // Write the chapter audio to a temp file
+      await writeFile(inputPath, Buffer.from(chapter.buffer));
+
+      // Convert to WAV with consistent format
+      await runFFmpeg([
+        '-i', inputPath,
+        '-acodec', 'pcm_s16le',
+        '-ar', '44100',
+        '-ac', '2',
+        wavPath
+      ]);
+
+      const duration = await getAudioDuration(wavPath);
+
+      chapterFiles.push({
+        path: wavPath,
+        title: String(chapter.title),
+        duration
+      });
+
+      // Clean up input file early; it is no longer needed once the WAV exists
+      await unlink(inputPath).catch(console.error);
+      const index = tempFiles.indexOf(inputPath);
+      if (index > -1) {
+        tempFiles.splice(index, 1);
+      }
+    }
+
+    // Create chapter metadata file: chapters are laid out back to back, in milliseconds
+    const metadata: string[] = [';FFMETADATA1'];
+    let currentTime = 0;
+
+    for (const chapter of chapterFiles) {
+      const startMs = Math.floor(currentTime * 1000);
+      currentTime += chapter.duration;
+      const endMs = Math.floor(currentTime * 1000);
+
+      metadata.push(
+        `[CHAPTER]`,
+        `TIMEBASE=1/1000`,
+        `START=${startMs}`,
+        `END=${endMs}`,
+        `title=${escapeFFMetadata(chapter.title)}`
+      );
+    }
+
+    await writeFile(metadataPath, metadata.join('\n'));
+
+    // Create list file for concat
+    tempFiles.push(listPath);
+
+    await writeFile(
+      listPath,
+      chapterFiles.map(f => `file ${quoteConcatPath(f.path)}`).join('\n')
+    );
+
+    // Combine all files into a single M4B
+    await runFFmpeg([
+      '-f', 'concat',
+      '-safe', '0',
+      '-i', listPath,
+      '-i', metadataPath,
+      '-map_metadata', '1',
+      '-c:a', 'aac',
+      '-b:a', '192k',
+      '-movflags', '+faststart',
+      outputPath
+    ]);
+
+    // Read the output file on demand so a slow client cannot force the whole
+    // file to be buffered in memory.
+    const fileStream = createReadStream(outputPath);
+    const fileChunks = fileStream[Symbol.asyncIterator]();
+
+    // End, error and cancel can all fire for the same response; clean up once.
+    let cleaned = false;
+    const cleanupOnce = () => {
+      if (cleaned) {
+        return;
+      }
+      cleaned = true;
+      cleanup(tempFiles, tempDirs).catch(console.error);
+    };
+
+    // Create a web-compatible ReadableStream that pulls one chunk per request
+    const webStream = new ReadableStream<Uint8Array>({
+      async pull(controller) {
+        try {
+          const { done, value } = await fileChunks.next();
+          if (done) {
+            controller.close();
+            // Clean up only after the stream has been fully sent
+            cleanupOnce();
+          } else {
+            controller.enqueue(value);
+          }
+        } catch (error) {
+          // A read that was interrupted by cancel() is expected, not an error.
+          if (cleaned) {
+            return;
+          }
+          console.error('Stream error:', error);
+          controller.error(error);
+          cleanupOnce();
+        }
+      },
+      cancel() {
+        fileStream.destroy();
+        cleanupOnce();
+      }
+    });
+
+    // Return the streaming response
+    return new NextResponse(webStream, {
+      headers: {
+        'Content-Type': 'audio/mp4',
+        'Transfer-Encoding': 'chunked'
+      },
+    });
+
+  } catch (error) {
+    // Clean up in case of error
+    await cleanup(tempFiles, tempDirs).catch(console.error);
+
+    console.error('Error converting audio:', error);
+    return NextResponse.json(
+      { error: 'Failed to convert audio format' },
+      { status: 500 }
+    );
+  }
+}
@@ -20,7 +20,7 @@ export async function POST(req: NextRequest) {
     // Initialize OpenAI client with abort signal
     const openai = new OpenAI({
       apiKey: openApiKey,
-      baseURL: openApiBaseUrl || 'https://api.openai.com/v1',
+      baseURL: openApiBaseUrl,
     });
 
     // Request audio from OpenAI and pass along the abort signal
@@ -29,19 +29,23 @@ export async function POST(req: NextRequest) {
       voice: voice as "alloy",
       input: text,
       speed: speed,
-    }, { signal: req.signal }); // Pass the abort signal to OpenAI client
+      response_format: 'mp3',  // Always use mp3 since we convert to WAV later if needed
+    }, { signal: req.signal });
 
     // Get the audio data as array buffer
-    // This will also be aborted if the client cancels
-    const arrayBuffer = await response.arrayBuffer();
+    const stream = response.body;
 
     // Return audio data with appropriate headers
-    return new NextResponse(arrayBuffer);
+    return new NextResponse(stream, {
+      headers: {
+        'Content-Type': 'audio/mpeg'
+      }
+    });
   } catch (error) {
     // Check if this was an abort error
     if (error instanceof Error && error.name === 'AbortError') {
       console.log('TTS request aborted by client');
-      return new Response(null, { status: 499 }); // Use 499 status for client closed request
+      return new NextResponse(null, { status: 499 }); // Use 499 status for client closed request
     }
 
     console.error('Error generating TTS:', error);