@@ -23,6 +23,87 @@ const ttsAudioCache = new LRUCache<string, AudioBufferValue>({
2323 ttl : TTS_CACHE_TTL_MS ,
2424} ) ;
2525
// Concurrency controls and in-flight de-duplication

/**
 * Parses the raw `TTS_MAX_CONCURRENCY` setting into a number.
 *
 * @param raw - The environment value, or `undefined` when the variable is unset.
 * @returns The parsed number. An unset or empty value yields the default of 4,
 *   and so does a value that is not numeric (e.g. "abc"), which would
 *   otherwise propagate as `NaN` into the concurrency limiter.
 */
function parseTtsMaxConcurrency(raw: string | undefined): number {
  const DEFAULT_LIMIT = 4;
  // `||` (not `??`) is deliberate: an empty string counts as "unset".
  const parsed = Number(raw || DEFAULT_LIMIT);
  return Number.isNaN(parsed) ? DEFAULT_LIMIT : parsed;
}

/** Upper bound on concurrently running upstream TTS calls (env-overridable). */
const TTS_MAX_CONCURRENCY = parseTtsMaxConcurrency(process.env.TTS_MAX_CONCURRENCY);
28+
/**
 * Minimal FIFO counting semaphore for async code.
 *
 * `acquire()` resolves with a release function once a permit is available;
 * waiters are served in the order they called `acquire()`.
 */
class Semaphore {
  /** Number of permits currently available. */
  private permits: number;
  /** Pending waiters; each callback takes a permit and resolves its acquirer. */
  private readonly queue: Array<() => void> = [];

  /**
   * @param max - Maximum number of simultaneous holders. Values below 1 are
   *   raised to 1, fractions are rounded down, and NaN is treated as 1 (a NaN
   *   permit count would make `permits > 0` false forever and block every
   *   caller).
   */
  constructor(max: number) {
    this.permits = Number.isNaN(max) ? 1 : Math.max(1, Math.floor(max));
  }

  /**
   * Waits for a permit.
   *
   * @returns A function that gives the permit back. Calling it more than once
   *   has no further effect, so a stray double release cannot admit an extra
   *   holder.
   */
  async acquire(): Promise<() => void> {
    if (this.permits > 0) {
      this.permits -= 1;
      return this.createRelease();
    }
    return new Promise<() => void>((resolve) => {
      this.queue.push(() => {
        this.permits -= 1;
        resolve(this.createRelease());
      });
    });
  }

  /** Builds a one-shot release function tied to a single acquisition. */
  private createRelease(): () => void {
    let released = false;
    return () => {
      if (released) return;
      released = true;
      this.release();
    };
  }

  /** Returns one permit and hands it straight to the oldest waiter, if any. */
  private release(): void {
    this.permits += 1;
    const next = this.queue.shift();
    if (next) next();
  }
}
53+
/** Bookkeeping for one upstream TTS request that may be awaited by several callers. */
interface InflightEntry {
  /** Settles with the audio bytes produced by the upstream request. */
  promise: Promise<ArrayBuffer>;
  /** Aborting this controller cancels the upstream request. */
  controller: AbortController;
  /** How many callers are still waiting on `promise`. */
  consumers: number;
}

/** Requests currently in progress, looked up by cache key so duplicates can join them. */
const inflightRequests: Map<string, InflightEntry> = new Map();

/** Shared gate that bounds concurrent upstream calls to `TTS_MAX_CONCURRENCY`. */
const ttsSemaphore = new Semaphore(TTS_MAX_CONCURRENCY);
63+
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
67+
/**
 * Reads a numeric retry setting from its raw environment value.
 *
 * @param raw - The environment value, or `undefined` when unset.
 * @param fallback - Used when the variable is unset or does not parse as a number.
 * @returns The parsed value, floored at 0. NaN never escapes: a NaN retry
 *   limit would make the `attempt >= maxRetries` stop condition permanently
 *   false.
 */
function ttsEnvNumber(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw ?? fallback);
  return Number.isNaN(parsed) ? fallback : Math.max(0, parsed);
}

/**
 * Waits `ms` milliseconds, but rejects immediately if `signal` aborts first,
 * so a cancelled request does not sit out the rest of its backoff.
 */
function ttsBackoffWait(ms: number, signal: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const abortError = (): Error => {
      if (signal.reason instanceof Error) return signal.reason;
      const fallbackError = new Error('The operation was aborted');
      fallbackError.name = 'AbortError';
      return fallbackError;
    };
    if (signal.aborted) {
      reject(abortError());
      return;
    }
    const onAbort = (): void => {
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      signal.removeEventListener('abort', onAbort);
      resolve();
    }, ms);
    signal.addEventListener('abort', onAbort, { once: true });
  });
}

/**
 * Requests speech audio from the upstream API and returns it as an ArrayBuffer,
 * retrying transient failures with exponential backoff.
 *
 * Tuning is read from the environment on every call:
 * `TTS_MAX_RETRIES` (default 2), `TTS_RETRY_INITIAL_MS` (default 250),
 * `TTS_RETRY_MAX_MS` (default 2000) and `TTS_RETRY_BACKOFF` (default 2).
 * Values that are not numeric fall back to those defaults.
 *
 * @param openai - Client whose `audio.speech.create` is invoked.
 * @param createParams - Parameters forwarded to `audio.speech.create`.
 * @param signal - Cancels both the upstream call and any backoff wait.
 * @returns The response body as an ArrayBuffer.
 * @throws The upstream error when it is not retryable (anything other than
 *   HTTP 429 or a status >= 500) or when retries are exhausted; an abort error
 *   when `signal` is aborted.
 */
async function fetchTTSBufferWithRetry(
  openai: OpenAI,
  createParams: ExtendedSpeechParams,
  signal: AbortSignal
): Promise<ArrayBuffer> {
  const maxRetries = ttsEnvNumber(process.env.TTS_MAX_RETRIES, 2);
  const maxDelay = ttsEnvNumber(process.env.TTS_RETRY_MAX_MS, 2000);
  const backoff = ttsEnvNumber(process.env.TTS_RETRY_BACKOFF, 2);
  let delay = ttsEnvNumber(process.env.TTS_RETRY_INITIAL_MS, 250);
  let attempt = 0;

  // Retry on 429 and 5xx only; never retry aborts
  for (;;) {
    try {
      const response = await openai.audio.speech.create(createParams as SpeechCreateParams, { signal });
      return await response.arrayBuffer();
    } catch (err: unknown) {
      if (signal.aborted || (err instanceof Error && err.name === 'AbortError')) {
        throw err;
      }

      // Errors may expose the HTTP status as `status` or `statusCode`; 0 means unknown.
      let status = 0;
      if (typeof err === 'object' && err !== null) {
        const rec = err as Record<string, unknown>;
        if (typeof rec.status === 'number') status = rec.status;
        else if (typeof rec.statusCode === 'number') status = rec.statusCode;
      }

      const retryable = status === 429 || status >= 500;
      if (!retryable || attempt >= maxRetries) {
        throw err;
      }

      // Abort-aware wait: rejects as soon as the signal fires.
      await ttsBackoffWait(Math.min(delay, maxDelay), signal);
      delay = Math.min(maxDelay, delay * backoff);
      attempt += 1;
    }
  }
}
106+
26107function makeCacheKey ( input : {
27108 provider : string ;
28109 model : string | null | undefined ;
@@ -102,30 +183,108 @@ export async function POST(req: NextRequest) {
102183 instructions : createParams . instructions ,
103184 } ) ;
104185
186+ const etag = `W/"${ cacheKey } "` ;
187+ const ifNoneMatch = req . headers . get ( 'if-none-match' ) ;
188+
105189 const cachedBuffer = ttsAudioCache . get ( cacheKey ) ;
106190 if ( cachedBuffer ) {
191+ if ( ifNoneMatch && ( ifNoneMatch . includes ( cacheKey ) || ifNoneMatch . includes ( etag ) ) ) {
192+ return new NextResponse ( null , {
193+ status : 304 ,
194+ headers : {
195+ 'ETag' : etag ,
196+ 'Cache-Control' : 'private, max-age=1800' ,
197+ 'Vary' : 'x-tts-provider, x-openai-key, x-openai-base-url'
198+ }
199+ } ) ;
200+ }
107201 console . log ( 'TTS cache HIT for key:' , cacheKey . slice ( 0 , 8 ) ) ;
108202 return new NextResponse ( cachedBuffer , {
109203 headers : {
110204 'Content-Type' : contentType ,
111205 'X-Cache' : 'HIT' ,
206+ 'ETag' : etag ,
207+ 'Content-Length' : String ( cachedBuffer . byteLength ) ,
208+ 'Cache-Control' : 'private, max-age=1800' ,
209+ 'Vary' : 'x-tts-provider, x-openai-key, x-openai-base-url'
112210 }
113211 } ) ;
114212 }
115213
116- const response = await openai . audio . speech . create ( createParams as SpeechCreateParams , { signal : req . signal } ) ;
214+ // De-duplicate identical in-flight requests and bound upstream concurrency
215+ const existing = inflightRequests . get ( cacheKey ) ;
216+ if ( existing ) {
217+ console . log ( 'TTS in-flight JOIN for key:' , cacheKey . slice ( 0 , 8 ) ) ;
218+ existing . consumers += 1 ;
219+
220+ const onAbort = ( _evt : Event ) => {
221+ existing . consumers = Math . max ( 0 , existing . consumers - 1 ) ;
222+ if ( existing . consumers === 0 ) {
223+ existing . controller . abort ( ) ;
224+ }
225+ } ;
226+ req . signal . addEventListener ( 'abort' , onAbort , { once : true } ) ;
227+
228+ try {
229+ const buffer = await existing . promise ;
230+ return new NextResponse ( buffer , {
231+ headers : {
232+ 'Content-Type' : contentType ,
233+ 'X-Cache' : 'INFLIGHT' ,
234+ 'ETag' : etag ,
235+ 'Content-Length' : String ( buffer . byteLength ) ,
236+ 'Cache-Control' : 'private, max-age=1800' ,
237+ 'Vary' : 'x-tts-provider, x-openai-key, x-openai-base-url'
238+ }
239+ } ) ;
240+ } finally {
241+ try { req . signal . removeEventListener ( 'abort' , onAbort ) ; } catch { }
242+ }
243+ }
244+
245+ const controller = new AbortController ( ) ;
246+ const entry : InflightEntry = {
247+ controller,
248+ consumers : 1 ,
249+ promise : ( async ( ) => {
250+ const release = await ttsSemaphore . acquire ( ) ;
251+ try {
252+ const buffer = await fetchTTSBufferWithRetry ( openai , createParams , controller . signal ) ;
253+ // Save to cache
254+ ttsAudioCache . set ( cacheKey , buffer ) ;
255+ return buffer ;
256+ } finally {
257+ release ( ) ;
258+ inflightRequests . delete ( cacheKey ) ;
259+ }
260+ } ) ( )
261+ } ;
117262
118- // Read the audio data as an ArrayBuffer and return it with appropriate headers
119- // This will also be aborted if the client cancels
120- const buffer = await response . arrayBuffer ( ) ;
263+ inflightRequests . set ( cacheKey , entry ) ;
121264
122- // Save to cache
123- ttsAudioCache . set ( cacheKey , buffer ) ;
265+ const onAbort = ( _evt : Event ) => {
266+ entry . consumers = Math . max ( 0 , entry . consumers - 1 ) ;
267+ if ( entry . consumers === 0 ) {
268+ entry . controller . abort ( ) ;
269+ }
270+ } ;
271+ req . signal . addEventListener ( 'abort' , onAbort , { once : true } ) ;
272+
273+ let buffer : ArrayBuffer ;
274+ try {
275+ buffer = await entry . promise ;
276+ } finally {
277+ try { req . signal . removeEventListener ( 'abort' , onAbort ) ; } catch { }
278+ }
124279
125280 return new NextResponse ( buffer , {
126281 headers : {
127282 'Content-Type' : contentType ,
128- 'X-Cache' : 'MISS'
283+ 'X-Cache' : 'MISS' ,
284+ 'ETag' : etag ,
285+ 'Content-Length' : String ( buffer . byteLength ) ,
286+ 'Cache-Control' : 'private, max-age=1800' ,
287+ 'Vary' : 'x-tts-provider, x-openai-key, x-openai-base-url'
129288 }
130289 } ) ;
131290 } catch ( error ) {
0 commit comments