@@ -246,6 +246,22 @@ const mergeContinuation = (text: string, nextText: string): TTSSmartMergeResult
246246 } ;
247247} ;
248248
/**
 * Builds the composite string key used for per-sentence cache lookups.
 *
 * The key combines the sentence text with every synthesis setting passed in
 * (provider, model, voice, speed), so entries produced under different
 * settings never share a key.
 *
 * Layout: `provider=…|model=…|voice=…|speed=…|text=…`
 *
 * @param sentence - Sentence text; emitted verbatim as the final field.
 * @param voice - Voice identifier; a falsy value is emitted as an empty field.
 * @param speed - Playback/synthesis speed; non-finite values (NaN, ±Infinity)
 *   are emitted as an empty field.
 * @param provider - TTS provider identifier; falsy becomes an empty field.
 * @param model - TTS model identifier; falsy becomes an empty field.
 * @returns The composite cache key.
 */
const buildCacheKey = (
  sentence: string,
  voice: string,
  speed: number,
  provider: string,
  model: string,
): string => {
  // Escape the field delimiter (and the escape character itself) in free-form
  // fields. Without this, a value containing '|' could make two different
  // (provider, model, voice, speed, text) tuples serialize to the same key.
  // Values without '|' or '\' are emitted unchanged.
  const escapeField = (value: string): string =>
    String(value || '').replace(/[\\|]/g, '\\$&');

  const speedField = Number.isFinite(speed) ? String(speed) : '';

  // The sentence is deliberately last and unescaped: once the four preceding
  // fields are unambiguous, everything after `text=` belongs to the sentence.
  return [
    `provider=${escapeField(provider)}`,
    `model=${escapeField(model)}`,
    `voice=${escapeField(voice)}`,
    `speed=${speedField}`,
    `text=${sentence}`,
  ].join('|');
};
264+
249265// Create the context
250266const TTSContext = createContext < TTSContextType | undefined > ( undefined ) ;
251267
@@ -745,7 +761,14 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
745761 // served from the local cache.
746762 const ensureAlignment = ( arrayBuffer : TTSAudioBuffer ) => {
747763 if ( ! alignmentEnabledForCurrentDoc ) return ;
748- if ( sentenceAlignmentCacheRef . current . has ( sentence ) ) return ;
764+ const alignmentKey = buildCacheKey (
765+ sentence ,
766+ voice ,
767+ speed ,
768+ configTTSProvider ,
769+ ttsModel ,
770+ ) ;
771+ if ( sentenceAlignmentCacheRef . current . has ( alignmentKey ) ) return ;
749772
750773 try {
751774 const audioBytes = Array . from ( new Uint8Array ( arrayBuffer ) ) ;
@@ -760,7 +783,7 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
760783 return ;
761784 }
762785 const alignment = data . alignments [ 0 ] as TTSSentenceAlignment ;
763- sentenceAlignmentCacheRef . current . set ( sentence , alignment ) ;
786+ sentenceAlignmentCacheRef . current . set ( alignmentKey , alignment ) ;
764787
765788 const currentSentence = sentencesRef . current [ currentIndexRef . current ] ;
766789 if ( currentSentence === sentence ) {
@@ -776,8 +799,16 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
776799 }
777800 } ;
778801
802+ const audioCacheKey = buildCacheKey (
803+ sentence ,
804+ voice ,
805+ speed ,
806+ configTTSProvider ,
807+ ttsModel ,
808+ ) ;
809+
779810 // Check if the audio is already cached
780- const cachedAudio = audioCache . get ( sentence ) ;
811+ const cachedAudio = audioCache . get ( audioCacheKey ) ;
781812 if ( cachedAudio ) {
782813 console . log ( 'Using cached audio for sentence:' , sentence . substring ( 0 , 20 ) ) ;
783814 // If we have audio but no alignment (e.g. after a
@@ -829,7 +860,7 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
829860 activeAbortControllers . current . delete ( controller ) ;
830861
831862 // Cache the array buffer
832- audioCache . set ( sentence , arrayBuffer ) ;
863+ audioCache . set ( audioCacheKey , arrayBuffer ) ;
833864
834865 // Fire-and-forget alignment request; do not block audio playback
835866 ensureAlignment ( arrayBuffer ) ;
@@ -1084,7 +1115,14 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
10841115
10851116 const playAudio = useCallback ( async ( ) => {
10861117 const sentence = sentences [ currentIndex ] ;
1087- const cachedAlignment = sentenceAlignmentCacheRef . current . get ( sentence ) ;
1118+ const alignmentKey = buildCacheKey (
1119+ sentence ,
1120+ voice ,
1121+ speed ,
1122+ configTTSProvider ,
1123+ ttsModel ,
1124+ ) ;
1125+ const cachedAlignment = sentenceAlignmentCacheRef . current . get ( alignmentKey ) ;
10881126 if ( cachedAlignment ) {
10891127 setCurrentSentenceAlignment ( cachedAlignment ) ;
10901128 setCurrentWordIndex ( null ) ;
@@ -1097,7 +1135,7 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
10971135 if ( howl ) {
10981136 howl . play ( ) ;
10991137 }
1100- } , [ sentences , currentIndex , playSentenceWithHowl ] ) ;
1138+ } , [ sentences , currentIndex , playSentenceWithHowl , voice , speed , configTTSProvider , ttsModel ] ) ;
11011139
11021140 // Place useBackgroundState after playAudio is defined
11031141 const isBackgrounded = useBackgroundState ( {
@@ -1153,16 +1191,26 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
11531191 const preloadNextAudio = useCallback ( async ( ) => {
11541192 try {
11551193 const nextSentence = sentences [ currentIndex + 1 ] ;
1156- if ( nextSentence && ! audioCache . has ( nextSentence ) && ! preloadRequests . current . has ( nextSentence ) ) {
1194+ if ( nextSentence ) {
1195+ const nextKey = buildCacheKey (
1196+ nextSentence ,
1197+ voice ,
1198+ speed ,
1199+ configTTSProvider ,
1200+ ttsModel ,
1201+ ) ;
1202+
1203+ if ( ! audioCache . has ( nextKey ) && ! preloadRequests . current . has ( nextSentence ) ) {
11571204 // Start preloading but don't wait for it to complete
1158- processSentence ( nextSentence , true ) . catch ( error => {
1159- console . error ( 'Error preloading next sentence:' , error ) ;
1160- } ) ;
1205+ processSentence ( nextSentence , true ) . catch ( error => {
1206+ console . error ( 'Error preloading next sentence:' , error ) ;
1207+ } ) ;
1208+ }
11611209 }
11621210 } catch ( error ) {
11631211 console . error ( 'Error initiating preload:' , error ) ;
11641212 }
1165- } , [ currentIndex , sentences , audioCache , processSentence ] ) ;
1213+ } , [ currentIndex , sentences , audioCache , processSentence , voice , speed , configTTSProvider , ttsModel ] ) ;
11661214
11671215 /**
11681216 * Main Playback Driver
@@ -1251,9 +1299,8 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
12511299 abortAudio ( true ) ; // Clear pending requests since speed changed
12521300 setActiveHowl ( null ) ;
12531301
1254- // Update speed, clear cache, and config
1302+ // Update speed and config
12551303 setSpeed ( newSpeed ) ;
1256- audioCache . clear ( ) ;
12571304
12581305 // Update config after state changes
12591306 updateConfigKey ( 'voiceSpeed' , newSpeed ) . then ( ( ) => {
@@ -1263,7 +1310,7 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
12631310 setIsPlaying ( true ) ;
12641311 }
12651312 } ) ;
1266- } , [ abortAudio , updateConfigKey , audioCache , isPlaying ] ) ;
1313+ } , [ abortAudio , updateConfigKey , isPlaying ] ) ;
12671314
12681315 /**
12691316 * Sets the voice and restarts the playback
@@ -1284,9 +1331,8 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
12841331 abortAudio ( true ) ; // Clear pending requests since voice changed
12851332 setActiveHowl ( null ) ;
12861333
1287- // Update voice, clear cache, and config
1334+ // Update voice and config
12881335 setVoice ( newVoice ) ;
1289- audioCache . clear ( ) ;
12901336
12911337 // Update config after state changes
12921338 updateConfigKey ( 'voice' , newVoice ) . then ( ( ) => {
@@ -1296,7 +1342,7 @@ export function TTSProvider({ children }: { children: ReactNode }): ReactElement
12961342 setIsPlaying ( true ) ;
12971343 }
12981344 } ) ;
1299- } , [ abortAudio , updateConfigKey , audioCache , isPlaying ] ) ;
1345+ } , [ abortAudio , updateConfigKey , isPlaying ] ) ;
13001346
13011347 /**
13021348 * Sets the audio player speed and restarts the playback
0 commit comments