@@ -105,6 +105,21 @@ export default class MP4Remuxer implements Remuxer {
105105 }
106106
107107 if ( this . ISGenerated ) {
108+ let audioTimeOffset = timeOffset ;
109+ let videoTimeOffset = timeOffset ;
110+ if ( enoughAudioSamples && enoughVideoSamples ) {
111+ // timeOffset is expected to be the offset of the first timestamp of this fragment (first DTS)
112+ // if first audio DTS is not aligned with first video DTS then we need to take that into account
113+ // when providing timeOffset to remuxAudio / remuxVideo. if we don't do that, there might be a permanent / small
114+ // drift between audio and video streams
115+ // Use pts at timeOffset 0 so that VOD streams begin at 0
116+ const tsDelta = timeOffset > 0 ? audioTrack . samples [ 0 ] . dts - videoTrack . samples [ 0 ] . dts
117+ : audioTrack . samples [ 0 ] . pts - videoTrack . samples [ 0 ] . pts ;
118+ const audiovideoTimestampDelta = tsDelta / videoTrack . inputTimeScale ;
119+ audioTimeOffset += Math . max ( 0 , audiovideoTimestampDelta ) ;
120+ videoTimeOffset += Math . max ( 0 , - audiovideoTimestampDelta ) ;
121+ }
122+
108123 // Purposefully remuxing audio before video, so that remuxVideo can use nextAudioPts, which is calculated in remuxAudio.
109124 if ( enoughAudioSamples ) {
110125 // if initSegment was generated without audio samples, regenerate it again
@@ -113,18 +128,21 @@ export default class MP4Remuxer implements Remuxer {
113128 initSegment = this . generateIS ( audioTrack , videoTrack , timeOffset ) ;
114129 delete initSegment . video ;
115130 }
116- audio = this . remuxAudio ( audioTrack , timeOffset , this . isAudioContiguous , accurateTimeOffset ) ;
131+ audio = this . remuxAudio ( audioTrack , audioTimeOffset , this . isAudioContiguous , accurateTimeOffset ) ;
117132 if ( enoughVideoSamples ) {
118133 const audioTrackLength = audio ? audio . endPTS - audio . startPTS : 0 ;
119134 // if initSegment was generated without video samples, regenerate it again
120135 if ( ! videoTrack . inputTimeScale ) {
121136 logger . warn ( '[mp4-remuxer]: regenerate InitSegment as video detected' ) ;
122137 initSegment = this . generateIS ( audioTrack , videoTrack , timeOffset ) ;
123138 }
124- video = this . remuxVideo ( videoTrack , timeOffset , isVideoContiguous , audioTrackLength , accurateTimeOffset ) ;
139+ video = this . remuxVideo ( videoTrack , videoTimeOffset , isVideoContiguous , audioTrackLength , accurateTimeOffset ) ;
125140 }
126141 } else if ( enoughVideoSamples ) {
127- video = this . remuxVideo ( videoTrack , timeOffset , isVideoContiguous , 0 , accurateTimeOffset ) ;
142+ video = this . remuxVideo ( videoTrack , videoTimeOffset , isVideoContiguous , 0 , accurateTimeOffset ) ;
143+ if ( video && audioTrack . codec ) {
144+ this . remuxEmptyAudio ( audioTrack , audioTimeOffset , this . isAudioContiguous , video ) ;
145+ }
128146 }
129147 }
130148 }
@@ -236,8 +254,8 @@ export default class MP4Remuxer implements Remuxer {
236254 const initPTS : number = this . _initPTS ;
237255 let nextAvcDts = this . nextAvcDts ;
238256 let offset = 8 ;
239- let minPTS : number = Number . MAX_SAFE_INTEGER ;
240- let maxPTS : number = - Number . MAX_SAFE_INTEGER ;
257+ let minPTS : number = Number . POSITIVE_INFINITY ;
258+ let maxPTS : number = Number . NEGATIVE_INFINITY ;
241259 let mp4SampleDuration ! : number ;
242260
243261 // Safari does not like overlapping DTS on consecutive fragments. let's use nextAvcDts to overcome this if fragments are consecutive
@@ -273,10 +291,16 @@ export default class MP4Remuxer implements Remuxer {
273291 let firstDTS = inputSamples [ 0 ] . dts ;
274292 const lastDTS = inputSamples [ inputSamples . length - 1 ] . dts ;
275293
276- // Check timestamp continuity across consecutive fragments, and modify timing in order to remove gaps or overlaps.
294+ // on Safari let's signal the same sample duration for all samples
295+ // sample duration (as expected by trun MP4 boxes), should be the delta between sample DTS
296+ // set this constant duration as being the avg delta between consecutive DTS.
297+ const averageSampleDuration = Math . round ( ( lastDTS - firstDTS ) / ( nbSamples - 1 ) ) ;
298+
299+ // if fragments are contiguous, detect holes/overlaps between fragments
277300 if ( contiguous ) {
301+ // Check timestamp continuity across consecutive fragments, and modify timing in order to remove gaps or overlaps.
278302 const delta = firstDTS - nextAvcDts ;
279- const foundHole = delta > 2 ;
303+ const foundHole = delta > averageSampleDuration ;
280304 const foundOverlap = delta < - 1 ;
281305 if ( foundHole || foundOverlap ) {
282306 const millisecondDelta = Math . round ( delta / 90 ) ;
@@ -289,17 +313,10 @@ export default class MP4Remuxer implements Remuxer {
289313 minPTS -= delta ;
290314 inputSamples [ 0 ] . dts = firstDTS ;
291315 inputSamples [ 0 ] . pts = minPTS ;
292- logger . log ( `Video: PTS/DTS adjusted: ${ Math . round ( minPTS / 90 ) } /${ Math . round ( firstDTS / 90 ) } , delta: ${ millisecondDelta } ms` ) ;
316+ logger . log ( `Video: First PTS/DTS adjusted: ${ Math . round ( minPTS / 90 ) } /${ Math . round ( firstDTS / 90 ) } , delta: ${ millisecondDelta } ms` ) ;
293317 }
294318 }
295319
296- // on Safari let's signal the same sample duration for all samples
297- // sample duration (as expected by trun MP4 boxes), should be the delta between sample DTS
298- // set this constant duration as being the avg delta between consecutive DTS.
299- if ( isSafari ) {
300- mp4SampleDuration = Math . round ( ( lastDTS - firstDTS ) / ( inputSamples . length - 1 ) ) ;
301- }
302-
303320 // handle broken streams with PTS < DTS, tolerance up to 200ms (18000 in 90kHz timescale)
304321 const PTSDTSshift = inputSamples . reduce ( ( prev , curr ) => Math . max ( Math . min ( prev , curr . pts - curr . dts ) , - 18000 ) , 0 ) ;
305322 if ( PTSDTSshift < 0 ) {
@@ -332,7 +349,7 @@ export default class MP4Remuxer implements Remuxer {
332349 // normalize PTS/DTS
333350 if ( isSafari ) {
334351 // sample DTS is computed using a constant decoding offset (averageSampleDuration) between samples
335- sample . dts = firstDTS + i * mp4SampleDuration ;
352+ sample . dts = firstDTS + i * averageSampleDuration ;
336353 } else {
337354 // ensure sample monotonic DTS
338355 sample . dts = Math . max ( sample . dts , firstDTS ) ;
0 commit comments