@@ -11,6 +11,7 @@ import type { DecodeRatchetOptions, KeyProviderOptions, KeySet, RatchetResult }
1111import { deriveKeys , isVideoFrame , needsRbspUnescaping , parseRbsp , writeRbsp } from '../utils' ;
1212import type { ParticipantKeyHandler } from './ParticipantKeyHandler' ;
1313import { SifGuard } from './SifGuard' ;
14+ import { processNALUsForEncryption } from './naluUtils' ;
1415
1516export const encryptionEnabledMap : Map < string , boolean > = new Map ( ) ;
1617
@@ -304,7 +305,7 @@ export class FrameCryptor extends BaseFrameCryptor {
304305 newDataWithoutHeader . set ( new Uint8Array ( iv ) , cipherText . byteLength ) ; // append IV.
305306 newDataWithoutHeader . set ( frameTrailer , cipherText . byteLength + iv . byteLength ) ; // append frame trailer.
306307
307- if ( frameInfo . isH264 ) {
308+ if ( frameInfo . requiresNALUProcessing ) {
308309 newDataWithoutHeader = writeRbsp ( newDataWithoutHeader ) ;
309310 }
310311
@@ -441,7 +442,7 @@ export class FrameCryptor extends BaseFrameCryptor {
441442 frameHeader . length ,
442443 encodedFrame . data . byteLength - frameHeader . length ,
443444 ) ;
444- if ( frameInfo . isH264 && needsRbspUnescaping ( encryptedData ) ) {
445+ if ( frameInfo . requiresNALUProcessing && needsRbspUnescaping ( encryptedData ) ) {
445446 encryptedData = parseRbsp ( encryptedData ) ;
446447 const newUint8 = new Uint8Array ( frameHeader . byteLength + encryptedData . byteLength ) ;
447448 newUint8 . set ( frameHeader ) ;
@@ -584,66 +585,58 @@ export class FrameCryptor extends BaseFrameCryptor {
584585
585586 private getUnencryptedBytes ( frame : RTCEncodedVideoFrame | RTCEncodedAudioFrame ) : {
586587 unencryptedBytes : number ;
587- isH264 : boolean ;
588+ requiresNALUProcessing : boolean ;
588589 } {
589- var frameInfo = { unencryptedBytes : 0 , isH264 : false } ;
590- if ( isVideoFrame ( frame ) ) {
591- let detectedCodec = this . getVideoCodec ( frame ) ?? this . videoCodec ;
592- if ( detectedCodec !== this . detectedCodec ) {
593- workerLogger . debug ( 'detected different codec' , {
594- detectedCodec,
595- oldCodec : this . detectedCodec ,
596- ...this . logContext ,
597- } ) ;
598- this . detectedCodec = detectedCodec ;
599- }
600-
601- if ( detectedCodec === 'av1' ) {
602- throw new Error ( `${ detectedCodec } is not yet supported for end to end encryption` ) ;
603- }
590+ // Handle audio frames
591+ if ( ! isVideoFrame ( frame ) ) {
592+ return { unencryptedBytes : UNENCRYPTED_BYTES . audio , requiresNALUProcessing : false } ;
593+ }
604594
605- if ( detectedCodec === 'vp8' ) {
606- frameInfo . unencryptedBytes = UNENCRYPTED_BYTES [ frame . type ] ;
607- } else if ( detectedCodec === 'vp9' ) {
608- frameInfo . unencryptedBytes = 0 ;
609- return frameInfo ;
610- }
595+ // Detect and track codec changes
596+ const detectedCodec = this . getVideoCodec ( frame ) ?? this . videoCodec ;
597+ if ( detectedCodec !== this . detectedCodec ) {
598+ workerLogger . debug ( 'detected different codec' , {
599+ detectedCodec,
600+ oldCodec : this . detectedCodec ,
601+ ...this . logContext ,
602+ } ) ;
603+ this . detectedCodec = detectedCodec ;
604+ }
611605
612- const data = new Uint8Array ( frame . data ) ;
613- try {
614- const naluIndices = findNALUIndices ( data ) ;
606+ // Check for unsupported codecs
607+ if ( detectedCodec === 'av1' ) {
608+ throw new Error ( `${ detectedCodec } is not yet supported for end to end encryption` ) ;
609+ }
615610
616- // if the detected codec is undefined we test whether it _looks_ like a h264 frame as a best guess
617- frameInfo . isH264 =
618- detectedCodec === 'h264' ||
619- naluIndices . some ( ( naluIndex ) =>
620- [ NALUType . SLICE_IDR , NALUType . SLICE_NON_IDR ] . includes ( parseNALUType ( data [ naluIndex ] ) ) ,
621- ) ;
611+ // Handle VP8/VP9 codecs (no NALU processing needed)
612+ if ( detectedCodec === 'vp8' ) {
613+ return { unencryptedBytes : UNENCRYPTED_BYTES [ frame . type ] , requiresNALUProcessing : false } ;
614+ }
615+ if ( detectedCodec === 'vp9' ) {
616+ return { unencryptedBytes : 0 , requiresNALUProcessing : false } ;
617+ }
622618
623- if ( frameInfo . isH264 ) {
624- for ( const index of naluIndices ) {
625- let type = parseNALUType ( data [ index ] ) ;
626- switch ( type ) {
627- case NALUType . SLICE_IDR :
628- case NALUType . SLICE_NON_IDR :
629- frameInfo . unencryptedBytes = index + 2 ;
630- return frameInfo ;
631- default :
632- break ;
633- }
634- }
635- throw new TypeError ( 'Could not find NALU' ) ;
636- }
637- } catch ( e ) {
638- // no op, we just continue and fallback to vp8
619+ // Try NALU processing for H.264/H.265 codecs
620+ try {
621+ const knownCodec =
622+ detectedCodec === 'h264' || detectedCodec === 'h265' ? detectedCodec : undefined ;
623+ const naluResult = processNALUsForEncryption ( new Uint8Array ( frame . data ) , knownCodec ) ;
624+
625+ if ( naluResult . requiresNALUProcessing ) {
626+ return {
627+ unencryptedBytes : naluResult . unencryptedBytes ,
628+ requiresNALUProcessing : true ,
629+ } ;
639630 }
640-
641- frameInfo . unencryptedBytes = UNENCRYPTED_BYTES [ frame . type ] ;
642- return frameInfo ;
643- } else {
644- frameInfo . unencryptedBytes = UNENCRYPTED_BYTES . audio ;
645- return frameInfo ;
631+ } catch ( e ) {
632+ workerLogger . debug ( 'NALU processing failed, falling back to VP8 handling' , {
633+ error : e ,
634+ ...this . logContext ,
635+ } ) ;
646636 }
637+
638+ // Fallback to VP8 handling
639+ return { unencryptedBytes : UNENCRYPTED_BYTES [ frame . type ] , requiresNALUProcessing : false } ;
647640 }
648641
649642 /**
@@ -659,90 +652,6 @@ export class FrameCryptor extends BaseFrameCryptor {
659652 }
660653}
661654
662- /**
663- * Slice the NALUs present in the supplied buffer, assuming it is already byte-aligned
664- * code adapted from https://github.com/medooze/h264-frame-parser/blob/main/lib/NalUnits.ts to return indices only
665- */
666- export function findNALUIndices ( stream : Uint8Array ) : number [ ] {
667- const result : number [ ] = [ ] ;
668- let start = 0 ,
669- pos = 0 ,
670- searchLength = stream . length - 2 ;
671- while ( pos < searchLength ) {
672- // skip until end of current NALU
673- while (
674- pos < searchLength &&
675- ! ( stream [ pos ] === 0 && stream [ pos + 1 ] === 0 && stream [ pos + 2 ] === 1 )
676- )
677- pos ++ ;
678- if ( pos >= searchLength ) pos = stream . length ;
679- // remove trailing zeros from current NALU
680- let end = pos ;
681- while ( end > start && stream [ end - 1 ] === 0 ) end -- ;
682- // save current NALU
683- if ( start === 0 ) {
684- if ( end !== start ) throw TypeError ( 'byte stream contains leading data' ) ;
685- } else {
686- result . push ( start ) ;
687- }
688- // begin new NALU
689- start = pos = pos + 3 ;
690- }
691- return result ;
692- }
693-
694- export function parseNALUType ( startByte : number ) : NALUType {
695- return startByte & kNaluTypeMask ;
696- }
697-
698- const kNaluTypeMask = 0x1f ;
699-
700- export enum NALUType {
701- /** Coded slice of a non-IDR picture */
702- SLICE_NON_IDR = 1 ,
703- /** Coded slice data partition A */
704- SLICE_PARTITION_A = 2 ,
705- /** Coded slice data partition B */
706- SLICE_PARTITION_B = 3 ,
707- /** Coded slice data partition C */
708- SLICE_PARTITION_C = 4 ,
709- /** Coded slice of an IDR picture */
710- SLICE_IDR = 5 ,
711- /** Supplemental enhancement information */
712- SEI = 6 ,
713- /** Sequence parameter set */
714- SPS = 7 ,
715- /** Picture parameter set */
716- PPS = 8 ,
717- /** Access unit delimiter */
718- AUD = 9 ,
719- /** End of sequence */
720- END_SEQ = 10 ,
721- /** End of stream */
722- END_STREAM = 11 ,
723- /** Filler data */
724- FILLER_DATA = 12 ,
725- /** Sequence parameter set extension */
726- SPS_EXT = 13 ,
727- /** Prefix NAL unit */
728- PREFIX_NALU = 14 ,
729- /** Subset sequence parameter set */
730- SUBSET_SPS = 15 ,
731- /** Depth parameter set */
732- DPS = 16 ,
733-
734- // 17, 18 reserved
735-
736- /** Coded slice of an auxiliary coded picture without partitioning */
737- SLICE_AUX = 19 ,
738- /** Coded slice extension */
739- SLICE_EXT = 20 ,
740- /** Coded slice extension for a depth view component or a 3D-AVC texture view component */
741- SLICE_LAYER_EXT = 21 ,
742-
743- // 22, 23 reserved
744- }
745-
746655/**
747656 * we use a magic frame trailer to detect whether a frame is injected
748657 * by the livekit server and thus to be treated as unencrypted
0 commit comments