@@ -60,6 +60,22 @@ export function detectVideoType(url: string): LearningSuiteVideoInfo["type"] {
6060 * Extracts video information from a lesson page.
6161 */
6262export async function extractVideoFromPage ( page : Page ) : Promise < LearningSuiteVideoInfo | null > {
63+ // Helper to check if URL is a valid CDN URL (not an API proxy)
64+ const isValidCdnUrl = ( url : string ) : boolean => {
65+ // Skip API proxy endpoints
66+ if ( url . includes ( "api.learningsuite.io" ) ) {
67+ return false ;
68+ }
69+ // Only accept actual CDN URLs
70+ return (
71+ url . includes ( "b-cdn.net" ) ||
72+ url . includes ( "mediadelivery.net" ) ||
73+ url . includes ( "vz-" ) ||
74+ // Also accept URLs without API proxying
75+ ( ! url . includes ( "learningsuite.io" ) && url . includes ( ".m3u8" ) )
76+ ) ;
77+ } ;
78+
6379 // Check for HLS video
6480 const hlsUrl = await page . evaluate ( ( ) => {
6581 // Look for video elements with HLS source
@@ -80,8 +96,19 @@ export async function extractVideoFromPage(page: Page): Promise<LearningSuiteVid
8096 if ( src ) return src ;
8197 }
8298
83- // Look for HLS URLs in script tags
99+ // Look for HLS URLs in script tags - prefer CDN URLs
84100 const scripts = Array . from ( document . querySelectorAll ( "script" ) ) ;
101+ for ( const script of scripts ) {
102+ const content = script . textContent ?? "" ;
103+ // Look for Bunny CDN URLs first
104+ const cdnMatch =
105+ / ( h t t p s ? : \/ \/ [ ^ " ' \s ] * (?: b - c d n \. n e t | m e d i a d e l i v e r y \. n e t | v z - ) [ ^ " ' \s ] * \. m 3 u 8 [ ^ " ' \s ] * ) / i. exec (
106+ content
107+ ) ;
108+ if ( cdnMatch ?. [ 1 ] ) return cdnMatch [ 1 ] ;
109+ }
110+
111+ // Fallback to any m3u8 URL in scripts (will be filtered later)
85112 for ( const script of scripts ) {
86113 const content = script . textContent ?? "" ;
87114 const hlsMatch = / " ( h t t p s ? : \/ \/ [ ^ " ] + \. m 3 u 8 [ ^ " ] * ) " / i. exec ( content ) ;
@@ -91,7 +118,8 @@ export async function extractVideoFromPage(page: Page): Promise<LearningSuiteVid
91118 return null ;
92119 } ) ;
93120
94- if ( hlsUrl ) {
121+ // Filter out API proxy URLs
122+ if ( hlsUrl && isValidCdnUrl ( hlsUrl ) ) {
95123 return {
96124 type : "hls" ,
97125 url : hlsUrl ,
@@ -355,23 +383,64 @@ export async function extractLearningSuitePostContent(
355383) : Promise < LearningSuitePostContent | null > {
356384 // Set up request interception to capture HLS video URLs
357385 const hlsUrls : string [ ] = [ ] ;
386+
387+ // Handler for requests - capture direct CDN URLs
358388 const requestHandler = ( request : { url : ( ) => string } ) => {
359389 const url = request . url ( ) ;
360- // Capture actual HLS playlists from Bunny CDN or direct m3u8 files
361- // Prioritize actual .m3u8 files over API endpoints
390+
391+ // Only capture real Bunny CDN URLs, not API proxies
392+ if ( url . includes ( "api.learningsuite.io" ) ) {
393+ return ;
394+ }
395+
396+ // Capture actual HLS playlists from Bunny CDN
362397 if (
363- url . includes ( ".m3u8" ) || // Direct HLS playlist
364- url . includes ( "b-cdn. net" ) || // Bunny CDN
365- url . includes ( "mediadelivery.net " ) // Bunny video delivery
398+ ( url . includes ( ".m3u8" ) && url . includes ( "b-cdn.net" ) ) ||
399+ ( url . includes ( ".m3u8" ) && url . includes ( "mediadelivery. net") ) ||
400+ ( url . includes ( ".m3u8 " ) && url . includes ( "vz-" ) )
366401 ) {
367- // Skip API responses, only capture actual playlist URLs
368- if ( ! url . includes ( "/embed/" ) && ! url . includes ( "/play/" ) ) {
402+ if ( ! hlsUrls . includes ( url ) ) {
369403 hlsUrls . push ( url ) ;
370404 }
371405 }
372406 } ;
373407
408+ // Handler for responses - capture Bunny CDN URLs from API responses
409+ const responseHandler = async ( response : {
410+ url : ( ) => string ;
411+ status : ( ) => number ;
412+ text : ( ) => Promise < string > ;
413+ } ) => {
414+ const url = response . url ( ) ;
415+
416+ // Check if this is a Bunny API response that might contain the real playlist URL
417+ if ( url . includes ( "api.learningsuite.io" ) && url . includes ( "/bunny/" ) ) {
418+ try {
419+ const status = response . status ( ) ;
420+ // Follow redirects - status 302/301 might have Location header
421+ if ( status >= 300 && status < 400 ) {
422+ return ; // Redirects are handled automatically
423+ }
424+
425+ // For 200 responses, try to parse as JSON to extract playlist URL
426+ if ( status === 200 ) {
427+ const text = await response . text ( ) ;
428+ // Look for Bunny CDN URLs in the response
429+ const cdnUrlRegex =
430+ / ( h t t p s ? : \/ \/ [ ^ " ' \s ] * (?: b - c d n \. n e t | m e d i a d e l i v e r y \. n e t ) [ ^ " ' \s ] * \. m 3 u 8 [ ^ " ' \s ] * ) / ;
431+ const cdnUrlMatch = cdnUrlRegex . exec ( text ) ;
432+ if ( cdnUrlMatch ?. [ 1 ] && ! hlsUrls . includes ( cdnUrlMatch [ 1 ] ) ) {
433+ hlsUrls . push ( cdnUrlMatch [ 1 ] ) ;
434+ }
435+ }
436+ } catch {
437+ // Response body might not be readable
438+ }
439+ }
440+ } ;
441+
374442 page . on ( "request" , requestHandler ) ;
443+ page . on ( "response" , responseHandler ) ;
375444
376445 // Navigate to lesson page
377446 await page . goto ( lessonUrl , { timeout : 30000 } ) ;
@@ -406,30 +475,20 @@ export async function extractLearningSuitePostContent(
406475 await page . waitForTimeout ( 2000 ) ;
407476 }
408477
409- // Remove handler
478+ // Remove handlers
410479 page . off ( "request" , requestHandler ) ;
480+ page . off ( "response" , responseHandler ) ;
411481
412482 // Try to get video from intercepted requests first
413483 let video : LearningSuiteVideoInfo | null = null ;
414484
415- // Prioritize actual .m3u8 files from CDN
416- const actualPlaylist = hlsUrls . find (
417- ( url ) =>
418- url . includes ( ".m3u8" ) && ( url . includes ( "b-cdn.net" ) || url . includes ( "mediadelivery.net" ) )
419- ) ;
420-
421- if ( actualPlaylist ) {
422- video = {
423- type : "hls" ,
424- url : actualPlaylist ,
425- hlsUrl : actualPlaylist ,
426- } ;
427- } else if ( hlsUrls . length > 0 && hlsUrls [ 0 ] ) {
428- // Fallback to any captured HLS URL
485+ // Only use CDN URLs we captured (API proxy URLs are filtered out above)
486+ const firstHlsUrl = hlsUrls [ 0 ] ;
487+ if ( firstHlsUrl ) {
429488 video = {
430489 type : "hls" ,
431- url : hlsUrls [ 0 ] ,
432- hlsUrl : hlsUrls [ 0 ] ,
490+ url : firstHlsUrl ,
491+ hlsUrl : firstHlsUrl ,
433492 } ;
434493 }
435494
0 commit comments