@@ -387,6 +387,7 @@ export async function extractLearningSuitePostContent(
387387
388388 // Handler for requests - capture segment URLs with tokens
389389 const segmentUrls : string [ ] = [ ] ;
390+
390391 const requestHandler = ( request : { url : ( ) => string } ) => {
391392 const url = request . url ( ) ;
392393
@@ -428,7 +429,6 @@ export async function extractLearningSuitePostContent(
428429 const headers = response . headers ( ) ;
429430 const location = headers . location ;
430431 if ( location ?. includes ( "b-cdn.net" ) ) {
431- console . log ( `[DEBUG] Redirect to CDN: ${ location . substring ( 0 , 100 ) } ...` ) ;
432432 if ( ! hlsUrls . includes ( location ) ) {
433433 hlsUrls . unshift ( location ) ; // Priority
434434 }
@@ -437,27 +437,57 @@ export async function extractLearningSuitePostContent(
437437 }
438438
439439 if ( status === 200 ) {
440- const contentType = response . headers ( ) [ "content-type" ] ?? "" ;
441-
442- // If it's a direct playlist response
443- if ( contentType . includes ( "mpegurl" ) || contentType . includes ( "m3u8" ) ) {
444- // The API proxy is serving the playlist directly - construct CDN URL from .ts requests
445- // We'll capture the CDN base URL from .ts segment requests instead
446- return ;
447- }
448-
449440 const text = await response . text ( ) ;
450441
451442 // Check if it's HLS playlist content
452443 if ( text . startsWith ( "#EXTM3U" ) ) {
453- console . log ( `[DEBUG] Got HLS playlist from API proxy` ) ;
444+ // Extract ALL segment URLs from the playlist
445+ // Non-comment lines containing ".ts" are treated as segments; relative ones must also carry a token
446+ const lines = text . split ( "\n" ) ;
447+ let baseUrl = "" ;
448+
449+ // First, try to find the base URL from full URLs in the playlist
450+ for ( const line of lines ) {
451+ const trimmed = line . trim ( ) ;
452+ if ( trimmed . startsWith ( "http" ) && trimmed . includes ( "b-cdn.net" ) ) {
453+ const match = / ( h t t p s ? : \/ \/ [ ^ / ] + \/ [ ^ / ] + \/ ) / . exec ( trimmed ) ;
454+ if ( match ?. [ 1 ] ) {
455+ baseUrl = match [ 1 ] ;
456+ break ;
457+ }
458+ }
459+ }
460+
461+ for ( const line of lines ) {
462+ const trimmed = line . trim ( ) ;
463+ // Skip comment lines and empty lines
464+ if ( trimmed . startsWith ( "#" ) || trimmed === "" ) continue ;
465+
466+ // Check if this line is a segment URL (contains .ts)
467+ if ( trimmed . includes ( ".ts" ) ) {
468+ let segmentUrl = trimmed ;
469+
470+ // If it's a full URL, add it directly
471+ if ( trimmed . startsWith ( "http" ) ) {
472+ if ( ! segmentUrls . includes ( segmentUrl ) ) {
473+ segmentUrls . push ( segmentUrl ) ;
474+ }
475+ } else if ( baseUrl && trimmed . includes ( "token=" ) ) {
476+ // Relative URL with token - construct full URL
477+ segmentUrl = baseUrl + trimmed ;
478+ if ( ! segmentUrls . includes ( segmentUrl ) ) {
479+ segmentUrls . push ( segmentUrl ) ;
480+ }
481+ }
482+ }
483+ }
484+
454485 // Extract CDN base URL from playlist content
455486 const cdnMatch = / ( h t t p s ? : \/ \/ v z - [ ^ " ' \s ] + \. b - c d n \. n e t \/ [ ^ " ' \s ] + ) / g. exec ( text ) ;
456487 if ( cdnMatch ?. [ 1 ] ) {
457- const baseUrl = cdnMatch [ 1 ] . replace ( / \/ [ ^ / ] + \. t s .* $ / , "/playlist.m3u8" ) ;
458- console . log ( `[DEBUG] Extracted CDN base: ${ baseUrl } ` ) ;
459- if ( ! hlsUrls . includes ( baseUrl ) ) {
460- hlsUrls . unshift ( baseUrl ) ;
488+ const extractedBase = cdnMatch [ 1 ] . replace ( / \/ [ ^ / ] + \. t s .* $ / , "/playlist.m3u8" ) ;
489+ if ( ! hlsUrls . includes ( extractedBase ) ) {
490+ hlsUrls . unshift ( extractedBase ) ;
461491 }
462492 }
463493 }
@@ -468,7 +498,6 @@ export async function extractLearningSuitePostContent(
468498 while ( ( match = cdnUrlRegex . exec ( text ) ) !== null ) {
469499 const cdnUrl = match [ 1 ] ;
470500 if ( cdnUrl && ! hlsUrls . includes ( cdnUrl ) ) {
471- console . log ( `[DEBUG] Found CDN URL in response: ${ cdnUrl . substring ( 0 , 80 ) } ...` ) ;
472501 hlsUrls . push ( cdnUrl ) ;
473502 }
474503 }
@@ -494,45 +523,77 @@ export async function extractLearningSuitePostContent(
494523 . then ( ( ) => true )
495524 . catch ( ( ) => false ) ;
496525
497- // If video player exists but no HLS URL captured yet, try to trigger video load
498- if ( hasVideoPlayer && hlsUrls . length === 0 ) {
499- // Try multiple approaches to trigger video loading
500-
501- // 1. Try clicking play button
526+ // If video player exists, trigger video load and seek to capture ALL segments
527+ if ( hasVideoPlayer ) {
528+ // Try clicking play button first
502529 const playButton = page . locator (
503530 '[aria-label*="play" i], [class*="play" i], button[class*="Play"], [data-testid*="play"]'
504531 ) ;
505532 try {
506533 await playButton . first ( ) . click ( { timeout : 2000 } ) ;
507- await page . waitForTimeout ( 3000 ) ;
534+ await page . waitForTimeout ( 2000 ) ;
508535 } catch {
509- // Play button not found
510- }
511-
512- // 2. Try clicking the video element directly
513- if ( hlsUrls . length === 0 ) {
536+ // Play button not found, try clicking video directly
514537 try {
515538 await page . locator ( "video" ) . first ( ) . click ( { timeout : 2000 } ) ;
516- await page . waitForTimeout ( 3000 ) ;
539+ await page . waitForTimeout ( 2000 ) ;
517540 } catch {
518541 // Video not clickable
519542 }
520543 }
521544
522- // 3. Try hovering over video to trigger autoplay
523- if ( hlsUrls . length === 0 ) {
524- try {
525- await page . locator ( "video, [class*='video']" ) . first ( ) . hover ( { timeout : 2000 } ) ;
526- await page . waitForTimeout ( 3000 ) ;
527- } catch {
528- // Hover failed
545+ // Get video duration and seek to multiple positions to capture all segments
546+ // HLS players load segments on-demand, so we need to seek to trigger loading
547+ try {
548+ const videoDuration = await page . evaluate ( ( ) => {
549+ const video = document . querySelector ( "video" ) ;
550+ return video ?. duration ?? 0 ;
551+ } ) ;
552+
553+ if ( videoDuration > 0 ) {
554+ // Assumed segment length used to space the seek positions (~4s per segment for Bunny CDN)
555+ const segmentDuration = 4 ; // Bunny CDN uses ~4 second segments
556+
557+ // For longer videos, we need to seek to MORE positions
558+ // HLS players typically buffer ~3-4 segments ahead
559+ // So we need to seek every ~12-16 seconds to capture all segments
560+ const seekInterval = segmentDuration * 3 ; // Seek every ~12 seconds
561+ const seekPositions : number [ ] = [ ] ;
562+
563+ // Generate seek positions throughout the video
564+ for ( let t = 0 ; t < videoDuration ; t += seekInterval ) {
565+ seekPositions . push ( t ) ;
566+ }
567+ // Always include near the end
568+ seekPositions . push ( videoDuration - 2 ) ;
569+ seekPositions . push ( videoDuration - 0.5 ) ;
570+
571+ for ( const seekTime of seekPositions ) {
572+ await page . evaluate ( ( time ) => {
573+ const video = document . querySelector ( "video" ) ;
574+ if ( video ) {
575+ video . currentTime = time ;
576+ }
577+ } , seekTime ) ;
578+ // Wait for segments to load - shorter wait since we have many positions
579+ await page . waitForTimeout ( 800 ) ;
580+ }
581+
582+ // Seek back to start
583+ await page . evaluate ( ( ) => {
584+ const video = document . querySelector ( "video" ) ;
585+ if ( video ) {
586+ video . currentTime = 0 ;
587+ }
588+ } ) ;
589+ await page . waitForTimeout ( 500 ) ;
529590 }
591+ } catch {
592+ // Seek failed
530593 }
531- }
532594
533- // Give more time for lazy-loaded videos and CDN URL extraction
534- if ( hlsUrls . length === 0 && hasVideoPlayer ) {
535- await page . waitForTimeout ( 5000 ) ;
595+ // Give more time for all segment requests to complete
596+ await page . waitForTimeout ( 1500 ) ;
536597 }
537598
538599 // Remove handlers
0 commit comments