Skip to content

Commit f62c3e7

Browse files
authored
Merge branch 'CCExtractor:master' into feature/split-dvb-subs
2 parents 7889a41 + 5f0c672 commit f62c3e7

File tree

8 files changed

+1207
-51
lines changed

8 files changed

+1207
-51
lines changed

docs/CHANGES.TXT

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
0.95 (2025-09-15)
22
-----------------
3+
- New: Added --list-tracks (-L) option to list all tracks in media files without processing
34
- Fix: Garbled captions from HDHomeRun and I/P-only H.264 streams (#1109)
45
- Fix: Enable stdout output for CEA-708 captions on Windows (#1693)
56
- Fix: McPoodle DVD raw format read/write - properly handle loop markers (#1524)

src/lib_ccx/general_loop.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -534,8 +534,14 @@ int raw_loop(struct lib_ccx_ctx *ctx)
534534
dec_ctx = update_decoder_list(ctx);
535535
dec_sub = &dec_ctx->dec_sub;
536536

537-
set_current_pts(dec_ctx->timing, 90);
538-
set_fts(dec_ctx->timing); // Now set the FTS related variables
537+
// For raw mode, timing is derived from the caption block counter (cb_field1).
538+
// We set min_pts=0 and pts_set=MinPtsSet so set_fts() will calculate fts_now.
539+
// Initialize timing for raw mode - no video PTS, just caption block counting.
540+
dec_ctx->timing->min_pts = 0;
541+
dec_ctx->timing->sync_pts = 0;
542+
dec_ctx->timing->pts_set = 2; // MinPtsSet
543+
set_current_pts(dec_ctx->timing, 0);
544+
set_fts(dec_ctx->timing);
539545

540546
do
541547
{
@@ -561,8 +567,13 @@ int raw_loop(struct lib_ccx_ctx *ctx)
561567
else
562568
{
563569
ret = process_raw(dec_ctx, dec_sub, data->buffer, data->len);
564-
// For regular raw format, advance timing based on field 1 blocks
565-
add_current_pts(dec_ctx->timing, cb_field1 * 1001 / 30 * (MPEG_CLOCK_FREQ / 1000));
570+
// For raw mode, cb_field1 is incremented by do_cb() for each CC pair.
571+
// After processing each chunk, add the accumulated time to current_pts
572+
// and call set_fts() to update fts_now. set_fts() resets cb_field1 to 0,
573+
// so each chunk's timing is added incrementally.
574+
// Note: Cast cb_field1 to LLONG to prevent 32-bit integer overflow
575+
// when calculating ticks for large raw files (issue #1565).
576+
add_current_pts(dec_ctx->timing, (LLONG)cb_field1 * 1001 / 30 * (MPEG_CLOCK_FREQ / 1000));
566577
set_fts(dec_ctx->timing);
567578
}
568579

@@ -573,7 +584,12 @@ int raw_loop(struct lib_ccx_ctx *ctx)
573584
dec_sub->got_output = 0;
574585
}
575586

576-
} while (data->len);
587+
// Reset buffer length after processing so we can read more data
588+
// Without this, data->len stays at BUFSIZE and general_get_more_data
589+
// returns CCX_EOF prematurely (it calculates want = BUFSIZE - len = 0)
590+
data->len = 0;
591+
592+
} while (1); // Loop exits via break on CCX_EOF or terminate_asap
577593
free(data);
578594
return caps;
579595
}

src/rust/lib_ccxr/src/common/options.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,8 @@ pub struct Options {
424424
pub mp4vidtrack: bool,
425425
/// If true, extracts chapters (if present), from MP4 files.
426426
pub extract_chapters: bool,
427+
/// If true, only list tracks in the input file without processing
428+
pub list_tracks_only: bool,
427429
/* General settings */
428430
/// Force the use of pic_order_cnt_lsb in AVC/H.264 data streams
429431
pub usepicorder: bool,
@@ -564,6 +566,7 @@ impl Default for Options {
564566
auto_myth: None,
565567
mp4vidtrack: Default::default(),
566568
extract_chapters: Default::default(),
569+
list_tracks_only: Default::default(),
567570
usepicorder: Default::default(),
568571
xmltv: Default::default(),
569572
xmltvliveinterval: Timestamp::from_millis(10000),

src/rust/src/args.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ pub struct Args {
359359
/// Uses multiple programs from the same input stream.
360360
#[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
361361
pub multiprogram: bool,
362+
/// List all tracks found in the input file and exit without
363+
/// processing. Useful for exploring media files before extraction.
364+
#[arg(long = "list-tracks", short = 'L', verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
365+
pub list_tracks: bool,
362366
/// Don't try to find out the stream for caption/teletext
363367
/// data, just use this one instead.
364368
#[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]

src/rust/src/avc/core.rs

Lines changed: 161 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,31 @@ fn find_next_zero(slice: &[u8]) -> Option<usize> {
367367
fn find_next_zero(slice: &[u8]) -> Option<usize> {
368368
slice.iter().position(|&b| b == 0x00)
369369
}
370+
/// Find the first NAL start code (0x00 0x00 0x01 or 0x00 0x00 0x00 0x01) in a buffer.
///
/// Returns the index of the start code's terminating 0x01 byte, or `None` when
/// the buffer contains no start code. Buffers shorter than three bytes can
/// never hold a start code and therefore always yield `None`.
///
/// A 4-byte start code is a 3-byte start code preceded by one extra zero, so
/// scanning for the first `00 00 01` triple covers both forms and reports the
/// same index for the 0x01 byte in either case.
fn find_nal_start_code(buf: &[u8]) -> Option<usize> {
    buf.windows(3)
        .position(|window| matches!(window, [0x00, 0x00, 0x01]))
        // `position` gives the index of the first zero of the triple;
        // the 0x01 byte sits two bytes after it.
        .map(|triple_start| triple_start + 2)
}
394+
370395
/// # Safety
371396
/// This function is unsafe because it dereferences raw pointers and calls `dump` and `do_nal`.
372397
pub unsafe fn process_avc(
@@ -384,118 +409,155 @@ pub unsafe fn process_avc(
384409
));
385410
}
386411

387-
// Warning there should be only leading zeros, nothing else
388-
if !(avcbuf[0] == 0x00 && avcbuf[1] == 0x00) {
389-
return Err(AvcError::BrokenStream(
390-
"Leading bytes are non-zero".to_string(),
391-
));
412+
// If the buffer doesn't start with leading zeros, try to find the first NAL start code.
413+
// This can happen with:
414+
// - HLS/Twitch stream segments that start mid-stream
415+
// - Streams with garbage data at the beginning
416+
// - Buffer accumulation issues after previous errors
417+
let start_offset = if avcbuf[0] == 0x00 && avcbuf[1] == 0x00 {
418+
// Normal case: buffer starts with zeros
419+
0
420+
} else {
421+
// Try to find the first NAL start code
422+
if let Some(nal_pos) = find_nal_start_code(avcbuf) {
423+
// Found a NAL start code, skip to the position before it (the zeros)
424+
// The position returned is the 0x01, so we need to go back to find the zeros
425+
let zeros_start = if nal_pos >= 3 && avcbuf[nal_pos - 3] == 0x00 {
426+
nal_pos - 3 // 4-byte start code
427+
} else {
428+
nal_pos - 2 // 3-byte start code
429+
};
430+
debug!(msg_type = DebugMessageFlag::VERBOSE;
431+
"Skipped {} bytes of garbage before first NAL start code", zeros_start);
432+
zeros_start
433+
} else {
434+
// No NAL start code found - return full buffer length to clear it
435+
debug!(msg_type = DebugMessageFlag::VERBOSE;
436+
"No NAL start code found in buffer of {} bytes, clearing", avcbuflen);
437+
return Ok(avcbuflen);
438+
}
439+
};
440+
441+
// Work with the buffer starting from start_offset
442+
let working_buf = &avcbuf[start_offset..];
443+
let working_len = working_buf.len();
444+
445+
if working_len <= 5 {
446+
// Not enough data after skipping garbage
447+
return Ok(avcbuflen);
392448
}
393449

394450
let mut buffer_position = 2usize;
395-
let mut firstloop = true;
396451

397452
// Loop over NAL units
398-
while buffer_position < avcbuflen.saturating_sub(2) {
453+
while buffer_position < working_len.saturating_sub(2) {
399454
let mut zeropad = 0;
400455

401456
// Find next NAL_start
402-
while buffer_position < avcbuflen {
403-
if avcbuf[buffer_position] == 0x01 {
457+
while buffer_position < working_len {
458+
if working_buf[buffer_position] == 0x01 {
404459
break;
405-
} else if firstloop && avcbuf[buffer_position] != 0x00 {
406-
return Err(AvcError::BrokenStream(
407-
"Leading bytes are non-zero".to_string(),
408-
));
460+
} else if working_buf[buffer_position] != 0x00 {
461+
// Non-zero byte found where we expected zeros - skip to next potential start code
462+
if let Some(next_nal) = find_nal_start_code(&working_buf[buffer_position..]) {
463+
buffer_position += next_nal - 1; // -1 because we'll increment at end of loop
464+
zeropad = 0;
465+
} else {
466+
// No more NAL units found
467+
return Ok(avcbuflen);
468+
}
409469
}
410470
buffer_position += 1;
411471
zeropad += 1;
412472
}
413473

414-
firstloop = false;
415-
416-
if buffer_position >= avcbuflen {
474+
if buffer_position >= working_len {
417475
break;
418476
}
419477

420478
let nal_start_pos = buffer_position + 1;
421-
let mut nal_stop_pos = avcbuflen;
479+
let mut nal_stop_pos = working_len;
422480

423481
buffer_position += 1;
424-
let restlen = avcbuflen.saturating_sub(buffer_position + 2);
482+
let restlen = working_len.saturating_sub(buffer_position + 2);
425483

426484
// Use optimized zero search
427485
if restlen > 0 {
428486
if let Some(zero_offset) =
429-
find_next_zero(&avcbuf[buffer_position..buffer_position + restlen])
487+
find_next_zero(&working_buf[buffer_position..buffer_position + restlen])
430488
{
431489
let zero_pos = buffer_position + zero_offset;
432490

433-
if zero_pos + 2 < avcbuflen {
434-
if avcbuf[zero_pos + 1] == 0x00 && (avcbuf[zero_pos + 2] | 0x01) == 0x01 {
491+
if zero_pos + 2 < working_len {
492+
if working_buf[zero_pos + 1] == 0x00
493+
&& (working_buf[zero_pos + 2] | 0x01) == 0x01
494+
{
435495
nal_stop_pos = zero_pos;
436496
buffer_position = zero_pos + 2;
437497
} else {
438498
// Continue searching from after this zero
439499
buffer_position = zero_pos + 1;
440500
// Iteratively search for the next start code
441-
while buffer_position < avcbuflen.saturating_sub(2) {
501+
while buffer_position < working_len.saturating_sub(2) {
442502
if let Some(next_zero_offset) = find_next_zero(
443-
&avcbuf[buffer_position..avcbuflen.saturating_sub(2)],
503+
&working_buf[buffer_position..working_len.saturating_sub(2)],
444504
) {
445505
let next_zero_pos = buffer_position + next_zero_offset;
446-
if next_zero_pos + 2 < avcbuflen {
447-
if avcbuf[next_zero_pos + 1] == 0x00
448-
&& (avcbuf[next_zero_pos + 2] | 0x01) == 0x01
506+
if next_zero_pos + 2 < working_len {
507+
if working_buf[next_zero_pos + 1] == 0x00
508+
&& (working_buf[next_zero_pos + 2] | 0x01) == 0x01
449509
{
450510
nal_stop_pos = next_zero_pos;
451511
buffer_position = next_zero_pos + 2;
452512
break;
453513
}
454514
} else {
455-
nal_stop_pos = avcbuflen;
456-
buffer_position = avcbuflen;
515+
nal_stop_pos = working_len;
516+
buffer_position = working_len;
457517
break;
458518
}
459519
buffer_position = next_zero_pos + 1;
460520
} else {
461-
nal_stop_pos = avcbuflen;
462-
buffer_position = avcbuflen;
521+
nal_stop_pos = working_len;
522+
buffer_position = working_len;
463523
break;
464524
}
465525
}
466526
}
467527
} else {
468-
nal_stop_pos = avcbuflen;
469-
buffer_position = avcbuflen;
528+
nal_stop_pos = working_len;
529+
buffer_position = working_len;
470530
}
471531
} else {
472-
nal_stop_pos = avcbuflen;
473-
buffer_position = avcbuflen;
532+
nal_stop_pos = working_len;
533+
buffer_position = working_len;
474534
}
475535
} else {
476-
nal_stop_pos = avcbuflen;
477-
buffer_position = avcbuflen;
536+
nal_stop_pos = working_len;
537+
buffer_position = working_len;
478538
}
479539

480-
if nal_start_pos >= avcbuflen {
540+
if nal_start_pos >= working_len {
481541
break;
482542
}
483543

484-
if (avcbuf[nal_start_pos] & 0x80) != 0 {
544+
if (working_buf[nal_start_pos] & 0x80) != 0 {
485545
let dump_start = nal_start_pos.saturating_sub(4);
486-
let dump_len = std::cmp::min(10, avcbuflen - dump_start);
487-
dump(avcbuf[dump_start..].as_ptr(), dump_len as i32, 0, 0);
488-
489-
return Err(AvcError::ForbiddenZeroBit(
490-
"forbidden_zero_bit not zero".to_string(),
491-
));
546+
let dump_len = std::cmp::min(10, working_len - dump_start);
547+
dump(working_buf[dump_start..].as_ptr(), dump_len as i32, 0, 0);
548+
549+
// Don't return an error - just skip this NAL and continue
550+
// This allows processing to continue even with some corrupt data
551+
debug!(msg_type = DebugMessageFlag::VERBOSE;
552+
"Skipping NAL with forbidden_zero_bit set");
553+
continue;
492554
}
493555

494-
(*dec_ctx.avc_ctx).nal_ref_idc = (avcbuf[nal_start_pos] >> 5) as u32;
556+
(*dec_ctx.avc_ctx).nal_ref_idc = (working_buf[nal_start_pos] >> 5) as u32;
495557

496558
debug!(msg_type = DebugMessageFlag::VIDEO_STREAM; "process_avc: zeropad {}", zeropad);
497559
let nal_length = (nal_stop_pos - nal_start_pos) as i64;
498-
let mut nal_slice = avcbuf[nal_start_pos..nal_stop_pos].to_vec();
560+
let mut nal_slice = working_buf[nal_start_pos..nal_stop_pos].to_vec();
499561

500562
if let Err(e) = do_nal(enc_ctx, dec_ctx, &mut nal_slice, nal_length, sub) {
501563
info!("Error processing NAL unit: {}", e);
@@ -504,3 +566,56 @@ pub unsafe fn process_avc(
504566

505567
Ok(avcbuflen)
506568
}
569+
570+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that scanning `bytes` for a NAL start code yields `expected`.
    fn check(bytes: &[u8], expected: Option<usize>) {
        assert_eq!(find_nal_start_code(bytes), expected);
    }

    #[test]
    fn test_find_nal_start_code_3byte() {
        // A 3-byte start code at the very beginning: 0x01 is at index 2.
        check(&[0x00, 0x00, 0x01, 0x65, 0x88], Some(2));
    }

    #[test]
    fn test_find_nal_start_code_4byte() {
        // A 4-byte start code at the very beginning: 0x01 is at index 3.
        check(&[0x00, 0x00, 0x00, 0x01, 0x67, 0x64], Some(3));
    }

    #[test]
    fn test_find_nal_start_code_with_garbage() {
        // Three junk bytes precede the 3-byte start code.
        check(&[0xFF, 0xAB, 0xCD, 0x00, 0x00, 0x01, 0x09, 0xF0], Some(5));
    }

    #[test]
    fn test_find_nal_start_code_no_start_code() {
        // The buffer holds no start code at all.
        check(&[0xFF, 0xAB, 0xCD, 0xEF], None);
    }

    #[test]
    fn test_find_nal_start_code_too_short() {
        // Two bytes can never form a start code.
        check(&[0x00, 0x00], None);
    }

    #[test]
    fn test_find_nal_start_code_empty() {
        let empty: [u8; 0] = [];
        check(&empty, None);
    }

    #[test]
    fn test_find_nal_start_code_partial_match() {
        // The first zero pair is followed by 0x02, so only the second pair counts.
        check(&[0x00, 0x00, 0x02, 0x00, 0x00, 0x01, 0x65], Some(5));
    }
}

0 commit comments

Comments
 (0)