Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions CI_TRIAGE_DEC_2025.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CI Test Triage - December 2025

This PR is used to trigger CI runs and track the triage of failing regression tests.

## Purpose

Several PRs have been merged recently that improved CCExtractor behavior, but the Sample Platform
considers them regressions because the "ground truth" baseline is outdated. This PR helps:

1. Get a fresh CI run against current master
2. Systematically analyze each failing test
3. Determine whether to update ground truth or fix code

## Failing Tests to Triage

To be populated once the CI run completes.

## Status

- [ ] CI run triggered
- [ ] Results analyzed
- [ ] Triage decisions made
11 changes: 11 additions & 0 deletions src/lib_ccx/general_loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -1479,6 +1479,17 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
} // end while(1)

dbg_print(CCX_DMT_PARSE, "Processed %d bytes\n", bread);

/* Check if captions were found via other paths (CEA-608 writes directly
to encoder without setting got_output). Similar to general_loop logic. */
if (!caps && enc_ctx != NULL)
{
if (enc_ctx->srt_counter || enc_ctx->cea_708_counter || dec_ctx->saw_caption_block)
{
caps = 1;
}
}

/* Free XDS context - similar to cleanup in general_loop */
free(dec_ctx->xds_ctx);
free(parsebuf);
Expand Down
9 changes: 7 additions & 2 deletions src/lib_ccx/hardsubx.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx, struct lib_ccx_ctx *ctx_

// Free the allocated memory for frame processing
av_free(ctx->rgb_buffer);
if (ctx->sws_ctx)
sws_freeContext(ctx->sws_ctx);
if (ctx->frame)
av_frame_free(&ctx->frame);
if (ctx->rgb_frame)
Expand Down Expand Up @@ -283,7 +285,7 @@ struct lib_hardsubx_ctx *_init_hardsubx(struct ccx_s_options *options)

free(pars_vec);
free(pars_values);
free(tessdata_path);
// Note: tessdata_path points to static string or getenv() result, do NOT free
if (ret != 0)
{
free(ctx);
Expand Down Expand Up @@ -336,8 +338,11 @@ void _dinit_hardsubx(struct lib_hardsubx_ctx **ctx)
TessBaseAPIEnd(lctx->tess_handle);
TessBaseAPIDelete(lctx->tess_handle);

// Free basefilename (allocated by get_basename in _init_hardsubx)
freep(&lctx->basefilename);

// Free subtitle
freep(lctx->dec_sub);
freep(&lctx->dec_sub);
freep(ctx);
}

Expand Down
4 changes: 4 additions & 0 deletions src/lib_ccx/hardsubx.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ int64_t convert_pts_to_s(int64_t pts, AVRational time_base);
int is_valid_trailing_char(char c);
char *prune_string(char *s);

// Rust memory management - strings returned from Rust must be freed with this function
// DO NOT use free() on strings returned from _process_frame_white_basic, _process_frame_color_basic, etc.
void free_rust_c_string(char *ptr);

#endif

#endif
18 changes: 18 additions & 0 deletions src/lib_ccx/hardsubx_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
if (dist < (0.2 * MIN(strlen(subtitle_text), strlen(prev_subtitle_text))))
{
dist = -1;
free_rust_c_string(subtitle_text);
subtitle_text = NULL;
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
}
Expand All @@ -219,6 +220,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
add_cc_sub_text(ctx->dec_sub, prev_subtitle_text, prev_begin_time, prev_end_time, "", "BURN", CCX_ENC_UTF_8);
encode_sub(enc_ctx, ctx->dec_sub);
prev_begin_time = prev_end_time + 1;
free(prev_subtitle_text);
prev_subtitle_text = NULL;
prev_sub_encoded = 1;
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
Expand Down Expand Up @@ -252,6 +254,10 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
prev_sub_encoded = 0;
}
prev_packet_pts = ctx->packet.pts;

// Free subtitle_text from this iteration (was allocated by Rust in _process_frame_*_basic)
free_rust_c_string(subtitle_text);
subtitle_text = NULL;
}
}
av_packet_unref(&ctx->packet);
Expand All @@ -263,6 +269,9 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
encode_sub(enc_ctx, ctx->dec_sub);
prev_sub_encoded = 1;
}

// Cleanup
free(prev_subtitle_text);
activity_progress(100, cur_sec / 60, cur_sec % 60);
}

Expand Down Expand Up @@ -488,6 +497,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
if (dist < (0.2 * MIN(strlen(subtitle_text_hard), strlen(prev_subtitle_text_hard))))
{
dist = -1;
free_rust_c_string(subtitle_text_hard);
subtitle_text_hard = NULL;
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
}
Expand All @@ -497,6 +507,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
add_cc_sub_text(hard_ctx->dec_sub, prev_subtitle_text_hard, prev_begin_time_hard, prev_end_time_hard, "", "BURN", CCX_ENC_UTF_8);
encode_sub(enc_ctx, hard_ctx->dec_sub);
prev_begin_time_hard = prev_end_time_hard + 1;
free(prev_subtitle_text_hard);
prev_subtitle_text_hard = NULL;
prev_sub_encoded_hard = 1;
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
Expand All @@ -517,6 +528,10 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
prev_sub_encoded_hard = 0;
}
prev_packet_pts_hard = hard_ctx->packet.pts;

// Free subtitle_text_hard from this iteration (allocated by Rust)
free_rust_c_string(subtitle_text_hard);
subtitle_text_hard = NULL;
}
}
}
Expand All @@ -529,6 +544,9 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
encode_sub(enc_ctx, hard_ctx->dec_sub);
prev_sub_encoded_hard = 1;
}

// Cleanup
free(prev_subtitle_text_hard);
activity_progress(100, cur_sec / 60, cur_sec % 60);
}

Expand Down
45 changes: 27 additions & 18 deletions src/rust/src/hardsubx/classifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,40 +22,49 @@ use log::warn;
/// # Safety
/// The function accepts and dereferences a raw pointer
/// The function also makes calls to functions whose safety is not guaranteed
/// The function returns a raw pointer which is a string made in C
/// The function returns a raw pointer which is a string allocated by Rust
/// The caller must release it with `free_rust_c_string` (or reclaim it in Rust via
/// `CString::from_raw`); it must never be passed to C's `free()`
/// ctx should be not null
#[no_mangle]
pub unsafe extern "C" fn get_ocr_text_simple_threshold(
ctx: *mut lib_hardsubx_ctx,
image: *mut Pix,
threshold: std::os::raw::c_float,
) -> *mut ::std::os::raw::c_char {
let mut text_out: *mut ::std::os::raw::c_char;

TessBaseAPISetImage2((*ctx).tess_handle, image);

if TessBaseAPIRecognize((*ctx).tess_handle, null::<ETEXT_DESC>() as *mut ETEXT_DESC) != 0 {
warn!("Error in Tesseract recognition, skipping frame\n");
null::<c_char>() as *mut c_char
} else {
text_out = TessBaseAPIGetUTF8Text((*ctx).tess_handle);
return null::<c_char>() as *mut c_char;
}

if text_out == null::<c_char>() as *mut c_char {
warn!("Error getting text, skipping frame\n");
}
let tess_text = TessBaseAPIGetUTF8Text((*ctx).tess_handle);

if threshold > 0.0 {
// non-zero conf, only then we'll make the call to check for confidence
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);
if tess_text == null::<c_char>() as *mut c_char {
warn!("Error getting text, skipping frame\n");
return null::<c_char>() as *mut c_char;
}

if (conf as std::os::raw::c_float) < threshold {
text_out = null::<c_char>() as *mut c_char;
} else {
(*ctx).cur_conf = conf as std::os::raw::c_float;
}
if threshold > 0.0 {
// non-zero conf, only then we'll make the call to check for confidence
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);

if (conf as std::os::raw::c_float) < threshold {
TessDeleteText(tess_text);
return null::<c_char>() as *mut c_char;
} else {
(*ctx).cur_conf = conf as std::os::raw::c_float;
}
text_out
}

// Convert Tesseract string to Rust-owned string, then free Tesseract's allocation
let rust_string = ffi::CStr::from_ptr(tess_text)
.to_string_lossy()
.into_owned();
TessDeleteText(tess_text);

// Return a Rust-allocated C string; release it with free_rust_c_string(), never with C's free()
string_to_c_char(&rust_string)
}

/// basically the get_ocr_text_simple_threshold function without the threshold check
Expand Down
32 changes: 15 additions & 17 deletions src/rust/src/hardsubx/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ static HARDSUBX_OCRMODE_WORD: i32 = 1;
// HARDSUBX_OCRMODE_LETTER
// };

/// Turns a C string that was handed out by Rust back into an owned `String`.
///
/// A null pointer, or a buffer that is not valid UTF-8, produces an empty string.
///
/// # Safety
/// `ptr` must be null or a pointer obtained from `CString::into_raw`. Ownership of
/// the buffer is reclaimed here, so the pointer must not be used again afterwards.
unsafe fn cstring_to_owned(ptr: *mut c_char) -> String {
    if ptr.is_null() {
        String::new()
    } else {
        // Re-wrapping the pointer in a CString hands the buffer back to Rust; it is
        // released when the CString (or the conversion error holding it) is dropped.
        ffi::CString::from_raw(ptr)
            .into_string()
            .unwrap_or_default()
    }
}

/// # Safety
/// dereferences a raw pointer
/// calls functions that are not necessarily safe
Expand All @@ -40,29 +52,15 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut
match (*ctx).ocr_mode {
0 => {
let ret_char_arr = get_ocr_text_simple_threshold(ctx, im, (*ctx).conf_thresh);
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
match text_out_result {
Ok(T) => T,
Err(_E) => "".to_string(),
}
cstring_to_owned(ret_char_arr)
}
1 => {
let ret_char_arr = get_ocr_text_wordwise_threshold(ctx, im, (*ctx).conf_thresh);
if ret_char_arr.is_null() {
"".to_string()
} else {
ffi::CStr::from_ptr(ret_char_arr)
.to_string_lossy()
.into_owned()
}
cstring_to_owned(ret_char_arr)
}
2 => {
let ret_char_arr = get_ocr_text_letterwise_threshold(ctx, im, (*ctx).conf_thresh);
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
match text_out_result {
Ok(T) => T,
Err(_E) => "".to_string(),
}
cstring_to_owned(ret_char_arr)
}

_ => {
Expand Down
3 changes: 2 additions & 1 deletion src/rust/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ pub fn string_to_c_char(a: &str) -> *mut ::std::os::raw::c_char {
/// # Safety
/// The pointer must have been allocated by `string_to_c_char` (i.e., `CString::into_raw`)
/// or be null. Passing a pointer allocated by C's malloc will cause undefined behavior.
pub unsafe fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) {
#[no_mangle]
pub unsafe extern "C" fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) {
if !ptr.is_null() {
// Reclaim ownership and drop the CString, which frees the memory
let _ = ffi::CString::from_raw(ptr);
Expand Down
Loading