diff --git a/CI_TRIAGE_DEC_2025.md b/CI_TRIAGE_DEC_2025.md new file mode 100644 index 000000000..4dc3dc3e6 --- /dev/null +++ b/CI_TRIAGE_DEC_2025.md @@ -0,0 +1,22 @@ +# CI Test Triage - December 2025 + +This PR is used to trigger CI runs and track the triage of failing regression tests. + +## Purpose + +Several PRs have been merged recently that improved CCExtractor behavior, but the Sample Platform +considers them regressions because the "ground truth" baseline is outdated. This PR helps: + +1. Get a fresh CI run against current master +2. Systematically analyze each failing test +3. Determine whether to update ground truth or fix code + +## Failing Tests to Triage + +Will be populated after CI run completes. + +## Status + +- [ ] CI run triggered +- [ ] Results analyzed +- [ ] Triage decisions made diff --git a/src/lib_ccx/general_loop.c b/src/lib_ccx/general_loop.c index 459063ba7..4ea5fa34e 100644 --- a/src/lib_ccx/general_loop.c +++ b/src/lib_ccx/general_loop.c @@ -1479,6 +1479,17 @@ int rcwt_loop(struct lib_ccx_ctx *ctx) } // end while(1) dbg_print(CCX_DMT_PARSE, "Processed %d bytes\n", bread); + + /* Check if captions were found via other paths (CEA-608 writes directly + to encoder without setting got_output). Similar to general_loop logic. 
*/ + if (!caps && enc_ctx != NULL) + { + if (enc_ctx->srt_counter || enc_ctx->cea_708_counter || dec_ctx->saw_caption_block) + { + caps = 1; + } + } + /* Free XDS context - similar to cleanup in general_loop */ free(dec_ctx->xds_ctx); free(parsebuf); diff --git a/src/lib_ccx/hardsubx.c b/src/lib_ccx/hardsubx.c index 357d63da7..7ddff8d44 100644 --- a/src/lib_ccx/hardsubx.c +++ b/src/lib_ccx/hardsubx.c @@ -121,6 +121,8 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx, struct lib_ccx_ctx *ctx_ // Free the allocated memory for frame processing av_free(ctx->rgb_buffer); + if (ctx->sws_ctx) + sws_freeContext(ctx->sws_ctx); if (ctx->frame) av_frame_free(&ctx->frame); if (ctx->rgb_frame) @@ -283,7 +285,7 @@ struct lib_hardsubx_ctx *_init_hardsubx(struct ccx_s_options *options) free(pars_vec); free(pars_values); - free(tessdata_path); + // Note: tessdata_path points to static string or getenv() result, do NOT free if (ret != 0) { free(ctx); @@ -336,8 +338,11 @@ void _dinit_hardsubx(struct lib_hardsubx_ctx **ctx) TessBaseAPIEnd(lctx->tess_handle); TessBaseAPIDelete(lctx->tess_handle); + // Free basefilename (allocated by get_basename in _init_hardsubx) + freep(&lctx->basefilename); + // Free subtitle - freep(lctx->dec_sub); + freep(&lctx->dec_sub); freep(ctx); } diff --git a/src/lib_ccx/hardsubx.h b/src/lib_ccx/hardsubx.h index 45642743d..c5da3b495 100644 --- a/src/lib_ccx/hardsubx.h +++ b/src/lib_ccx/hardsubx.h @@ -116,6 +116,10 @@ int64_t convert_pts_to_s(int64_t pts, AVRational time_base); int is_valid_trailing_char(char c); char *prune_string(char *s); +// Rust memory management - strings returned from Rust must be freed with this function +// DO NOT use free() on strings returned from _process_frame_white_basic, _process_frame_color_basic, etc. 
+void free_rust_c_string(char *ptr); + #endif #endif diff --git a/src/lib_ccx/hardsubx_decoder.c b/src/lib_ccx/hardsubx_decoder.c index 9a40995a5..ddfa69914 100644 --- a/src/lib_ccx/hardsubx_decoder.c +++ b/src/lib_ccx/hardsubx_decoder.c @@ -210,6 +210,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder if (dist < (0.2 * MIN(strlen(subtitle_text), strlen(prev_subtitle_text)))) { dist = -1; + free_rust_c_string(subtitle_text); subtitle_text = NULL; prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base); } @@ -219,6 +220,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder add_cc_sub_text(ctx->dec_sub, prev_subtitle_text, prev_begin_time, prev_end_time, "", "BURN", CCX_ENC_UTF_8); encode_sub(enc_ctx, ctx->dec_sub); prev_begin_time = prev_end_time + 1; + free(prev_subtitle_text); prev_subtitle_text = NULL; prev_sub_encoded = 1; prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base); @@ -252,6 +254,10 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder prev_sub_encoded = 0; } prev_packet_pts = ctx->packet.pts; + + // Free subtitle_text from this iteration (was allocated by Rust in _process_frame_*_basic) + free_rust_c_string(subtitle_text); + subtitle_text = NULL; } } av_packet_unref(&ctx->packet); @@ -263,6 +269,9 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder encode_sub(enc_ctx, ctx->dec_sub); prev_sub_encoded = 1; } + + // Cleanup + free(prev_subtitle_text); activity_progress(100, cur_sec / 60, cur_sec % 60); } @@ -488,6 +497,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har if (dist < (0.2 * MIN(strlen(subtitle_text_hard), strlen(prev_subtitle_text_hard)))) { dist = -1; + free_rust_c_string(subtitle_text_hard); subtitle_text_hard = NULL; prev_end_time_hard = 
convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base); } @@ -497,6 +507,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har add_cc_sub_text(hard_ctx->dec_sub, prev_subtitle_text_hard, prev_begin_time_hard, prev_end_time_hard, "", "BURN", CCX_ENC_UTF_8); encode_sub(enc_ctx, hard_ctx->dec_sub); prev_begin_time_hard = prev_end_time_hard + 1; + free(prev_subtitle_text_hard); prev_subtitle_text_hard = NULL; prev_sub_encoded_hard = 1; prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base); @@ -517,6 +528,10 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har prev_sub_encoded_hard = 0; } prev_packet_pts_hard = hard_ctx->packet.pts; + + // Free subtitle_text_hard from this iteration (allocated by Rust) + free_rust_c_string(subtitle_text_hard); + subtitle_text_hard = NULL; } } } @@ -529,6 +544,9 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har encode_sub(enc_ctx, hard_ctx->dec_sub); prev_sub_encoded_hard = 1; } + + // Cleanup + free(prev_subtitle_text_hard); activity_progress(100, cur_sec / 60, cur_sec % 60); } diff --git a/src/rust/src/hardsubx/classifier.rs b/src/rust/src/hardsubx/classifier.rs index d8572832a..253095980 100644 --- a/src/rust/src/hardsubx/classifier.rs +++ b/src/rust/src/hardsubx/classifier.rs @@ -22,7 +22,8 @@ use log::warn; /// # Safety /// The function accepts and dereferences a raw pointer /// The function also makes calls to functions whose safety is not guaranteed -/// The function returns a raw pointer which is a string made in C +/// The function returns a raw pointer which is a string allocated by Rust +/// The caller must release it with `free_rust_c_string` (or reclaim it via `CString::from_raw`), never with C's `free()` /// ctx should be not null #[no_mangle] pub unsafe extern "C" fn get_ocr_text_simple_threshold( @@ -30,32 +31,40 @@ pub unsafe extern "C" 
fn get_ocr_text_simple_threshold( image: *mut Pix, threshold: std::os::raw::c_float, ) -> *mut ::std::os::raw::c_char { - let mut text_out: *mut ::std::os::raw::c_char; - TessBaseAPISetImage2((*ctx).tess_handle, image); if TessBaseAPIRecognize((*ctx).tess_handle, null::<ETEXT_DESC>() as *mut ETEXT_DESC) != 0 { warn!("Error in Tesseract recognition, skipping frame\n"); - null::<c_char>() as *mut c_char - } else { - text_out = TessBaseAPIGetUTF8Text((*ctx).tess_handle); + return null::<c_char>() as *mut c_char; + } - if text_out == null::<c_char>() as *mut c_char { - warn!("Error getting text, skipping frame\n"); - } + let tess_text = TessBaseAPIGetUTF8Text((*ctx).tess_handle); - if threshold > 0.0 { - // non-zero conf, only then we'll make the call to check for confidence - let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle); + if tess_text == null::<c_char>() as *mut c_char { + warn!("Error getting text, skipping frame\n"); + return null::<c_char>() as *mut c_char; + } - if (conf as std::os::raw::c_float) < threshold { - text_out = null::<c_char>() as *mut c_char; - } else { - (*ctx).cur_conf = conf as std::os::raw::c_float; - } + if threshold > 0.0 { + // non-zero conf, only then we'll make the call to check for confidence + let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle); + + if (conf as std::os::raw::c_float) < threshold { + TessDeleteText(tess_text); + return null::<c_char>() as *mut c_char; + } else { + (*ctx).cur_conf = conf as std::os::raw::c_float; } - text_out } + + // Convert Tesseract string to Rust-owned string, then free Tesseract's allocation + let rust_string = ffi::CStr::from_ptr(tess_text) + .to_string_lossy() + .into_owned(); + TessDeleteText(tess_text); + + // Return a Rust-allocated C string; release it with free_rust_c_string(), never with C's free() + string_to_c_char(&rust_string) } /// basically the get_oct_text_simple function without threshold diff --git a/src/rust/src/hardsubx/decoder.rs b/src/rust/src/hardsubx/decoder.rs index cb8fabe6c..765d264b2 100644 --- a/src/rust/src/hardsubx/decoder.rs +++ 
b/src/rust/src/hardsubx/decoder.rs @@ -32,6 +32,18 @@ static HARDSUBX_OCRMODE_WORD: i32 = 1; // HARDSUBX_OCRMODE_LETTER // }; +/// Helper function to convert a Rust-allocated C string (null, or a pointer from `CString::into_raw`) to an owned String; null or non-UTF-8 input yields an empty String +/// Takes ownership of the memory and frees it, so the pointer must not be used or freed again afterwards +unsafe fn cstring_to_owned(ptr: *mut c_char) -> String { + if ptr.is_null() { + return String::new(); + } + match ffi::CString::from_raw(ptr).into_string() { + Ok(s) => s, + Err(_) => String::new(), + } +} + /// # Safety /// dereferences a raw pointer /// calls functions that are not necessarily safe @@ -40,29 +52,15 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut match (*ctx).ocr_mode { 0 => { let ret_char_arr = get_ocr_text_simple_threshold(ctx, im, (*ctx).conf_thresh); - let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); - match text_out_result { - Ok(T) => T, - Err(_E) => "".to_string(), - } + cstring_to_owned(ret_char_arr) } 1 => { let ret_char_arr = get_ocr_text_wordwise_threshold(ctx, im, (*ctx).conf_thresh); - if ret_char_arr.is_null() { - "".to_string() - } else { - ffi::CStr::from_ptr(ret_char_arr) - .to_string_lossy() - .into_owned() - } + cstring_to_owned(ret_char_arr) } 2 => { let ret_char_arr = get_ocr_text_letterwise_threshold(ctx, im, (*ctx).conf_thresh); - let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string(); - match text_out_result { - Ok(T) => T, - Err(_E) => "".to_string(), - } + cstring_to_owned(ret_char_arr) } _ => { diff --git a/src/rust/src/utils.rs b/src/rust/src/utils.rs index 4b531e74b..2947612f5 100644 --- a/src/rust/src/utils.rs +++ b/src/rust/src/utils.rs @@ -38,7 +38,8 @@ pub fn string_to_c_char(a: &str) -> *mut ::std::os::raw::c_char { /// # Safety /// The pointer must have been allocated by `string_to_c_char` (i.e., `CString::into_raw`) /// or be null. Passing a pointer allocated by C's malloc will cause undefined behavior. 
-pub unsafe fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) { +#[no_mangle] +pub unsafe extern "C" fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) { if !ptr.is_null() { // Reclaim ownership and drop the CString, which frees the memory let _ = ffi::CString::from_raw(ptr);