Skip to content

Commit 2060db9

Browse files
authored
fix(hardsubx): Fix heap corruption from Rust/C allocator mismatch
2 parents a299d06 + 0b74c92 commit 2060db9

File tree

8 files changed

+106
-38
lines changed

8 files changed

+106
-38
lines changed

CI_TRIAGE_DEC_2025.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# CI Test Triage - December 2025
2+
3+
This PR is used to trigger CI runs and track the triage of failing regression tests.
4+
5+
## Purpose
6+
7+
Several PRs have been merged recently that improved CCExtractor behavior, but the Sample Platform
8+
considers them regressions because the "ground truth" baseline is outdated. This PR helps:
9+
10+
1. Get a fresh CI run against current master
11+
2. Systematically analyze each failing test
12+
3. Determine whether to update ground truth or fix code
13+
14+
## Failing Tests to Triage
15+
16+
Will be populated after CI run completes.
17+
18+
## Status
19+
20+
- [ ] CI run triggered
21+
- [ ] Results analyzed
22+
- [ ] Triage decisions made

src/lib_ccx/general_loop.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,17 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
14791479
} // end while(1)
14801480

14811481
dbg_print(CCX_DMT_PARSE, "Processed %d bytes\n", bread);
1482+
1483+
/* Check if captions were found via other paths (CEA-608 writes directly
1484+
to encoder without setting got_output). Similar to general_loop logic. */
1485+
if (!caps && enc_ctx != NULL)
1486+
{
1487+
if (enc_ctx->srt_counter || enc_ctx->cea_708_counter || dec_ctx->saw_caption_block)
1488+
{
1489+
caps = 1;
1490+
}
1491+
}
1492+
14821493
/* Free XDS context - similar to cleanup in general_loop */
14831494
free(dec_ctx->xds_ctx);
14841495
free(parsebuf);

src/lib_ccx/hardsubx.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx, struct lib_ccx_ctx *ctx_
121121

122122
// Free the allocated memory for frame processing
123123
av_free(ctx->rgb_buffer);
124+
if (ctx->sws_ctx)
125+
sws_freeContext(ctx->sws_ctx);
124126
if (ctx->frame)
125127
av_frame_free(&ctx->frame);
126128
if (ctx->rgb_frame)
@@ -283,7 +285,7 @@ struct lib_hardsubx_ctx *_init_hardsubx(struct ccx_s_options *options)
283285

284286
free(pars_vec);
285287
free(pars_values);
286-
free(tessdata_path);
288+
// Note: tessdata_path points to static string or getenv() result, do NOT free
287289
if (ret != 0)
288290
{
289291
free(ctx);
@@ -336,8 +338,11 @@ void _dinit_hardsubx(struct lib_hardsubx_ctx **ctx)
336338
TessBaseAPIEnd(lctx->tess_handle);
337339
TessBaseAPIDelete(lctx->tess_handle);
338340

341+
// Free basefilename (allocated by get_basename in _init_hardsubx)
342+
freep(&lctx->basefilename);
343+
339344
// Free subtitle
340-
freep(lctx->dec_sub);
345+
freep(&lctx->dec_sub);
341346
freep(ctx);
342347
}
343348

src/lib_ccx/hardsubx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ int64_t convert_pts_to_s(int64_t pts, AVRational time_base);
116116
int is_valid_trailing_char(char c);
117117
char *prune_string(char *s);
118118

119+
// Rust memory management - strings returned from Rust must be freed with this function
120+
// DO NOT use free() on strings returned from _process_frame_white_basic, _process_frame_color_basic, etc.
121+
void free_rust_c_string(char *ptr);
122+
119123
#endif
120124

121125
#endif

src/lib_ccx/hardsubx_decoder.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
210210
if (dist < (0.2 * MIN(strlen(subtitle_text), strlen(prev_subtitle_text))))
211211
{
212212
dist = -1;
213+
free_rust_c_string(subtitle_text);
213214
subtitle_text = NULL;
214215
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
215216
}
@@ -219,6 +220,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
219220
add_cc_sub_text(ctx->dec_sub, prev_subtitle_text, prev_begin_time, prev_end_time, "", "BURN", CCX_ENC_UTF_8);
220221
encode_sub(enc_ctx, ctx->dec_sub);
221222
prev_begin_time = prev_end_time + 1;
223+
free(prev_subtitle_text);
222224
prev_subtitle_text = NULL;
223225
prev_sub_encoded = 1;
224226
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
@@ -252,6 +254,10 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
252254
prev_sub_encoded = 0;
253255
}
254256
prev_packet_pts = ctx->packet.pts;
257+
258+
// Free subtitle_text from this iteration (was allocated by Rust in _process_frame_*_basic)
259+
free_rust_c_string(subtitle_text);
260+
subtitle_text = NULL;
255261
}
256262
}
257263
av_packet_unref(&ctx->packet);
@@ -263,6 +269,9 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
263269
encode_sub(enc_ctx, ctx->dec_sub);
264270
prev_sub_encoded = 1;
265271
}
272+
273+
// Cleanup
274+
free(prev_subtitle_text);
266275
activity_progress(100, cur_sec / 60, cur_sec % 60);
267276
}
268277

@@ -488,6 +497,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
488497
if (dist < (0.2 * MIN(strlen(subtitle_text_hard), strlen(prev_subtitle_text_hard))))
489498
{
490499
dist = -1;
500+
free_rust_c_string(subtitle_text_hard);
491501
subtitle_text_hard = NULL;
492502
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
493503
}
@@ -497,6 +507,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
497507
add_cc_sub_text(hard_ctx->dec_sub, prev_subtitle_text_hard, prev_begin_time_hard, prev_end_time_hard, "", "BURN", CCX_ENC_UTF_8);
498508
encode_sub(enc_ctx, hard_ctx->dec_sub);
499509
prev_begin_time_hard = prev_end_time_hard + 1;
510+
free(prev_subtitle_text_hard);
500511
prev_subtitle_text_hard = NULL;
501512
prev_sub_encoded_hard = 1;
502513
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
@@ -517,6 +528,10 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
517528
prev_sub_encoded_hard = 0;
518529
}
519530
prev_packet_pts_hard = hard_ctx->packet.pts;
531+
532+
// Free subtitle_text_hard from this iteration (allocated by Rust)
533+
free_rust_c_string(subtitle_text_hard);
534+
subtitle_text_hard = NULL;
520535
}
521536
}
522537
}
@@ -529,6 +544,9 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
529544
encode_sub(enc_ctx, hard_ctx->dec_sub);
530545
prev_sub_encoded_hard = 1;
531546
}
547+
548+
// Cleanup
549+
free(prev_subtitle_text_hard);
532550
activity_progress(100, cur_sec / 60, cur_sec % 60);
533551
}
534552

src/rust/src/hardsubx/classifier.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,40 +22,49 @@ use log::warn;
2222
/// # Safety
2323
/// The function accepts and dereferences a raw pointer
2424
/// The function also makes calls to functions whose safety is not guaranteed
25-
/// The function returns a raw pointer which is a string made in C
25+
/// The function returns a raw pointer which is a string allocated by Rust
26+
/// The caller must free this with the appropriate method (or let Rust handle it)
2627
/// ctx should be not null
2728
#[no_mangle]
2829
pub unsafe extern "C" fn get_ocr_text_simple_threshold(
2930
ctx: *mut lib_hardsubx_ctx,
3031
image: *mut Pix,
3132
threshold: std::os::raw::c_float,
3233
) -> *mut ::std::os::raw::c_char {
33-
let mut text_out: *mut ::std::os::raw::c_char;
34-
3534
TessBaseAPISetImage2((*ctx).tess_handle, image);
3635

3736
if TessBaseAPIRecognize((*ctx).tess_handle, null::<ETEXT_DESC>() as *mut ETEXT_DESC) != 0 {
3837
warn!("Error in Tesseract recognition, skipping frame\n");
39-
null::<c_char>() as *mut c_char
40-
} else {
41-
text_out = TessBaseAPIGetUTF8Text((*ctx).tess_handle);
38+
return null::<c_char>() as *mut c_char;
39+
}
4240

43-
if text_out == null::<c_char>() as *mut c_char {
44-
warn!("Error getting text, skipping frame\n");
45-
}
41+
let tess_text = TessBaseAPIGetUTF8Text((*ctx).tess_handle);
4642

47-
if threshold > 0.0 {
48-
// non-zero conf, only then we'll make the call to check for confidence
49-
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);
43+
if tess_text == null::<c_char>() as *mut c_char {
44+
warn!("Error getting text, skipping frame\n");
45+
return null::<c_char>() as *mut c_char;
46+
}
5047

51-
if (conf as std::os::raw::c_float) < threshold {
52-
text_out = null::<c_char>() as *mut c_char;
53-
} else {
54-
(*ctx).cur_conf = conf as std::os::raw::c_float;
55-
}
48+
if threshold > 0.0 {
49+
// non-zero conf, only then we'll make the call to check for confidence
50+
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);
51+
52+
if (conf as std::os::raw::c_float) < threshold {
53+
TessDeleteText(tess_text);
54+
return null::<c_char>() as *mut c_char;
55+
} else {
56+
(*ctx).cur_conf = conf as std::os::raw::c_float;
5657
}
57-
text_out
5858
}
59+
60+
// Convert Tesseract string to Rust-owned string, then free Tesseract's allocation
61+
let rust_string = ffi::CStr::from_ptr(tess_text)
62+
.to_string_lossy()
63+
.into_owned();
64+
TessDeleteText(tess_text);
65+
66+
// Return a Rust-allocated C string that can be safely freed with standard free()
67+
string_to_c_char(&rust_string)
5968
}
6069

6170
/// basically the get_oct_text_simple function without threshold

src/rust/src/hardsubx/decoder.rs

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ static HARDSUBX_OCRMODE_WORD: i32 = 1;
3232
// HARDSUBX_OCRMODE_LETTER
3333
// };
3434

35+
/// Helper function to convert a Rust-allocated C string to an owned String
36+
/// Takes ownership of the memory and frees it properly
37+
unsafe fn cstring_to_owned(ptr: *mut c_char) -> String {
38+
if ptr.is_null() {
39+
return String::new();
40+
}
41+
match ffi::CString::from_raw(ptr).into_string() {
42+
Ok(s) => s,
43+
Err(_) => String::new(),
44+
}
45+
}
46+
3547
/// # Safety
3648
/// dereferences a raw pointer
3749
/// calls functions that are not necessarily safe
@@ -40,29 +52,15 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut
4052
match (*ctx).ocr_mode {
4153
0 => {
4254
let ret_char_arr = get_ocr_text_simple_threshold(ctx, im, (*ctx).conf_thresh);
43-
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
44-
match text_out_result {
45-
Ok(T) => T,
46-
Err(_E) => "".to_string(),
47-
}
55+
cstring_to_owned(ret_char_arr)
4856
}
4957
1 => {
5058
let ret_char_arr = get_ocr_text_wordwise_threshold(ctx, im, (*ctx).conf_thresh);
51-
if ret_char_arr.is_null() {
52-
"".to_string()
53-
} else {
54-
ffi::CStr::from_ptr(ret_char_arr)
55-
.to_string_lossy()
56-
.into_owned()
57-
}
59+
cstring_to_owned(ret_char_arr)
5860
}
5961
2 => {
6062
let ret_char_arr = get_ocr_text_letterwise_threshold(ctx, im, (*ctx).conf_thresh);
61-
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
62-
match text_out_result {
63-
Ok(T) => T,
64-
Err(_E) => "".to_string(),
65-
}
63+
cstring_to_owned(ret_char_arr)
6664
}
6765

6866
_ => {

src/rust/src/utils.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ pub fn string_to_c_char(a: &str) -> *mut ::std::os::raw::c_char {
3838
/// # Safety
3939
/// The pointer must have been allocated by `string_to_c_char` (i.e., `CString::into_raw`)
4040
/// or be null. Passing a pointer allocated by C's malloc will cause undefined behavior.
41-
pub unsafe fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) {
41+
#[no_mangle]
42+
pub unsafe extern "C" fn free_rust_c_string(ptr: *mut ::std::os::raw::c_char) {
4243
if !ptr.is_null() {
4344
// Reclaim ownership and drop the CString, which frees the memory
4445
let _ = ffi::CString::from_raw(ptr);

0 commit comments

Comments
 (0)