Skip to content

Commit 9e816a1

Browse files
authored
Merge branch 'CCExtractor:master' into test
2 parents b2de438 + c3f637a commit 9e816a1

File tree

13 files changed

+145
-50
lines changed

13 files changed

+145
-50
lines changed

CI_TRIAGE_DEC_2025.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# CI Test Triage - December 2025
2+
3+
This PR is used to trigger CI runs and track the triage of failing regression tests.
4+
5+
## Purpose
6+
7+
Several PRs have been merged recently that improved CCExtractor behavior, but the Sample Platform
8+
considers them regressions because the "ground truth" baseline is outdated. This PR helps:
9+
10+
1. Get a fresh CI run against current master
11+
2. Systematically analyze each failing test
12+
3. Determine whether to update ground truth or fix code
13+
14+
## Merged Fixes
15+
16+
The following PRs have been merged and this run verifies their combined effect:
17+
18+
- **PR #1847**: Hardsubx crash fix, memory leak fixes, rcwt exit code fix
19+
- **PR #1848**: XDS empty content entries fix
20+
21+
## Status
22+
23+
- [x] PR #1847 merged
24+
- [x] PR #1848 merged
25+
- [ ] Verification CI run triggered
26+
- [ ] Results analyzed

src/ccextractor.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
/* CCExtractor, originally by carlos at ccextractor.org, now a lot of people.
33
Credits: See AUTHORS.TXT
44
License: GPL 2.0
5+
6+
CI verification run: 2025-12-19T08:30 - Testing merged fixes from PRs #1847 and #1848
57
*/
68
#include "ccextractor.h"
79
#include <stdio.h>

src/lib_ccx/ccx_decoders_xds.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -472,14 +472,17 @@ void xds_do_content_advisory(struct cc_subtitle *sub, struct ccx_decoders_xds_co
472472
if (!a1 && a0) // US TV parental guidelines
473473
{
474474
xdsprint(sub, ctx, age);
475-
xdsprint(sub, ctx, content);
475+
if (content[0]) // Only output content if not empty
476+
xdsprint(sub, ctx, content);
476477
if (changed)
477478
{
478479
ccx_common_logging.log_ftn("\rXDS: %s\n ", age);
479-
ccx_common_logging.log_ftn("\rXDS: %s\n ", content);
480+
if (content[0])
481+
ccx_common_logging.log_ftn("\rXDS: %s\n ", content);
480482
}
481483
ccx_common_logging.debug_ftn(CCX_DMT_DECODER_XDS, "\rXDS: %s\n", age);
482-
ccx_common_logging.debug_ftn(CCX_DMT_DECODER_XDS, "\rXDS: %s\n", content);
484+
if (content[0])
485+
ccx_common_logging.debug_ftn(CCX_DMT_DECODER_XDS, "\rXDS: %s\n", content);
483486
}
484487
if (!a0 || // MPA
485488
(a0 && a1 && !Da2 && !La3) || // Canadian English Language Rating

src/lib_ccx/general_loop.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1402,9 +1402,12 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
14021402
dec_sub = &dec_ctx->dec_sub;
14031403
telctx = dec_ctx->private_data;
14041404

1405-
/* Set minimum and current pts since rcwt has correct time */
1405+
/* Set minimum and current pts since rcwt has correct time.
1406+
* Also set pts_set = 2 (MinPtsSet) so the Rust timing code knows
1407+
* that min_pts is valid and can calculate fts_now properly. */
14061408
dec_ctx->timing->min_pts = 0;
14071409
dec_ctx->timing->current_pts = 0;
1410+
dec_ctx->timing->pts_set = 2; // 2 = min_pts set
14081411

14091412
// Loop until no more data is found
14101413
while (1)
@@ -1489,6 +1492,17 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
14891492
} // end while(1)
14901493

14911494
dbg_print(CCX_DMT_PARSE, "Processed %d bytes\n", bread);
1495+
1496+
/* Check if captions were found via other paths (CEA-608 writes directly
1497+
to encoder without setting got_output). Similar to general_loop logic. */
1498+
if (!caps && enc_ctx != NULL)
1499+
{
1500+
if (enc_ctx->srt_counter || enc_ctx->cea_708_counter || dec_ctx->saw_caption_block)
1501+
{
1502+
caps = 1;
1503+
}
1504+
}
1505+
14921506
/* Free XDS context - similar to cleanup in general_loop */
14931507
free(dec_ctx->xds_ctx);
14941508
free(parsebuf);

src/lib_ccx/hardsubx.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx, struct lib_ccx_ctx *ctx_
121121

122122
// Free the allocated memory for frame processing
123123
av_free(ctx->rgb_buffer);
124+
if (ctx->sws_ctx)
125+
sws_freeContext(ctx->sws_ctx);
124126
if (ctx->frame)
125127
av_frame_free(&ctx->frame);
126128
if (ctx->rgb_frame)
@@ -283,7 +285,7 @@ struct lib_hardsubx_ctx *_init_hardsubx(struct ccx_s_options *options)
283285

284286
free(pars_vec);
285287
free(pars_values);
286-
free(tessdata_path);
288+
// Note: tessdata_path points to static string or getenv() result, do NOT free
287289
if (ret != 0)
288290
{
289291
free(ctx);
@@ -336,8 +338,11 @@ void _dinit_hardsubx(struct lib_hardsubx_ctx **ctx)
336338
TessBaseAPIEnd(lctx->tess_handle);
337339
TessBaseAPIDelete(lctx->tess_handle);
338340

341+
// Free basefilename (allocated by get_basename in _init_hardsubx)
342+
freep(&lctx->basefilename);
343+
339344
// Free subtitle
340-
freep(lctx->dec_sub);
345+
freep(&lctx->dec_sub);
341346
freep(ctx);
342347
}
343348

src/lib_ccx/hardsubx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ int64_t convert_pts_to_s(int64_t pts, AVRational time_base);
116116
int is_valid_trailing_char(char c);
117117
char *prune_string(char *s);
118118

119+
// Rust memory management - strings returned from Rust must be freed with this function
120+
// DO NOT use free() on strings returned from _process_frame_white_basic, _process_frame_color_basic, etc.
121+
void free_rust_c_string(char *ptr);
122+
119123
#endif
120124

121125
#endif

src/lib_ccx/hardsubx_decoder.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
210210
if (dist < (0.2 * MIN(strlen(subtitle_text), strlen(prev_subtitle_text))))
211211
{
212212
dist = -1;
213+
free_rust_c_string(subtitle_text);
213214
subtitle_text = NULL;
214215
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
215216
}
@@ -219,6 +220,7 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
219220
add_cc_sub_text(ctx->dec_sub, prev_subtitle_text, prev_begin_time, prev_end_time, "", "BURN", CCX_ENC_UTF_8);
220221
encode_sub(enc_ctx, ctx->dec_sub);
221222
prev_begin_time = prev_end_time + 1;
223+
free(prev_subtitle_text);
222224
prev_subtitle_text = NULL;
223225
prev_sub_encoded = 1;
224226
prev_end_time = convert_pts_to_ms(ctx->packet.pts, ctx->format_ctx->streams[ctx->video_stream_id]->time_base);
@@ -252,6 +254,10 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
252254
prev_sub_encoded = 0;
253255
}
254256
prev_packet_pts = ctx->packet.pts;
257+
258+
// Free subtitle_text from this iteration (was allocated by Rust in _process_frame_*_basic)
259+
free_rust_c_string(subtitle_text);
260+
subtitle_text = NULL;
255261
}
256262
}
257263
av_packet_unref(&ctx->packet);
@@ -263,6 +269,9 @@ void hardsubx_process_frames_linear(struct lib_hardsubx_ctx *ctx, struct encoder
263269
encode_sub(enc_ctx, ctx->dec_sub);
264270
prev_sub_encoded = 1;
265271
}
272+
273+
// Cleanup
274+
free(prev_subtitle_text);
266275
activity_progress(100, cur_sec / 60, cur_sec % 60);
267276
}
268277

@@ -488,6 +497,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
488497
if (dist < (0.2 * MIN(strlen(subtitle_text_hard), strlen(prev_subtitle_text_hard))))
489498
{
490499
dist = -1;
500+
free_rust_c_string(subtitle_text_hard);
491501
subtitle_text_hard = NULL;
492502
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
493503
}
@@ -497,6 +507,7 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
497507
add_cc_sub_text(hard_ctx->dec_sub, prev_subtitle_text_hard, prev_begin_time_hard, prev_end_time_hard, "", "BURN", CCX_ENC_UTF_8);
498508
encode_sub(enc_ctx, hard_ctx->dec_sub);
499509
prev_begin_time_hard = prev_end_time_hard + 1;
510+
free(prev_subtitle_text_hard);
500511
prev_subtitle_text_hard = NULL;
501512
prev_sub_encoded_hard = 1;
502513
prev_end_time_hard = convert_pts_to_ms(hard_ctx->packet.pts, hard_ctx->format_ctx->streams[hard_ctx->video_stream_id]->time_base);
@@ -517,6 +528,10 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
517528
prev_sub_encoded_hard = 0;
518529
}
519530
prev_packet_pts_hard = hard_ctx->packet.pts;
531+
532+
// Free subtitle_text_hard from this iteration (allocated by Rust)
533+
free_rust_c_string(subtitle_text_hard);
534+
subtitle_text_hard = NULL;
520535
}
521536
}
522537
}
@@ -529,6 +544,9 @@ void process_hardsubx_linear_frames_and_normal_subs(struct lib_hardsubx_ctx *har
529544
encode_sub(enc_ctx, hard_ctx->dec_sub);
530545
prev_sub_encoded_hard = 1;
531546
}
547+
548+
// Cleanup
549+
free(prev_subtitle_text_hard);
532550
activity_progress(100, cur_sec / 60, cur_sec % 60);
533551
}
534552

src/lib_ccx/wtv_functions.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,18 @@ LLONG get_data(struct lib_ccx_ctx *ctx, struct wtv_chunked_buffer *cb, struct de
436436
dbg_print(CCX_DMT_PARSE, "TIME: %ld\n", time);
437437
if (time != -1 && time != WTV_CC_TIMESTAMP_MAGIC)
438438
{ // Ignore -1 timestamps
439-
set_current_pts(dec_ctx->timing, time_to_pes_time(time));
440-
dec_ctx->timing->pts_set = 1;
439+
LLONG pes_time = time_to_pes_time(time);
440+
set_current_pts(dec_ctx->timing, pes_time);
441+
// Set min_pts and sync_pts on first valid timestamp to enable fts_now calculation
442+
if (dec_ctx->timing->min_pts == 0x01FFFFFFFF || pes_time < dec_ctx->timing->min_pts)
443+
{
444+
dec_ctx->timing->min_pts = pes_time;
445+
// Also set sync_pts to prevent PTS jump detection from triggering
446+
// when pts_set becomes MinPtsSet (sync_pts - current_pts would be huge otherwise)
447+
dec_ctx->timing->sync_pts = pes_time;
448+
}
449+
// pts_set = 2 (MinPtsSet) is required for proper fts_now calculation
450+
dec_ctx->timing->pts_set = 2;
441451
frames_since_ref_time = 0;
442452
set_fts(dec_ctx->timing);
443453
}

src/rust/lib_ccxr/src/util/encoding.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ fn latin1_to_line21(c: Latin1Char) -> Line21Char {
584584
0xbf => 0x83, // Inverted (open) question mark
585585
0xa2 => 0x85, // Cents symbol
586586
0xa3 => 0x86, // Pounds sterling
587-
0xb6 => 0x87, // Music note (pilcrow in Latin-1)
587+
b'#' => 0x87, // Music note (# in Latin-1)
588588
0xe0 => 0x88, // lowercase a, grave accent
589589
0x20 => 0x89, // transparent space
590590
0xe8 => 0x8a, // lowercase e, grave accent
@@ -682,7 +682,7 @@ pub fn line21_to_latin1(c: Line21Char) -> Latin1Char {
682682
0x84 => UNAVAILABLE_CHAR, // Trademark symbol (TM) - Does not exist in Latin 1
683683
0x85 => 0xa2, // Cents symbol
684684
0x86 => 0xa3, // Pounds sterling
685-
0x87 => 0xb6, // Music note - Not in latin 1, so we use 'pilcrow'
685+
0x87 => b'#', // Music note - Not in latin 1, so we use '#'
686686
0x88 => 0xe0, // lowercase a, grave accent
687687
0x89 => 0x20, // transparent space, we make it regular
688688
0x8a => 0xe8, // lowercase e, grave accent

src/rust/src/hardsubx/classifier.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,40 +22,49 @@ use log::warn;
2222
/// # Safety
2323
/// The function accepts and dereferences a raw pointer
2424
/// The function also makes calls to functions whose safety is not guaranteed
25-
/// The function returns a raw pointer which is a string made in C
25+
/// The function returns a raw pointer which is a string allocated by Rust
26+
/// The caller must release the returned pointer with `free_rust_c_string`; never pass it to C `free()`
2627
/// ctx should be not null
2728
#[no_mangle]
2829
pub unsafe extern "C" fn get_ocr_text_simple_threshold(
2930
ctx: *mut lib_hardsubx_ctx,
3031
image: *mut Pix,
3132
threshold: std::os::raw::c_float,
3233
) -> *mut ::std::os::raw::c_char {
33-
let mut text_out: *mut ::std::os::raw::c_char;
34-
3534
TessBaseAPISetImage2((*ctx).tess_handle, image);
3635

3736
if TessBaseAPIRecognize((*ctx).tess_handle, null::<ETEXT_DESC>() as *mut ETEXT_DESC) != 0 {
3837
warn!("Error in Tesseract recognition, skipping frame\n");
39-
null::<c_char>() as *mut c_char
40-
} else {
41-
text_out = TessBaseAPIGetUTF8Text((*ctx).tess_handle);
38+
return null::<c_char>() as *mut c_char;
39+
}
4240

43-
if text_out == null::<c_char>() as *mut c_char {
44-
warn!("Error getting text, skipping frame\n");
45-
}
41+
let tess_text = TessBaseAPIGetUTF8Text((*ctx).tess_handle);
4642

47-
if threshold > 0.0 {
48-
// non-zero conf, only then we'll make the call to check for confidence
49-
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);
43+
if tess_text == null::<c_char>() as *mut c_char {
44+
warn!("Error getting text, skipping frame\n");
45+
return null::<c_char>() as *mut c_char;
46+
}
5047

51-
if (conf as std::os::raw::c_float) < threshold {
52-
text_out = null::<c_char>() as *mut c_char;
53-
} else {
54-
(*ctx).cur_conf = conf as std::os::raw::c_float;
55-
}
48+
if threshold > 0.0 {
49+
// non-zero conf, only then we'll make the call to check for confidence
50+
let conf = TessBaseAPIMeanTextConf((*ctx).tess_handle);
51+
52+
if (conf as std::os::raw::c_float) < threshold {
53+
TessDeleteText(tess_text);
54+
return null::<c_char>() as *mut c_char;
55+
} else {
56+
(*ctx).cur_conf = conf as std::os::raw::c_float;
5657
}
57-
text_out
5858
}
59+
60+
// Convert Tesseract string to Rust-owned string, then free Tesseract's allocation
61+
let rust_string = ffi::CStr::from_ptr(tess_text)
62+
.to_string_lossy()
63+
.into_owned();
64+
TessDeleteText(tess_text);
65+
66+
// Return a Rust-allocated C string; the caller must release it with free_rust_c_string(), not free()
67+
string_to_c_char(&rust_string)
5968
}
6069

6170
/// basically the get_ocr_text_simple function without threshold

0 commit comments

Comments
 (0)