Skip to content

Commit fd06393

Browse files
cfsmp3 and claude committed
feat(teletext): Add multi-page extraction with separate output files (#665)
Implement support for extracting multiple teletext pages simultaneously, with each page output to a separate file.

Changes:
- Support multiple --tpage arguments (e.g., --tpage 397 --tpage 398)
- Create separate output files per page with _pNNN suffix (e.g., output_p397.srt, output_p398.srt)
- Maintain backward compatibility for single-page extraction (no suffix)
- Add per-page SRT counters for correct subtitle numbering
- Fix BCD to decimal page number conversion in telxcc.c
- Add --tpages-all mode support for auto-detecting all pages

Tested with 21 teletext samples from the sample platform, all passing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 7a9acb7 commit fd06393

File tree

11 files changed

+397
-25
lines changed

11 files changed

+397
-25
lines changed

src/lib_ccx/ccx_common_structs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ struct cc_subtitle
8484
/** Raw PTS value when this subtitle started (for DVB timing) */
8585
LLONG start_pts;
8686

87+
/** Teletext page number (for multi-page extraction, issue #665) */
88+
uint16_t teletext_page;
89+
8790
struct cc_subtitle *next;
8891
struct cc_subtitle *prev;
8992
};

src/lib_ccx/ccx_encoders_common.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,9 @@ void dinit_encoder(struct encoder_ctx **arg, LLONG current_fts)
719719
write_subtitle_file_footer(ctx, ctx->out + i);
720720
}
721721

722+
// Clean up teletext multi-page output files (issue #665)
723+
dinit_teletext_outputs(ctx);
724+
722725
free_encoder_context(ctx->prev);
723726
dinit_output_ctx(ctx);
724727
freep(&ctx->subline);
@@ -838,6 +841,15 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
838841
ctx->segment_last_key_frame = 0;
839842
ctx->nospupngocr = opt->nospupngocr;
840843

844+
// Initialize teletext multi-page output arrays (issue #665)
845+
ctx->tlt_out_count = 0;
846+
for (int i = 0; i < MAX_TLT_PAGES_EXTRACT; i++)
847+
{
848+
ctx->tlt_out[i] = NULL;
849+
ctx->tlt_out_pages[i] = 0;
850+
ctx->tlt_srt_counter[i] = 0;
851+
}
852+
841853
ctx->prev = NULL;
842854
return ctx;
843855
}
@@ -1298,3 +1310,168 @@ void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, in
12981310
enc_ctx->cea_708_counter = 0;
12991311
enc_ctx->srt_counter = 0;
13001312
}
1313+
1314+
/**
1315+
* Get or create the output file for a specific teletext page (issue #665)
1316+
* Creates output files on-demand with suffix _pNNN (e.g., output_p891.srt)
1317+
* Falls back to the default output (ctx->out) instead of returning NULL when:
* teletext_page is 0, the default output is stdout, at most one page was
* requested and extract_all_pages is off, MAX_TLT_PAGES_EXTRACT per-page files
* already exist, or allocating / naming / opening the new file fails.
* The result is NULL only if ctx->out itself is NULL.
* On success the new file is registered in ctx->tlt_out / ctx->tlt_out_pages,
* its SRT counter is reset to 0, and the subtitle file header is written to it.
1318+
*/
1319+
struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page)
1320+
{
1321+
// If teletext_page is 0 (or there is no default output at all), hand back ctx->out as-is
1322+
if (teletext_page == 0 || ctx->out == NULL)
1323+
return ctx->out;
1324+
1325+
// Check if we're sending to stdout - can't do multi-page in that case
1326+
if (ctx->out[0].fh == STDOUT_FILENO)
1327+
return ctx->out;
1328+
1329+
// Check if we already have an output file for this page
1330+
for (int i = 0; i < ctx->tlt_out_count; i++)
1331+
{
1332+
if (ctx->tlt_out_pages[i] == teletext_page)
1333+
return ctx->tlt_out[i];
1334+
}
1335+
1336+
// If we only have one teletext page requested, use the default output
1337+
// (no suffix needed for backward compatibility)
1338+
extern struct ccx_s_teletext_config tlt_config;
1339+
if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages)
1340+
return ctx->out;
1341+
1342+
// Need to create a new output file for this page; if every slot is taken,
// warn and fall back to the default output
1343+
if (ctx->tlt_out_count >= MAX_TLT_PAGES_EXTRACT)
1344+
{
1345+
mprint("Warning: Too many teletext pages to extract (max %d), using default output for page %03d\n",
1346+
MAX_TLT_PAGES_EXTRACT, teletext_page);
1347+
return ctx->out;
1348+
}
1349+
1350+
// Allocate the new write structure (zero-filled below)
1351+
struct ccx_s_write *new_out = (struct ccx_s_write *)malloc(sizeof(struct ccx_s_write));
1352+
if (!new_out)
1353+
{
1354+
mprint("Error: Memory allocation failed for teletext output\n");
1355+
return ctx->out;
1356+
}
1357+
memset(new_out, 0, sizeof(struct ccx_s_write));
1358+
1359+
// Create the filename with page suffix
1360+
const char *ext = get_file_extension(ctx->write_format);
1361+
char suffix[16];
1362+
snprintf(suffix, sizeof(suffix), "_p%03d", teletext_page);
1363+
1364+
// Base name preference: default output's filename, then the first input file, then "untitled"
char *basefilename = NULL;
1365+
if (ctx->out[0].filename != NULL)
1366+
{
1367+
basefilename = get_basename(ctx->out[0].filename);
1368+
}
1369+
else if (ctx->first_input_file != NULL)
1370+
{
1371+
basefilename = get_basename(ctx->first_input_file);
1372+
}
1373+
else
1374+
{
1375+
basefilename = strdup("untitled");
1376+
}
1377+
1378+
if (basefilename == NULL)
1379+
{
1380+
free(new_out);
1381+
return ctx->out;
1382+
}
1383+
1384+
char *filename = create_outfilename(basefilename, suffix, ext);
1385+
free(basefilename);
1386+
1387+
if (filename == NULL)
1388+
{
1389+
free(new_out);
1390+
return ctx->out;
1391+
}
1392+
1393+
// Open the file (created or truncated); new_out keeps the filename pointer
1394+
new_out->filename = filename;
1395+
new_out->fh = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE);
1396+
if (new_out->fh == -1)
1397+
{
1398+
mprint("Error: Failed to open output file %s: %s\n", filename, strerror(errno));
1399+
free(filename);
1400+
free(new_out);
1401+
return ctx->out;
1402+
}
1403+
1404+
mprint("Creating teletext output file: %s\n", filename);
1405+
1406+
// Store in our array (slot index == current count)
1407+
int idx = ctx->tlt_out_count;
1408+
ctx->tlt_out[idx] = new_out;
1409+
ctx->tlt_out_pages[idx] = teletext_page;
1410+
ctx->tlt_srt_counter[idx] = 0;
1411+
ctx->tlt_out_count++;
1412+
1413+
// Write the subtitle file header
1414+
write_subtitle_file_header(ctx, new_out);
1415+
1416+
return new_out;
1417+
}
1418+
1419+
/**
1420+
* Get the SRT counter for a specific teletext page (issue #665)
1421+
* Returns a pointer to the page's entry in ctx->tlt_srt_counter when an output
* slot exists for the page. Never returns NULL: falls back to the default
* &ctx->srt_counter when teletext_page is 0, when at most one page was
* requested and extract_all_pages is off, or when the page has no slot.
1422+
*/
1423+
unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page)
1424+
{
1425+
// If teletext_page is 0, use the default counter
1426+
if (teletext_page == 0)
1427+
return &ctx->srt_counter;
1428+
1429+
// Check if we're using multi-page mode; if not, use the default counter
1430+
extern struct ccx_s_teletext_config tlt_config;
1431+
if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages)
1432+
return &ctx->srt_counter;
1433+
1434+
// Find the counter for this page
1435+
for (int i = 0; i < ctx->tlt_out_count; i++)
1436+
{
1437+
if (ctx->tlt_out_pages[i] == teletext_page)
1438+
return &ctx->tlt_srt_counter[i];
1439+
}
1440+
1441+
// Not found, use default counter
1442+
return &ctx->srt_counter;
1443+
}
1444+
1445+
/**
1446+
* Clean up all teletext output files (issue #665)
* For each of the first ctx->tlt_out_count slots that is non-NULL: writes the
* subtitle file footer, closes the descriptor (unless it is -1), frees the
* filename and the write structure, and sets the slot to NULL. Finally resets
* ctx->tlt_out_count to 0. Does nothing if ctx is NULL.
1447+
*/
1448+
void dinit_teletext_outputs(struct encoder_ctx *ctx)
1449+
{
1450+
if (!ctx)
1451+
return;
1452+
1453+
for (int i = 0; i < ctx->tlt_out_count; i++)
1454+
{
1455+
if (ctx->tlt_out[i] != NULL)
1456+
{
1457+
// Write footer
1458+
write_subtitle_file_footer(ctx, ctx->tlt_out[i]);
1459+
1460+
// Close file
1461+
if (ctx->tlt_out[i]->fh != -1)
1462+
{
1463+
close(ctx->tlt_out[i]->fh);
1464+
}
1465+
1466+
// Free filename
1467+
if (ctx->tlt_out[i]->filename != NULL)
1468+
{
1469+
free(ctx->tlt_out[i]->filename);
1470+
}
1471+
1472+
free(ctx->tlt_out[i]);
1473+
ctx->tlt_out[i] = NULL;
1474+
}
1475+
}
1476+
ctx->tlt_out_count = 0;
1477+
}

src/lib_ccx/ccx_encoders_common.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
#include "ccx_encoders_structs.h"
1717
#include "ccx_common_option.h"
1818

19+
// Maximum number of teletext pages to extract simultaneously (issue #665)
20+
#ifndef MAX_TLT_PAGES_EXTRACT
21+
#define MAX_TLT_PAGES_EXTRACT 8
22+
#endif
23+
1924
#define REQUEST_BUFFER_CAPACITY(ctx, length) \
2025
if (length > ctx->capacity) \
2126
{ \
@@ -169,6 +174,12 @@ struct encoder_ctx
169174

170175
// OCR in SPUPNG
171176
int nospupngocr;
177+
178+
// Teletext multi-page output (issue #665)
179+
struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page
180+
uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot
181+
unsigned int tlt_srt_counter[MAX_TLT_PAGES_EXTRACT]; // SRT counter per page
182+
int tlt_out_count; // Number of teletext output files
172183
};
173184

174185
#define INITIAL_ENC_BUFFER_CAPACITY 2048
@@ -263,4 +274,9 @@ unsigned int get_font_encoded(struct encoder_ctx *ctx, unsigned char *buffer, in
263274

264275
struct lib_ccx_ctx;
265276
void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, int track_id);
277+
278+
// Teletext multi-page output (issue #665)
279+
struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page);
280+
unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page);
281+
void dinit_teletext_outputs(struct encoder_ctx *ctx);
266282
#endif

src/lib_ccx/ccx_encoders_srt.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
#include "ocr.h"
77
#include "ccextractor.h"
88

9-
/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for
10-
if there is any */
11-
int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end)
9+
/* Helper function to write SRT to a specific output file (issue #665 - teletext multi-page)
10+
Takes output file descriptor and counter pointer as parameters */
11+
static int write_stringz_as_srt_to_output(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end,
12+
int out_fh, unsigned int *srt_counter)
1213
{
1314
int used;
1415
unsigned h1, m1, s1, ms1;
@@ -20,17 +21,17 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta
2021

2122
millis_to_time(ms_start, &h1, &m1, &s1, &ms1);
2223
millis_to_time(ms_end - 1, &h2, &m2, &s2, &ms2); // -1 To prevent overlapping with next line.
23-
context->srt_counter++;
24-
snprintf(timeline, sizeof(timeline), "%u%s", context->srt_counter, context->encoded_crlf);
24+
(*srt_counter)++;
25+
snprintf(timeline, sizeof(timeline), "%u%s", *srt_counter, context->encoded_crlf);
2526
used = encode_line(context, context->buffer, (unsigned char *)timeline);
26-
write_wrapped(context->out->fh, context->buffer, used);
27+
write_wrapped(out_fh, context->buffer, used);
2728
snprintf(timeline, sizeof(timeline), "%02u:%02u:%02u,%03u --> %02u:%02u:%02u,%03u%s",
2829
h1, m1, s1, ms1, h2, m2, s2, ms2, context->encoded_crlf);
2930
used = encode_line(context, context->buffer, (unsigned char *)timeline);
3031
dbg_print(CCX_DMT_DECODER_608, "\n- - - SRT caption - - -\n");
3132
dbg_print(CCX_DMT_DECODER_608, "%s", timeline);
3233

33-
write_wrapped(context->out->fh, context->buffer, used);
34+
write_wrapped(out_fh, context->buffer, used);
3435
int len = strlen(string);
3536
unsigned char *unescaped = (unsigned char *)malloc(len + 1);
3637
if (!unescaped)
@@ -69,20 +70,28 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta
6970
dbg_print(CCX_DMT_DECODER_608, "\r");
7071
dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline);
7172
}
72-
write_wrapped(context->out->fh, el, u);
73-
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
73+
write_wrapped(out_fh, el, u);
74+
write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length);
7475
begin += strlen((const char *)begin) + 1;
7576
}
7677

7778
dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n");
7879

79-
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
80+
write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length);
8081
free(el);
8182
free(unescaped);
8283

8384
return 0;
8485
}
8586

87+
/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for
88+
if there is any.
Wrapper around write_stringz_as_srt_to_output() that writes to the default
output (context->out->fh) and numbers entries with context->srt_counter. */
89+
int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end)
90+
{
91+
return write_stringz_as_srt_to_output(string, context, ms_start, ms_end,
92+
context->out->fh, &context->srt_counter);
93+
}
94+
8695
int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context)
8796
{
8897
int ret = 0;
@@ -155,7 +164,18 @@ int write_cc_subtitle_as_srt(struct cc_subtitle *sub, struct encoder_ctx *contex
155164
{
156165
if (sub->type == CC_TEXT)
157166
{
158-
ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time);
167+
// For teletext multi-page extraction (issue #665), use page-specific output
168+
struct ccx_s_write *out = get_teletext_output(context, sub->teletext_page);
169+
unsigned int *counter = get_teletext_srt_counter(context, sub->teletext_page);
170+
if (out && counter)
171+
{
172+
ret = write_stringz_as_srt_to_output(sub->data, context, sub->start_time, sub->end_time,
173+
out->fh, counter);
174+
}
175+
else
176+
{
177+
ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time);
178+
}
159179
freep(&sub->data);
160180
sub->nb_data = 0;
161181
ret = 1;

src/lib_ccx/lib_ccx.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,23 @@ struct file_report
4343
};
4444

4545
// Stuff for telxcc.c
46+
#define MAX_TLT_PAGES_EXTRACT 8 // Maximum number of teletext pages to extract simultaneously
47+
4648
struct ccx_s_teletext_config
4749
{
4850
uint8_t verbose : 1; // should telxcc be verbose?
49-
uint16_t page; // teletext page containing cc we want to filter
51+
uint16_t page; // teletext page containing cc we want to filter (legacy, first page)
5052
uint16_t tid; // 13-bit packet ID for teletext stream
5153
double offset; // time offset in seconds
5254
uint8_t bom : 1; // print UTF-8 BOM characters at the beginning of output
5355
uint8_t nonempty : 1; // produce at least one (dummy) frame
5456
// uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format
5557
// uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too
56-
uint16_t user_page; // Page selected by user, which MIGHT be different to 'page' depending on autodetection stuff
58+
uint16_t user_page; // Page selected by user (legacy, first page)
59+
// Multi-page teletext extraction (issue #665)
60+
uint16_t user_pages[MAX_TLT_PAGES_EXTRACT]; // Pages selected by user for extraction
61+
int num_user_pages; // Number of pages to extract (0 = auto-detect single page)
62+
int extract_all_pages; // If 1, extract all detected subtitle pages
5763
int dolevdist; // 0=Don't attempt to correct errors
5864
int levdistmincnt, levdistmaxpct; // Means 2 fails or less is "the same", 10% or less is also "the same"
5965
struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process

0 commit comments

Comments
 (0)