Skip to content

Commit 4ae5a79

Browse files
authored
Merge branch 'CCExtractor:master' into gaurav-v2
2 parents bbf9d8a + 0626bb5 commit 4ae5a79

File tree

20 files changed

+518
-57
lines changed

20 files changed

+518
-57
lines changed

README.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@ The core functionality is written in C. Other languages used include C++ and Pyt
2828

2929
Downloads for precompiled binaries and source code can be found [on our website](https://ccextractor.org/public/general/downloads/).
3030

31+
### WebVTT Output Options
32+
33+
CCExtractor supports optional WebVTT-specific headers for advanced use cases
34+
such as HTTP Live Streaming (HLS).
35+
36+
#### `--timestamp-map`
37+
38+
Enable writing the `X-TIMESTAMP-MAP` header in WebVTT output.
39+
40+
This header is required for HLS workflows but is **disabled by default**
41+
to preserve compatibility with standard WebVTT players.
42+
43+
Example:
44+
```bash
45+
ccextractor input.ts --timestamp-map -o output.vtt
46+
```
3147

3248
### Windows Package Managers
3349

@@ -116,4 +132,4 @@ For more information visit the CCExtractor website: [https://www.ccextractor.org
116132

117133
## License
118134

119-
GNU General Public License version 2.0 (GPL-2.0)
135+
GNU General Public License version 2.0 (GPL-2.0)

docs/CHANGES.TXT

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
0.96.6 (unreleased)
22
-------------------
3+
- New: Add optional machine-readable JSON output for -out=report via --report-format json
4+
- Fix: Incorrect strlen argument when writing end timestamps in MKV subtitle extraction (WebVTT, SRT, ASS/SSA)
5+
- Fix: File descriptor leak and missing open() error check in MKV subtitle track saving
36
- Fix: DVB EIT start time BCD decoding in XMLTV output causing invalid timestamps (#1835)
47
- New: Add Snap packaging support with Snapcraft configuration and GitHub Actions CI workflow.
8+
- New: Implement dictionary-based capitalization and censorship for transcripts
59
- Fix: Clear status line output on Linux/WSL to prevent text artifacts (#2017)
610
- Fix: Prevent infinite loop on truncated MKV files
711
- Fix: Various memory safety and stability fixes in demuxers (MP4, PS, MKV, DVB)

src/lib_ccx/ccx_common_option.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ void init_options(struct ccx_s_options *options)
145145
options->enc_cfg.scc_framerate = 0; // Default: 29.97fps for SCC output
146146
options->enc_cfg.scc_accurate_timing = 0; // Default: off for backwards compatibility (issue #1120)
147147
options->enc_cfg.extract_only_708 = 0;
148+
options->report_format = NULL;
148149

149150
options->settings_dtvcc.enabled = 0;
150151
options->settings_dtvcc.active_services_count = 0;

src/lib_ccx/ccx_common_option.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ struct ccx_s_options // Options from user parameters
189189
enum ccx_datasource input_source; // Files, stdin or network
190190

191191
char *output_filename;
192+
char *report_format; // Report output format: NULL = default text report; otherwise a format name, e.g. "json"
192193

193194
char **inputfile; // List of files to process
194195
int num_input_files; // How many?

src/lib_ccx/ccx_demuxer.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#ifndef DISABLE_RUST
77
void ccxr_demuxer_reset(struct ccx_demuxer *ctx);
88
void ccxr_demuxer_close(struct ccx_demuxer *ctx);
9+
void ccxr_demuxer_delete(struct ccx_demuxer *ctx);
910
int ccxr_demuxer_isopen(const struct ccx_demuxer *ctx);
1011
int ccxr_demuxer_open(struct ccx_demuxer *ctx, const char *file);
1112
LLONG ccxr_demuxer_get_file_size(struct ccx_demuxer *ctx);
@@ -304,6 +305,12 @@ void ccx_demuxer_delete(struct ccx_demuxer **ctx)
304305
{
305306
struct ccx_demuxer *lctx = *ctx;
306307
int i;
308+
309+
#ifndef DISABLE_RUST
310+
// Let Rust free any memory it allocated
311+
ccxr_demuxer_delete(lctx);
312+
#endif
313+
307314
dinit_cap(lctx);
308315
freep(&lctx->last_pat_payload);
309316
for (i = 0; i < MAX_PSI_PID; i++)
@@ -322,7 +329,14 @@ void ccx_demuxer_delete(struct ccx_demuxer **ctx)
322329
freep(lctx->PIDs_programs + i);
323330
}
324331

332+
#ifdef DISABLE_RUST
333+
// Only free filebuffer in pure C mode - Rust handles its own memory
325334
freep(&lctx->filebuffer);
335+
#else
336+
// Rust already freed this in ccxr_demuxer_delete, just clear the pointer
337+
lctx->filebuffer = NULL;
338+
#endif
339+
326340
freep(ctx);
327341
}
328342

src/lib_ccx/ccx_encoders_transcript.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,8 @@ int write_cc_subtitle_as_transcript(struct cc_subtitle *sub, struct encoder_ctx
120120
start_time = sub->start_time;
121121
end_time = sub->end_time;
122122
}
123-
if (context->sentence_cap)
124-
{
125-
// TODO capitalize (context, line_number,data);
126-
// TODO correct_case_with_dictionary(line_number, data);
127-
}
123+
if (sub->data)
124+
correct_spelling_and_censor_words(context, (unsigned char *)sub->data, strlen((char *)sub->data));
128125

129126
if (start_time == -1)
130127
{

src/lib_ccx/matroska.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,6 +1742,12 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
17421742
free(filename);
17431743
}
17441744

1745+
if (desc < 0)
1746+
{
1747+
mprint("\nError: Cannot create output file for subtitle track\n");
1748+
return;
1749+
}
1750+
17451751
if (track->header != NULL)
17461752
write_wrapped(desc, track->header, strlen(track->header));
17471753

@@ -1795,7 +1801,7 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
17951801

17961802
write_wrapped(desc, timestamp_start, strlen(timestamp_start));
17971803
write_wrapped(desc, " --> ", 5);
1798-
write_wrapped(desc, timestamp_end, strlen(timestamp_start));
1804+
write_wrapped(desc, timestamp_end, strlen(timestamp_end));
17991805

18001806
// writing cue settings list
18011807
if (blockaddition != NULL)
@@ -1836,7 +1842,7 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
18361842
write_wrapped(desc, "\n", 1);
18371843
write_wrapped(desc, timestamp_start, strlen(timestamp_start));
18381844
write_wrapped(desc, " --> ", 5);
1839-
write_wrapped(desc, timestamp_end, strlen(timestamp_start));
1845+
write_wrapped(desc, timestamp_end, strlen(timestamp_end));
18401846
write_wrapped(desc, "\n", 1);
18411847
int size = 0;
18421848
while (*(sentence->text + size) == '\n' || *(sentence->text + size) == '\r')
@@ -1866,7 +1872,7 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
18661872
write_wrapped(desc, "Dialogue: Marked=0,", strlen("Dialogue: Marked=0,"));
18671873
write_wrapped(desc, timestamp_start, strlen(timestamp_start));
18681874
write_wrapped(desc, ",", 1);
1869-
write_wrapped(desc, timestamp_end, strlen(timestamp_start));
1875+
write_wrapped(desc, timestamp_end, strlen(timestamp_end));
18701876
write_wrapped(desc, ",", 1);
18711877
char *text = ass_ssa_sentence_erase_read_order(sentence->text);
18721878
char *text_to_free = text; // Save original pointer for freeing
@@ -1880,6 +1886,9 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
18801886
free(timestamp_end);
18811887
}
18821888
}
1889+
1890+
if (desc != 1)
1891+
close(desc);
18831892
}
18841893

18851894
void free_sub_track(struct matroska_sub_track *track)

0 commit comments

Comments
 (0)