Skip to content

Commit 7b43201

Browse files
authored
fix(mp4/mkv): Add HEVC/H.265 caption extraction for MP4 and Matroska containers
2 parents ea1c82a + 44363c0 commit 7b43201

File tree

3 files changed

+357
-11
lines changed

3 files changed

+357
-11
lines changed

src/lib_ccx/matroska.c

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,13 @@ void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp)
678678

679679
ULLONG track = read_vint_length(file);
680680

681-
if (track != mkv_ctx->avc_track_number)
681+
// Check if this is an AVC or HEVC track
682+
int is_avc = (track == mkv_ctx->avc_track_number);
683+
int is_hevc = (track == mkv_ctx->hevc_track_number);
684+
685+
if (!is_avc && !is_hevc)
682686
{
683-
// Skip everything except AVC track
687+
// Skip everything except AVC/HEVC tracks
684688
skip_bytes(file, len - 1); // 1 byte for track
685689
return;
686690
}
@@ -695,7 +699,10 @@ void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp)
695699
frame.data = read_byte_block(file, frame.len);
696700
frame.FTS = frame_timestamp + timecode;
697701

698-
process_avc_frame_mkv(mkv_ctx, frame);
702+
if (is_hevc)
703+
process_hevc_frame_mkv(mkv_ctx, frame);
704+
else
705+
process_avc_frame_mkv(mkv_ctx, frame);
699706

700707
free(frame.data);
701708
}
@@ -740,6 +747,51 @@ int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fram
740747
return status;
741748
}
742749

750+
int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame)
751+
{
752+
int status = 0;
753+
uint32_t i;
754+
struct lib_cc_decode *dec_ctx = update_decoder_list(mkv_ctx->ctx);
755+
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
756+
757+
// Set timing
758+
set_current_pts(dec_ctx->timing, frame.FTS * (MPEG_CLOCK_FREQ / 1000));
759+
set_fts(dec_ctx->timing);
760+
761+
// Set HEVC mode for NAL parsing
762+
dec_ctx->avc_ctx->is_hevc = 1;
763+
764+
// NAL unit length is assumed to be 4 (same as AVC in Matroska)
765+
uint8_t nal_unit_size = 4;
766+
767+
for (i = 0; i < frame.len;)
768+
{
769+
uint32_t nal_length;
770+
771+
nal_length = bswap32(*(long *)&frame.data[i]);
772+
i += nal_unit_size;
773+
774+
if (nal_length > 0)
775+
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
776+
i += nal_length;
777+
}
778+
779+
// Flush any accumulated CC data after processing this frame
780+
// This is critical for HEVC because store_hdcc() is normally called from
781+
// slice_header() which is AVC-only
782+
if (dec_ctx->avc_ctx->cc_count > 0)
783+
{
784+
store_hdcc(enc_ctx, dec_ctx, dec_ctx->avc_ctx->cc_data, dec_ctx->avc_ctx->cc_count,
785+
dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, &mkv_ctx->dec_sub);
786+
dec_ctx->avc_ctx->cc_buffer_saved = CCX_TRUE;
787+
dec_ctx->avc_ctx->cc_count = 0;
788+
}
789+
790+
mkv_ctx->current_second = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
791+
792+
return status;
793+
}
794+
743795
char *get_track_entry_type_description(enum matroska_track_entry_type type)
744796
{
745797
switch (type)
@@ -851,9 +903,11 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
851903
codec_id_string = read_vint_block_string(file);
852904
codec_id = get_track_subtitle_codec_id(codec_id_string);
853905
mprint(" Codec ID: %s\n", codec_id_string);
854-
// We only support AVC by now for EIA-608
906+
// Detect AVC and HEVC tracks for EIA-608/708 caption extraction
855907
if (strcmp((const char *)codec_id_string, (const char *)avc_codec_id) == 0)
856908
mkv_ctx->avc_track_number = track_number;
909+
else if (strcmp((const char *)codec_id_string, (const char *)hevc_codec_id) == 0)
910+
mkv_ctx->hevc_track_number = track_number;
857911
MATROSKA_SWITCH_BREAK(code, code_len);
858912
case MATROSKA_SEGMENT_TRACK_CODEC_PRIVATE:
859913
// We handle DVB's private data differently
@@ -1028,6 +1082,65 @@ void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_strin
10281082
data = read_byte_block(file, size);
10291083
do_NAL(enc_ctx, dec_ctx, data, size, &mkv_ctx->dec_sub);
10301084
}
1085+
else if ((strcmp((const char *)codec_id_string, (const char *)hevc_codec_id) == 0) && mkv_ctx->hevc_track_number == track_number)
1086+
{
1087+
// HEVC uses HEVCDecoderConfigurationRecord format
1088+
// We need to parse this to extract VPS/SPS/PPS NAL units
1089+
dec_ctx->avc_ctx->is_hevc = 1;
1090+
1091+
data = read_byte_block(file, len);
1092+
1093+
// HEVCDecoderConfigurationRecord structure:
1094+
// - configurationVersion (1 byte)
1095+
// - general_profile_space, general_tier_flag, general_profile_idc (1 byte)
1096+
// - general_profile_compatibility_flags (4 bytes)
1097+
// - general_constraint_indicator_flags (6 bytes)
1098+
// - general_level_idc (1 byte)
1099+
// - reserved + min_spatial_segmentation_idc (2 bytes)
1100+
// - reserved + parallelismType (1 byte)
1101+
// - reserved + chromaFormat (1 byte)
1102+
// - reserved + bitDepthLumaMinus8 (1 byte)
1103+
// - reserved + bitDepthChromaMinus8 (1 byte)
1104+
// - avgFrameRate (2 bytes)
1105+
// - constantFrameRate, numTemporalLayers, temporalIdNested, lengthSizeMinusOne (1 byte)
1106+
// - numOfArrays (1 byte)
1107+
// Total header: 23 bytes
1108+
1109+
if (len >= 23)
1110+
{
1111+
uint8_t num_arrays = data[22];
1112+
size_t offset = 23;
1113+
1114+
for (uint8_t arr = 0; arr < num_arrays && offset < len; arr++)
1115+
{
1116+
if (offset + 3 > len)
1117+
break;
1118+
1119+
// uint8_t array_completeness = (data[offset] >> 7) & 1;
1120+
// uint8_t nal_unit_type = data[offset] & 0x3F;
1121+
offset++;
1122+
1123+
uint16_t num_nalus = (data[offset] << 8) | data[offset + 1];
1124+
offset += 2;
1125+
1126+
for (uint16_t n = 0; n < num_nalus && offset < len; n++)
1127+
{
1128+
if (offset + 2 > len)
1129+
break;
1130+
1131+
uint16_t nal_unit_length = (data[offset] << 8) | data[offset + 1];
1132+
offset += 2;
1133+
1134+
if (offset + nal_unit_length > len)
1135+
break;
1136+
1137+
// Process this NAL unit (VPS, SPS, or PPS)
1138+
do_NAL(enc_ctx, dec_ctx, &data[offset], nal_unit_length, &mkv_ctx->dec_sub);
1139+
offset += nal_unit_length;
1140+
}
1141+
}
1142+
}
1143+
}
10311144
else if (strcmp((const char *)codec_id_string, (const char *)dvb_codec_id) == 0)
10321145
{
10331146
enc_ctx->write_previous = 0;
@@ -1530,9 +1643,10 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
15301643
mkv_ctx->sub_tracks = malloc(sizeof(struct matroska_sub_track **));
15311644
if (mkv_ctx->sub_tracks == NULL)
15321645
fatal(EXIT_NOT_ENOUGH_MEMORY, "In matroska_loop: Out of memory allocating sub_tracks.");
1533-
// EIA-608
1646+
// EIA-608/708
15341647
memset(&mkv_ctx->dec_sub, 0, sizeof(mkv_ctx->dec_sub));
15351648
mkv_ctx->avc_track_number = -1;
1649+
mkv_ctx->hevc_track_number = -1;
15361650

15371651
matroska_parse(mkv_ctx);
15381652

@@ -1545,17 +1659,22 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
15451659
// Save values before freeing mkv_ctx
15461660
int sentence_count = mkv_ctx->sentence_count;
15471661
int avc_track_found = mkv_ctx->avc_track_number > -1;
1662+
int hevc_track_found = mkv_ctx->hevc_track_number > -1;
15481663
int got_output = mkv_ctx->dec_sub.got_output;
15491664

15501665
matroska_free_all(mkv_ctx);
15511666

15521667
mprint("\n\n");
15531668

1554-
// Support only one AVC track by now
1555-
if (avc_track_found)
1669+
// Report video tracks found
1670+
if (avc_track_found && hevc_track_found)
1671+
mprint("Found AVC and HEVC tracks. ");
1672+
else if (avc_track_found)
15561673
mprint("Found AVC track. ");
1674+
else if (hevc_track_found)
1675+
mprint("Found HEVC track. ");
15571676
else
1558-
mprint("Found no AVC track. ");
1677+
mprint("Found no AVC/HEVC track. ");
15591678

15601679
if (got_output)
15611680
return 1;

src/lib_ccx/matroska.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ char *matroska_track_text_subtitle_id_extensions[] = {
178178
};
179179

180180
char *avc_codec_id = "V_MPEG4/ISO/AVC";
181+
char *hevc_codec_id = "V_MPEGH/ISO/HEVC";
181182
char *dvb_codec_id = "S_DVBSUB";
182183

183184
/* Messages */
@@ -239,7 +240,8 @@ struct matroska_ctx
239240
struct matroska_sub_track **sub_tracks;
240241
struct lib_ccx_ctx *ctx;
241242
struct cc_subtitle dec_sub;
242-
int avc_track_number; // ID of AVC track. -1 if there is none
243+
int avc_track_number; // ID of AVC track. -1 if there is none
244+
int hevc_track_number; // ID of HEVC track. -1 if there is none
243245
int sub_tracks_count;
244246
int block_index;
245247
int sentence_count;
@@ -270,6 +272,7 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
270272
void parse_segment_cluster(struct matroska_ctx *mkv_ctx);
271273
void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp);
272274
int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame);
275+
int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame);
273276
void parse_segment_track_entry(struct matroska_ctx *mkv_ctx);
274277
void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_string, ULLONG track_number, char *lang);
275278
void parse_segment_tracks(struct matroska_ctx *mkv_ctx);

0 commit comments

Comments
 (0)