Skip to content

Commit 701271e

Browse files
cfsmp3 and claude committed
fix(mp4): Add HEVC/H.265 caption extraction for MP4 containers
PR #1852 added HEVC caption extraction for MPEG-TS containers, but MP4/MKV containers weren't supported. This adds HEVC support for MP4 containers using GPAC.

Changes:
- Add HEVC subtype definitions (hev1, hvc1)
- Add process_hevc_sample() to parse HEVC NAL units and extract CC
- Add process_hevc_track() to iterate through HEVC track samples
- Detect and process HEVC tracks in processmp4()
- Add store_hdcc() call to flush buffered CC data after each sample

The key fix was adding store_hdcc() after processing each sample. Without this, CC data was being parsed but never output because store_hdcc() is normally called from slice_header() which is AVC-only.

Closes #1690 (for MP4 containers)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 5bad373 commit 701271e

File tree

1 file changed

+226
-2
lines changed

1 file changed

+226
-2
lines changed

src/lib_ccx/mp4.c

Lines changed: 226 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
#include <stdlib.h>
44

55
#include <gpac/isomedia.h>
6+
#include <gpac/mpeg4_odf.h>
67
#include "lib_ccx.h"
78
#include "utility.h"
89
#include "ccx_encoders_common.h"
@@ -16,6 +17,14 @@
1617

1718
#define GF_ISOM_SUBTYPE_C708 GF_4CC('c', '7', '0', '8')
1819

20+
// HEVC subtypes (hev1, hvc1)
21+
#ifndef GF_ISOM_SUBTYPE_HEV1
22+
#define GF_ISOM_SUBTYPE_HEV1 GF_4CC('h', 'e', 'v', '1')
23+
#endif
24+
#ifndef GF_ISOM_SUBTYPE_HVC1
25+
#define GF_ISOM_SUBTYPE_HVC1 GF_4CC('h', 'v', 'c', '1')
26+
#endif
27+
1928
static short bswap16(short v)
2029
{
2130
return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00);
@@ -101,6 +110,88 @@ static int process_avc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_AVCConf
101110

102111
return status;
103112
}
113+
114+
/*
 * Feed one HEVC sample from an MP4 track to the NAL parser.
 *
 * The sample payload is treated as a sequence of length-prefixed NAL units.
 * Each prefix is c->nal_unit_size bytes long (1, 2 or 4) and is read as a
 * big-endian unsigned integer. Every non-empty NAL unit is passed to
 * do_NAL(); once the whole sample has been walked, any caption bytes that
 * were buffered in the decoder context are flushed with store_hdcc().
 *
 * ctx       - library context used to look up the decoder/encoder contexts
 * timescale - media timescale used to convert DTS + CTS offset to PTS
 * c         - HEVC configuration supplying nal_unit_size
 * s         - sample to parse (data, dataLength, DTS, CTS_Offset are read)
 * sub       - subtitle sink handed through to do_NAL() and store_hdcc()
 *
 * Returns status, which is 0 on every path in this function: a malformed
 * sample is logged and the rest of it is skipped rather than reported as an
 * error. Note that an early return on a malformed sample also skips the
 * final store_hdcc() flush.
 */
static int process_hevc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_HEVCConfig *c, GF_ISOSample *s, struct cc_subtitle *sub)
{
	int status = 0;
	u32 i;
	s32 signed_cts = (s32)s->CTS_Offset;
	struct lib_cc_decode *dec_ctx = NULL;
	struct encoder_ctx *enc_ctx = NULL;

	dec_ctx = update_decoder_list(ctx);
	enc_ctx = update_encoder_list(ctx);

	// Enable HEVC mode for NAL parsing
	dec_ctx->avc_ctx->is_hevc = 1;

	set_current_pts(dec_ctx->timing, (s->DTS + signed_cts) * MPEG_CLOCK_FREQ / timescale);
	set_fts(dec_ctx->timing);

	for (i = 0; i < s->dataLength;)
	{
		u32 nal_length = 0;
		u32 b;

		// i < dataLength here, so the subtraction cannot wrap; comparing
		// against the remaining byte count avoids overflow in i + size.
		if (c->nal_unit_size > s->dataLength - i)
		{
			mprint("Corrupted packet detected in process_hevc_sample. dataLength "
			       "%u is less than index %u + nal_unit_size %u. Ignoring.\n",
			       s->dataLength, i, c->nal_unit_size);
			return status;
		}
		switch (c->nal_unit_size)
		{
			case 1:
			case 2:
			case 4:
				break;
			default:
				mprint("Unexpected nal_unit_size %u in HEVC config\n", c->nal_unit_size);
				return status;
		}

		// Assemble the big-endian length one byte at a time. This reads
		// exactly nal_unit_size bytes (no over-read through a wider type),
		// has no alignment or aliasing requirements, never sign-extends,
		// and does not depend on the host byte order.
		for (b = 0; b < c->nal_unit_size; b++)
			nal_length = (nal_length << 8) | (u32)(unsigned char)s->data[i + b];

		const u32 previous_index = i;
		i += c->nal_unit_size;

		// i <= dataLength is guaranteed by the prefix check above. Testing
		// the length against the bytes that are left rejects oversized
		// values even when i + nal_length would wrap around in 32 bits.
		if (nal_length > s->dataLength - i)
		{
			mprint("Corrupted sample detected in process_hevc_sample. dataLength %u "
			       "is less than index %u + nal_unit_size %u + nal_length %u. Ignoring.\n",
			       s->dataLength, previous_index, c->nal_unit_size, nal_length);
			return status;
		}

		s_nalu_stats.total += 1;
		temp_debug = 0;

		if (nal_length > 0)
		{
			// For HEVC, NAL type is in bits [6:1] of byte 0
			u8 nal_type = ((unsigned char)s->data[i] >> 1) & 0x3F;
			// Only types below 32 are counted in the statistics table
			if (nal_type < 32)
				s_nalu_stats.type[nal_type] += 1;
			do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(s->data[i]), nal_length, sub);
		}
		i += nal_length;
	}
	assert(i == s->dataLength);

	// For HEVC, we need to flush CC data after each sample (unlike H.264 which does this in slice_header)
	// This is because HEVC SEI messages contain the CC data and we don't parse slice headers
	if (dec_ctx->avc_ctx->cc_count > 0)
	{
		// Store the CC data for processing
		store_hdcc(enc_ctx, dec_ctx, dec_ctx->avc_ctx->cc_data, dec_ctx->avc_ctx->cc_count,
			   dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, sub);
		dec_ctx->avc_ctx->cc_buffer_saved = CCX_TRUE;
		dec_ctx->avc_ctx->cc_count = 0;
	}

	return status;
}
104195
static int process_xdvb_track(struct lib_ccx_ctx *ctx, const char *basename, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
105196
{
106197
u32 timescale, i, sample_count;
@@ -223,6 +314,83 @@ static int process_avc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_I
223314
return status;
224315
}
225316

317+
/*
 * Run every sample of one HEVC track through process_hevc_sample().
 *
 * The HEVC configuration is fetched again whenever the sample description
 * index reported by gf_isom_get_sample() differs from the one last used.
 * Progress is reported through activity_progress() each time the integer
 * percentage changes, and once more with 100 after the loop ends.
 *
 * basename is accepted but not used by this function.
 *
 * Returns 0 when the track has no samples, -1 when the HEVC configuration
 * for a sample cannot be obtained, and otherwise the status of the last
 * process_hevc_sample() call (the loop stops at the first non-zero one).
 */
static int process_hevc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
{
	int status = 0;
	u32 active_sdi = 0;
	u32 idx;
	GF_HEVCConfig *hevc_cfg = NULL;
	struct lib_cc_decode *dec_ctx = update_decoder_list(ctx);

	// Switch the shared NAL parser into HEVC mode
	dec_ctx->avc_ctx->is_hevc = 1;

	const u32 sample_count = gf_isom_get_sample_count(f, track);
	if (sample_count < 1)
		return 0;

	const u32 timescale = gf_isom_get_media_timescale(f, track);

	for (idx = 0; idx < sample_count; idx++)
	{
		u32 sdi;
		// Sample numbers passed to GPAC are 1-based
		GF_ISOSample *sample = gf_isom_get_sample(f, track, idx + 1, &sdi);

		if (sample != NULL)
		{
			if (sdi != active_sdi)
			{
				// The sample description changed: drop the old
				// configuration and load the one for this sample.
				if (hevc_cfg != NULL)
				{
					gf_odf_hevc_cfg_del(hevc_cfg);
					hevc_cfg = NULL;
				}

				hevc_cfg = gf_isom_hevc_config_get(f, track, sdi);
				if (hevc_cfg == NULL)
				{
					gf_isom_sample_del(&sample);
					status = -1;
					break;
				}

				active_sdi = sdi;
			}

			status = process_hevc_sample(ctx, timescale, hevc_cfg, sample, sub);
			gf_isom_sample_del(&sample);

			if (status != 0)
				break;
		}

		// Report progress only when the whole-number percentage moves
		const int percent = (int)((idx * 100) / sample_count);
		if (ctx->last_reported_progress != percent)
		{
			const int elapsed_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
			activity_progress(percent, elapsed_sec / 60, elapsed_sec % 60);
			ctx->last_reported_progress = percent;
		}
	}

	// Final progress report, issued whether the loop completed or stopped early
	const int final_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
	activity_progress(100, final_sec / 60, final_sec % 60);

	if (hevc_cfg != NULL)
	{
		gf_odf_hevc_cfg_del(hevc_cfg);
		hevc_cfg = NULL;
	}

	return status;
}
393+
226394
static char *format_duration(u64 dur, u32 timescale, char *szDur, size_t szDur_size)
227395
{
228396
u32 h, m, s, ms;
@@ -544,7 +712,7 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
544712
{
545713
int mp4_ret = 0;
546714
GF_ISOFile *f;
547-
u32 i, j, track_count, avc_track_count, cc_track_count;
715+
u32 i, j, track_count, avc_track_count, hevc_track_count, cc_track_count;
548716
struct cc_subtitle dec_sub;
549717
struct lib_cc_decode *dec_ctx = NULL;
550718
struct encoder_ctx *enc_ctx = update_encoder_list(ctx);
@@ -575,6 +743,7 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
575743
track_count = gf_isom_get_track_count(f);
576744

577745
avc_track_count = 0;
746+
hevc_track_count = 0;
578747
cc_track_count = 0;
579748

580749
for (i = 0; i < track_count; i++)
@@ -589,9 +758,11 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
589758
cc_track_count++;
590759
if (type == GF_ISOM_MEDIA_VISUAL && subtype == GF_ISOM_SUBTYPE_AVC_H264)
591760
avc_track_count++;
761+
if (type == GF_ISOM_MEDIA_VISUAL && (subtype == GF_ISOM_SUBTYPE_HEV1 || subtype == GF_ISOM_SUBTYPE_HVC1))
762+
hevc_track_count++;
592763
}
593764

594-
mprint("MP4: found %u tracks: %u avc and %u cc\n", track_count, avc_track_count, cc_track_count);
765+
mprint("MP4: found %u tracks: %u avc, %u hevc and %u cc\n", track_count, avc_track_count, hevc_track_count, cc_track_count);
595766

596767
for (i = 0; i < track_count; i++)
597768
{
@@ -661,6 +832,54 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
661832
}
662833
break;
663834

835+
case MEDIA_TYPE(GF_ISOM_MEDIA_VISUAL, GF_ISOM_SUBTYPE_HEV1): // vide:hev1 (HEVC)
836+
case MEDIA_TYPE(GF_ISOM_MEDIA_VISUAL, GF_ISOM_SUBTYPE_HVC1): // vide:hvc1 (HEVC)
837+
if (cc_track_count && !cfg->mp4vidtrack)
838+
continue;
839+
// If there are multiple tracks, change fd for different tracks
840+
if (hevc_track_count > 1)
841+
{
842+
switch_output_file(ctx, enc_ctx, i);
843+
}
844+
// Enable HEVC mode for caption extraction
845+
dec_ctx->avc_ctx->is_hevc = 1;
846+
847+
// Process VPS/SPS/PPS from HEVC config to enable SEI parsing
848+
GF_HEVCConfig *hevc_cnf = gf_isom_hevc_config_get(f, i + 1, 1);
849+
if (hevc_cnf != NULL)
850+
{
851+
// Process parameter sets from config
852+
for (j = 0; j < gf_list_count(hevc_cnf->param_array); j++)
853+
{
854+
GF_NALUFFParamArray *ar = (GF_NALUFFParamArray *)gf_list_get(hevc_cnf->param_array, j);
855+
if (ar)
856+
{
857+
for (u32 k = 0; k < gf_list_count(ar->nalus); k++)
858+
{
859+
GF_NALUFFParam *sl = (GF_NALUFFParam *)gf_list_get(ar->nalus, k);
860+
if (sl && sl->data && sl->size > 0)
861+
{
862+
do_NAL(enc_ctx, dec_ctx, (unsigned char *)sl->data, sl->size, &dec_sub);
863+
}
864+
}
865+
}
866+
}
867+
gf_odf_hevc_cfg_del(hevc_cnf);
868+
}
869+
if (process_hevc_track(ctx, file, f, i + 1, &dec_sub) != 0)
870+
{
871+
mprint("Error on process_hevc_track()\n");
872+
free(dec_ctx->xds_ctx);
873+
return -3;
874+
}
875+
if (dec_sub.got_output)
876+
{
877+
mp4_ret = 1;
878+
encode_sub(enc_ctx, &dec_sub);
879+
dec_sub.got_output = 0;
880+
}
881+
break;
882+
664883
default:
665884
if (type != GF_ISOM_MEDIA_CLOSED_CAPTION && type != GF_ISOM_MEDIA_SUBT && type != GF_ISOM_MEDIA_TEXT)
666885
break; // ignore non cc track
@@ -794,6 +1013,11 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
7941013
else
7951014
mprint("Found no AVC track(s). ");
7961015

1016+
if (hevc_track_count)
1017+
mprint("Found %d HEVC track(s). ", hevc_track_count);
1018+
else
1019+
mprint("Found no HEVC track(s). ");
1020+
7971021
if (cc_track_count)
7981022
mprint("Found %d CC track(s).\n", cc_track_count);
7991023
else

0 commit comments

Comments
 (0)