Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker/dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ RUN apk add --no-cache --update git curl gcc cmake glew glfw \
zlib-dev libpng-dev libjpeg-turbo-dev openssl-dev freetype-dev libxml2-dev

RUN cd && git clone https://github.com/gpac/gpac
WORKDIR root/gpac/
RUN ./configure && make && make install-lib && cd && rm -rf /root/gpac
WORKDIR /root/gpac/
RUN ./configure && make && make install-lib && cd && rm -rf /root/gpac

WORKDIR /root
RUN git clone https://github.com/CCExtractor/ccextractor.git
Expand Down
1 change: 1 addition & 0 deletions docs/CHANGES.TXT
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
1.0 (to be released)
-----------------
- Fix: Improved handling of IETF language tags in Matroska files (#1665)
- New: Create unit test for rust code (#1615)
- Breaking: Major argument flags revamp for CCExtractor (#1564 & #1619)
- New: Create a Docker image to simplify the CCExtractor usage without any environmental hassle (#1611)
Expand Down
1 change: 1 addition & 0 deletions src/lib_ccx/ccx_common_timing.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ LLONG get_fts(struct ccx_common_timing_ctx *ctx, int current_field)
return fts;
}


LLONG get_fts_max(struct ccx_common_timing_ctx *ctx)
{
#ifndef DISABLE_RUST
Expand Down
1 change: 1 addition & 0 deletions src/lib_ccx/ccx_decoders_vbi.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ void delete_decoder_vbi(struct ccx_decoder_vbi_ctx **arg)

freep(arg);
}

struct ccx_decoder_vbi_ctx *init_decoder_vbi(struct ccx_decoder_vbi_cfg *cfg)
{
struct ccx_decoder_vbi_ctx *vbi;
Expand Down
119 changes: 92 additions & 27 deletions src/lib_ccx/matroska.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,13 @@ void parse_ebml(FILE *file)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping EBML block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down Expand Up @@ -232,10 +235,13 @@ void parse_segment_info(FILE *file)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment info block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down Expand Up @@ -489,10 +495,13 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment cluster block group\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down Expand Up @@ -597,10 +606,13 @@ void parse_segment_cluster(struct matroska_ctx *mkv_ctx)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment cluster block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down Expand Up @@ -728,6 +740,7 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
enum matroska_track_entry_type track_type = MATROSKA_TRACK_TYPE_VIDEO;
char *lang = strdup("eng");
char *header = NULL;
char *lang_ietf = NULL;
char *codec_id_string = NULL;
enum matroska_track_subtitle_codec_id codec_id = MATROSKA_TRACK_SUBTITLE_CODEC_ID_UTF8;

Expand Down Expand Up @@ -863,6 +876,31 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
case MATROSKA_SEGMENT_TRACK_TRICK_MASTER_TRACK_SEGMENT_UID:
read_vint_block_skip(file);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_LANGUAGE_IETF:
lang_ietf = read_vint_block_string(file);
mprint(" Language IETF: %s\n", lang_ietf);
// We'll store this for later use rather than freeing it immediately
if (track_type == MATROSKA_TRACK_TYPE_SUBTITLE)
{
// Don't free lang_ietf here, store in track
if (lang != NULL)
{
// If we previously allocated lang, free it as we'll prefer IETF
free(lang);
lang = NULL;
}
// Default to "eng" if we somehow don't have a language yet
if (lang == NULL)
{
lang = strdup("eng");
}
}
else
{
free(lang_ietf); // Free if not a subtitle track
lang_ietf = NULL;
}
MATROSKA_SWITCH_BREAK(code, code_len);

/* Misc ids */
case MATROSKA_VOID:
Expand All @@ -874,10 +912,13 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment track entry block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand All @@ -888,6 +929,7 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
struct matroska_sub_track *sub_track = malloc(sizeof(struct matroska_sub_track));
sub_track->header = header;
sub_track->lang = lang;
sub_track->lang_ietf = lang_ietf;
sub_track->track_number = track_number;
sub_track->lang_index = 0;
sub_track->codec_id = codec_id;
Expand All @@ -904,6 +946,8 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
else
{
free(lang);
if (lang_ietf)
free(lang_ietf);
if (codec_id_string)
free(codec_id_string);
}
Expand Down Expand Up @@ -997,10 +1041,13 @@ void parse_segment_tracks(struct matroska_ctx *mkv_ctx)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment tracks block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down Expand Up @@ -1058,10 +1105,13 @@ void parse_segment(struct matroska_ctx *mkv_ctx)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping segment block\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
set_bytes(file, pos + len);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand All @@ -1071,11 +1121,15 @@ void parse_segment(struct matroska_ctx *mkv_ctx)
/*
 * Build the output file name for a subtitle track.
 *
 * The name has the form "<basename>_<lang>.<ext>" when track->lang_index is 0
 * and "<basename>_<lang>_<index>.<ext>" otherwise. The IETF language tag
 * (track->lang_ietf) is preferred when the track has one; otherwise the
 * track->lang code is used.
 *
 * Returns a heap-allocated, NUL-terminated string owned by the caller (release
 * it with free()), or NULL if the buffer cannot be allocated.
 */
char *generate_filename_from_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
{
	// Use lang_ietf if available, otherwise fall back to lang
	const char *lang_to_use = track->lang_ietf ? track->lang_ietf : track->lang;
	// NOTE(review): ownership of the string returned by get_basename() is not
	// visible from here, so it is not freed — confirm against its definition.
	const char *base_name = get_basename(mkv_ctx->filename);
	const char *extension = matroska_track_text_subtitle_id_extensions[track->codec_id];

	// Size the buffer from the actual components instead of assuming that
	// everything fits in 200 bytes: the input file name can be arbitrarily
	// long. The extra 32 bytes cover the two '_' separators, the '.', up to
	// 20 digits of a 64-bit index and the terminating NUL. The historical
	// 200-byte size is kept as a lower bound.
	size_t buf_size = strlen(base_name) + strlen(lang_to_use) + strlen(extension) + 32;
	if (buf_size < 200)
		buf_size = 200;

	char *buf = malloc(buf_size);
	if (buf == NULL)
		return NULL;

	if (track->lang_index == 0)
	{
		snprintf(buf, buf_size, "%s_%s.%s", base_name, lang_to_use, extension);
	}
	else
	{
		snprintf(buf, buf_size, "%s_%s_" LLD ".%s", base_name, lang_to_use,
			 track->lang_index, extension);
	}
	return buf;
}


Expand Down Expand Up @@ -1263,6 +1317,8 @@ void free_sub_track(struct matroska_sub_track *track)
free(track->header);
if (track->lang != NULL)
free(track->lang);
if (track->lang_ietf != NULL)
free(track->lang_ietf);
if (track->codec_id_string != NULL)
free(track->codec_id_string);
for (int i = 0; i < track->sentence_count; i++)
Expand All @@ -1281,7 +1337,12 @@ void matroska_save_all(struct matroska_ctx *mkv_ctx, char *lang)
{
if (lang)
{
if ((match = strstr(lang, mkv_ctx->sub_tracks[i]->lang)) != NULL)
// Try to match against IETF tag first if available
if (mkv_ctx->sub_tracks[i]->lang_ietf &&
(match = strstr(lang, mkv_ctx->sub_tracks[i]->lang_ietf)) != NULL)
save_sub_track(mkv_ctx, mkv_ctx->sub_tracks[i]);
// Fall back to 3-letter code
else if ((match = strstr(lang, mkv_ctx->sub_tracks[i]->lang)) != NULL)
save_sub_track(mkv_ctx, mkv_ctx->sub_tracks[i]);
}
else
Expand Down Expand Up @@ -1337,9 +1398,13 @@ void matroska_parse(struct matroska_ctx *mkv_ctx)
default:
if (code_len == MATROSKA_MAX_ID_LENGTH)
{
mprint(MATROSKA_ERROR "Unknown element 0x%x at position " LLD ", skipping file parsing\n", code,
mprint(MATROSKA_WARNING "Unknown element 0x%x at position " LLD ", skipping this element\n", code,
get_current_byte(file) - MATROSKA_MAX_ID_LENGTH);
return;
// Skip just the unknown element, not the entire block
read_vint_block_skip(file);
// Reset code and code_len to start fresh with next element
code = 0;
code_len = 0;
}
break;
}
Expand Down
2 changes: 2 additions & 0 deletions src/lib_ccx/matroska.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
/* Misc ids */
#define MATROSKA_VOID 0xEC
#define MATROSKA_CRC32 0xBF
/*
 * Track-entry element carrying the track language as an IETF (BCP 47) tag.
 * Read as a string in parse_segment_track_entry() and stored in
 * matroska_sub_track.lang_ietf. It is a track element rather than a generic
 * one, despite being listed in this section.
 */
#define MATROSKA_SEGMENT_TRACK_LANGUAGE_IETF 0x22B59D

/* DEFENCE FROM THE FOOL - deprecated IDs */
#define MATROSKA_SEGMENT_TRACK_TRACK_TIMECODE_SCALE 0x23314F
Expand Down Expand Up @@ -214,6 +215,7 @@ struct matroska_avc_frame {
struct matroska_sub_track {
char* header; // Style header for ASS/SSA (and other) subtitles
char* lang;
char *lang_ietf; //IETF language tag (BCP47)
ULLONG track_number;
ULLONG lang_index;
enum matroska_track_subtitle_codec_id codec_id;
Expand Down
Loading
Loading