Skip to content

Commit b2ecdde

Browse files
committed
video: merge the tiles of grid heif and avif images
Fixes #13585. Fixes #16486. Switching between different grid images added with --external-files is also supported. The priority of independent tracks is reverted for images, because we now want to select a dependent track to trigger the merging, rather than small preview tracks.
1 parent ec4d50f commit b2ecdde

4 files changed

Lines changed: 287 additions & 3 deletions

File tree

demux/demux_lavf.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,89 @@ static void add_new_streams(demuxer_t *demuxer)
900900
handle_new_stream(demuxer, priv->num_streams);
901901
}
902902

903+
// Import libavformat "tile grid" stream groups (used by grid HEIF/AVIF
// images) into mpv's stream headers.
//
// For every stream group of type AV_STREAM_GROUP_PARAMS_TILE_GRID, a
// struct mp_tile_grid is allocated as talloc child of the demuxer and
// attached to sh_stream.tile_grid of each video stream referenced by a tile.
//
// A group is ignored as a whole if any tile lies outside the coded canvas or
// references a stream index that does not exist in the group. This guarantees
// that every entry of mp_tile_grid.tiles is fully initialized once the grid
// is visible through a stream.
//
// With libavformat older than 61.1.100 this function does nothing.
static void handle_tile_grid_groups(demuxer_t *demuxer)
{
#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(61, 1, 100)
    lavf_priv_t *priv = demuxer->priv;
    AVFormatContext *avfc = priv->avfc;

    for (unsigned int g = 0; g < avfc->nb_stream_groups; g++) {
        AVStreamGroup *stream_group = avfc->stream_groups[g];
        if (stream_group->type != AV_STREAM_GROUP_PARAMS_TILE_GRID)
            continue;

        const AVStreamGroupTileGrid *av_grid = stream_group->params.tile_grid;
        if (!av_grid || av_grid->nb_tiles == 0)
            continue;

        // Validate all tiles up front, before anything is allocated or
        // attached to a stream.
        bool valid = true;
        for (unsigned int i = 0; i < av_grid->nb_tiles; i++) {
            int x = av_grid->offsets[i].horizontal;
            int y = av_grid->offsets[i].vertical;
            unsigned int group_idx = av_grid->offsets[i].idx;

            if (x < 0 || y < 0 ||
                x >= av_grid->coded_width || y >= av_grid->coded_height)
            {
                MP_WARN(demuxer, "Tile grid offsets exceed coded canvas (%dx%d) - "
                        "ignoring tile grid.\n",
                        av_grid->coded_width, av_grid->coded_height);
                valid = false;
                break;
            }

            if (group_idx >= stream_group->nb_streams) {
                MP_WARN(demuxer, "Tile %u references out-of-range group "
                        "stream index %u (group has %u streams) - "
                        "ignoring tile grid.\n",
                        i, group_idx, stream_group->nb_streams);
                valid = false;
                break;
            }
        }
        if (!valid)
            continue;

        struct mp_tile_grid *mp_grid = talloc_zero(demuxer, struct mp_tile_grid);
        mp_grid->nb_tiles = (int)av_grid->nb_tiles;
        mp_grid->width = av_grid->width;
        mp_grid->height = av_grid->height;
        mp_grid->coded_width = av_grid->coded_width;
        mp_grid->coded_height = av_grid->coded_height;
        mp_grid->horizontal_offset = av_grid->horizontal_offset;
        mp_grid->vertical_offset = av_grid->vertical_offset;
        memcpy(mp_grid->background, av_grid->background,
               sizeof(mp_grid->background));

        // talloc_array() does not zero the memory; the loop below writes
        // every entry (all indices were validated above).
        mp_grid->tiles = talloc_array(mp_grid, struct mp_tile_grid_entry,
                                      av_grid->nb_tiles);

        for (unsigned int i = 0; i < av_grid->nb_tiles; i++) {
            // offsets[i].idx indexes the group's stream list; translate it
            // to the global AVStream index used to look up priv->streams.
            unsigned int group_idx = av_grid->offsets[i].idx;
            int ff_idx = stream_group->streams[group_idx]->index;

            mp_grid->tiles[i].ff_index = ff_idx;
            mp_grid->tiles[i].horizontal = av_grid->offsets[i].horizontal;
            mp_grid->tiles[i].vertical = av_grid->offsets[i].vertical;

            if (ff_idx >= 0 && ff_idx < priv->num_streams &&
                priv->streams[ff_idx])
            {
                struct sh_stream *sh = priv->streams[ff_idx]->sh;
                if (sh && sh->type == STREAM_VIDEO) {
                    sh->tile_grid = mp_grid;
                } else {
                    MP_WARN(demuxer, "Tile %u stream %d is not a video "
                            "stream – ignoring tile grid for it.\n",
                            i, ff_idx);
                }
            }
        }

        MP_VERBOSE(demuxer,
                   "Stream group %u: tile grid %d tile(s), "
                   "display %dx%d, coded %dx%d, offset (%d,%d).\n",
                   g, mp_grid->nb_tiles,
                   mp_grid->width, mp_grid->height,
                   mp_grid->coded_width, mp_grid->coded_height,
                   mp_grid->horizontal_offset, mp_grid->vertical_offset);
    }
#endif
}
985+
903986
static void update_metadata(demuxer_t *demuxer)
904987
{
905988
lavf_priv_t *priv = demuxer->priv;
@@ -1140,6 +1223,8 @@ static int demux_open_lavf(demuxer_t *demuxer, enum demux_check check)
11401223

11411224
add_new_streams(demuxer);
11421225

1226+
handle_tile_grid_groups(demuxer);
1227+
11431228
mp_tags_move_from_av_dictionary(demuxer->metadata, &avfc->metadata);
11441229

11451230
demuxer->ts_resets_possible =

demux/stheader.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ struct sh_stream {
6565
// stream is a picture (such as album art)
6666
struct demux_packet *attached_picture;
6767

68+
// Metadata for tiled grid images.
69+
// All streams belonging to the same group share the same mp_tile_grid
70+
// object.
71+
struct mp_tile_grid *tile_grid;
72+
6873
// Internal to demux.c
6974
struct demux_stream *ds;
7075
};
@@ -142,4 +147,37 @@ struct mp_codec_params {
142147
double duration;
143148
};
144149

150+
// Layout of a tiled ("grid") image: a set of tiles placed on a coded canvas,
// of which a cropped rectangle is the image to display.
struct mp_tile_grid {
    // Number of entries in the tiles array.
    int nb_tiles;

    // Size of the image after cropping.
    int width;
    int height;

    // Size of the canvas before cropping (union of all tile areas plus any
    // alignment padding on the right/bottom edges).
    int coded_width;
    int coded_height;

    // Position of the top-left corner of the displayed rectangle inside the
    // coded canvas. The remaining crop amounts follow from it:
    //   crop_right  = coded_width  - width  - horizontal_offset
    //   crop_bottom = coded_height - height - vertical_offset
    int horizontal_offset, vertical_offset;

    // Placement of each tile; array with nb_tiles entries.
    struct mp_tile_grid_entry *tiles;

    // Colour (R,G,B,A bytes) used to fill areas not covered by any tile.
    uint8_t background[4];
};
172+
173+
// Placement of a single tile inside a tiled grid image.
struct mp_tile_grid_entry {
    // AVStream.index of the tile's stream (global within the
    // AVFormatContext); used to locate the corresponding track.
    int ff_index;
    // Pixel coordinates of the tile's top-left corner in the assembled image.
    int horizontal, vertical;
};
182+
145183
#endif /* MPLAYER_STHEADER_H */

player/loadfile.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,7 +504,7 @@ static bool compare_track(struct track *t1, struct track *t2, char **langs, bool
504504
if (t1->image != t2->image)
505505
return !t1->image;
506506
if (t1->dependent_track != t2->dependent_track)
507-
return !t1->dependent_track;
507+
return t1->image ? t1->dependent_track : !t1->dependent_track;
508508
if (t1->stream && t2->stream && opts->hls_bitrate >= 0 &&
509509
t1->stream->hls_bitrate != t2->stream->hls_bitrate)
510510
{
@@ -706,7 +706,7 @@ void mp_switch_track_n(struct MPContext *mpctx, int order, enum stream_type type
706706
if (track == current)
707707
return;
708708

709-
if (current && current->sink) {
709+
if (current && current->sink && !current->stream->tile_grid) {
710710
MP_ERR(mpctx, "Can't disable input to complex filter.\n");
711711
goto error;
712712
}

player/video.c

Lines changed: 162 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "sub/osd.h"
4040
#include "video/hwdec.h"
4141
#include "filters/f_decoder_wrapper.h"
42+
#include "filters/f_lavfi.h"
4243
#include "video/out/vo.h"
4344

4445
#include "core.h"
@@ -155,10 +156,33 @@ static void vo_chain_uninit(struct vo_chain *vo_c)
155156
// this does not free the VO
156157
}
157158

159+
static void uninit_grid(struct MPContext *mpctx)
160+
{
161+
struct track *primary = mpctx->vo_chain->track;
162+
if (!primary || !primary->stream || !primary->stream->tile_grid)
163+
return;
164+
165+
struct mp_tile_grid *grid = primary->stream->tile_grid;
166+
for (int n = 0; n < mpctx->num_tracks; n++) {
167+
struct track *track = mpctx->tracks[n];
168+
if (!track->stream || track->stream->tile_grid != grid)
169+
continue;
170+
if (track->sink) {
171+
mp_pin_disconnect(track->sink);
172+
track->sink = NULL;
173+
}
174+
if (track != primary)
175+
track->dec = NULL;
176+
track->selected = false;
177+
reselect_demux_stream(mpctx, track, false);
178+
}
179+
}
180+
158181
void uninit_video_chain(struct MPContext *mpctx)
159182
{
160183
if (mpctx->vo_chain) {
161184
reset_video_state(mpctx);
185+
uninit_grid(mpctx);
162186
vo_chain_uninit(mpctx->vo_chain);
163187
mpctx->vo_chain = NULL;
164188

@@ -201,14 +225,151 @@ int init_video_decoder(struct MPContext *mpctx, struct track *track)
201225
return 0;
202226
}
203227

228+
static char *tile_grid_graph(void *ctx, const struct mp_tile_grid *grid)
229+
{
230+
bstr buf = {0};
231+
232+
for (int i = 0; i < grid->nb_tiles; i++)
233+
bstr_xappend_asprintf(ctx, &buf, "[in%d]", i);
234+
235+
bstr_xappend_asprintf(ctx, &buf, "xstack=inputs=%d:layout=", grid->nb_tiles);
236+
for (int i = 0; i < grid->nb_tiles; i++) {
237+
if (i > 0)
238+
bstr_xappend(ctx, &buf, bstr0("|"));
239+
bstr_xappend_asprintf(ctx, &buf, "%d_%d", grid->tiles[i].horizontal,
240+
grid->tiles[i].vertical);
241+
}
242+
bstr_xappend_asprintf(ctx, &buf,
243+
":fill=0x%02X%02X%02X@0x%02X",
244+
grid->background[0], grid->background[1],
245+
grid->background[2], grid->background[3]);
246+
247+
if (grid->coded_width != grid->width || grid->coded_height != grid->height) {
248+
bstr_xappend_asprintf(ctx, &buf, ",crop=w=%d:h=%d:x=%d:y=%d", grid->width,
249+
grid->height, grid->horizontal_offset, grid->vertical_offset);
250+
}
251+
252+
bstr_xappend(ctx, &buf, bstr0("[vo]"));
253+
return buf.start;
254+
}
255+
256+
static struct track *find_tile_track(struct MPContext *mpctx,
257+
const struct mp_tile_grid *tg, int tile_idx)
258+
{
259+
260+
int wanted_ff = tg->tiles[tile_idx].ff_index;
261+
for (int n = 0; n < mpctx->num_tracks; n++) {
262+
struct track *t = mpctx->tracks[n];
263+
if (t->ff_index == wanted_ff && t->stream && t->stream->tile_grid == tg)
264+
return t;
265+
}
266+
return NULL;
267+
}
268+
269+
static void reinit_video_chain_tiled(struct MPContext *mpctx, struct track *track)
270+
{
271+
struct mp_tile_grid *grid = track->stream->tile_grid;
272+
mp_assert(grid);
273+
274+
for (int i = 0; i < grid->nb_tiles; i++) {
275+
struct track *t = find_tile_track(mpctx, grid, i);
276+
if (t) {
277+
t->selected = true;
278+
reselect_demux_stream(mpctx, t, false);
279+
}
280+
}
281+
282+
reinit_video_chain_src(mpctx, NULL);
283+
if (!mpctx->vo_chain)
284+
return;
285+
286+
struct vo_chain *vo_c = mpctx->vo_chain;
287+
288+
void *tmp = talloc_new(NULL);
289+
char *graph_str = tile_grid_graph(tmp, grid);
290+
MP_VERBOSE(mpctx, "Tile grid xstack graph: %s\n", graph_str);
291+
292+
struct mp_lavfi *lavfi =
293+
mp_lavfi_create_graph(vo_c->filter->f, 0, false, NULL, NULL, graph_str);
294+
talloc_free(tmp);
295+
296+
if (!lavfi) {
297+
MP_ERR(mpctx, "Failed to create tile grid filtergraph.\n");
298+
goto err_out;
299+
}
300+
301+
struct mp_filter *lavfi_f = lavfi->f;
302+
303+
struct mp_pin *out_pad = mp_filter_get_named_pin(lavfi_f, "vo");
304+
if (!out_pad || mp_pin_get_dir(out_pad) != MP_PIN_OUT) {
305+
MP_ERR(mpctx, "Tile grid filtergraph missing output pin 'vo'.\n");
306+
goto err_out;
307+
}
308+
vo_c->filter_src = out_pad;
309+
mp_pin_connect(vo_c->filter->f->pins[0], vo_c->filter_src);
310+
311+
for (int i = 0; i < grid->nb_tiles; i++) {
312+
struct track *tile_track = find_tile_track(mpctx, grid, i);
313+
if (!tile_track) {
314+
MP_ERR(mpctx, "No track found for tile %d (ff_index %d).\n",
315+
i, grid->tiles[i].ff_index);
316+
goto err_out;
317+
}
318+
319+
tile_track->vo_c = vo_c;
320+
bool result = init_video_decoder(mpctx, tile_track);
321+
// vo_chain_uninit() only unsets vo_c on the primary track
322+
// (vo_c->track).
323+
tile_track->vo_c = NULL;
324+
if (!result)
325+
goto err_out;
326+
327+
char label[16];
328+
snprintf(label, sizeof(label), "in%d", i);
329+
struct mp_pin *in_pad = mp_filter_get_named_pin(lavfi_f, label);
330+
if (!in_pad || mp_pin_get_dir(in_pad) != MP_PIN_IN) {
331+
MP_ERR(mpctx, "Tile grid filtergraph missing input pin '%s'.\n",
332+
label);
333+
goto err_out;
334+
}
335+
tile_track->sink = in_pad;
336+
mp_pin_connect(tile_track->sink, tile_track->dec->f->pins[0]);
337+
}
338+
339+
struct track *primary = find_tile_track(mpctx, grid, 0);
340+
vo_c->track = primary;
341+
primary->vo_c = vo_c;
342+
vo_c->filter->container_fps =
343+
mp_decoder_wrapper_get_container_fps(primary->dec);
344+
vo_c->is_coverart = !!primary->attached_picture;
345+
vo_c->is_sparse = primary->stream->still_image || vo_c->is_coverart;
346+
347+
if (vo_c->is_coverart)
348+
mp_decoder_wrapper_set_coverart_flag(track->dec, true);
349+
350+
MP_VERBOSE(mpctx, "Tile grid: assembling %d tile(s) into %dx%d image.\n",
351+
grid->nb_tiles, grid->width, grid->height);
352+
return;
353+
354+
err_out:
355+
uninit_video_chain(mpctx);
356+
error_on_track(mpctx, track);
357+
handle_force_window(mpctx, true);
358+
}
359+
204360
void reinit_video_chain(struct MPContext *mpctx)
205361
{
206362
struct track *track = mpctx->current_track[0][STREAM_VIDEO];
207363
if (!track || !track->stream) {
208364
error_on_track(mpctx, track);
209365
return;
210366
}
211-
reinit_video_chain_src(mpctx, track);
367+
368+
if (track->stream->tile_grid) {
369+
reinit_video_chain_tiled(mpctx, track);
370+
} else {
371+
reinit_video_chain_src(mpctx, track);
372+
}
212373
}
213374

214375
static void filter_update_subtitles(void *ctx, double pts)

0 commit comments

Comments
 (0)