Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions DOCS/man/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2826,16 +2826,19 @@ Subtitles
rendering of ASS/SSA subtitles. It can sometimes be useful to forcibly
override the styling of ASS subtitles, but should be avoided in general.

``--sub-auto=<no|exact|fuzzy|all>``
``--sub-auto=<no|exact|fuzzy|all|closest>``
Load additional subtitle files matching the video filename. The parameter
specifies how external subtitle files are matched. ``exact`` is enabled by
default.

:no: Don't automatically load external subtitle files.
:exact: Load the media filename with subtitle file extension and possibly
language suffixes (default).
:fuzzy: Load all subs containing the media filename.
:all: Load all subs in the current and ``--sub-file-paths`` directories.
:fuzzy: Load all subs containing the media filename.
:all: Load all subs in the current and ``--sub-file-paths`` directories.
:closest: Load exactly one external subtitle: the single file whose filename
is most similar to the media's base name. Language/flag suffixes
like ``.en``, ``.eng``, ``.forced`` are ignored for similarity.

``--sub-auto-exts=ext1,ext2,...``
Subtitle extensions to try and match when using ``--sub-auto``. Note that
Expand Down
8 changes: 4 additions & 4 deletions demux/demux_mf.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ static mf_t *open_mf_pattern(void *talloc_ctx, struct demuxer *d, char *filename
}

if (strchr(filename, ',')) {
mp_info(log, "filelist: %s\n", filename);
mp_info(log, "filelist: %s\n", filename ? filename : "(null)");
bstr bfilename = bstr0(filename);

while (bfilename.len) {
Expand Down Expand Up @@ -191,15 +191,15 @@ static mf_t *open_mf_pattern(void *talloc_ctx, struct demuxer *d, char *filename
if (bad_spec || nspec != 1) {
mp_err(log,
"unsupported expr format: '%s' - exactly one format specifier of the form %%[.][NUM]d is expected\n",
filename);
filename ? filename : "(null)");
goto exit_mf;
}

mp_info(log, "search expr: %s\n", filename);
mp_info(log, "search expr: %s\n", filename ? filename : "(null)");

while (error_count < 5) {
if (snprintf(fname, fname_avail, filename, count++) >= fname_avail) {
mp_err(log, "format result too long: '%s'\n", filename);
mp_err(log, "format result too long: '%s'\n", filename ? filename : "(null)");
goto exit_mf;
}
if (!mp_path_exists(fname)) {
Expand Down
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ sources = files(
'misc/json.c',
'misc/language.c',
'misc/natural_sort.c',
'misc/string_similarity.c',
'misc/node.c',
'misc/path_utils.c',
'misc/random.c',
Expand Down
22 changes: 22 additions & 0 deletions misc/language.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "common/common.h"
#include "misc/ctype.h"
#include <string.h>

#define L(s) { #s, sizeof(#s) - 1 }

Expand Down Expand Up @@ -297,6 +298,27 @@ int mp_match_lang(char **langs, const char *lang)
return best_score;
}

// Decide whether `token` should be treated as a trailing filename tag
// (language code or subtitle marker) rather than part of the title.
//
// Returns false for NULL or an empty string. Otherwise returns true if
// mp_match_lang() reports a non-zero score for the token, or if the token is
// byte-for-byte equal to one of the fixed tags listed below. The comparison
// against the fixed tags is case-sensitive.
//
// NOTE(review): the token is matched against a list that contains only the
// token itself. Whether mp_match_lang() rejects arbitrary non-language
// strings in that situation is not visible from here - confirm, since
// otherwise every non-empty token would be classified as a suffix.
bool mp_language_is_suffix_token(const char *token)
{
    static const char *const extra_tags[] = {
        "jp", "chs", "cht", "sub", "subs", "sdh", "forced", "cc",
    };

    if (token == NULL || token[0] == '\0')
        return false;

    // mp_match_lang() takes a NULL-terminated, non-const list of languages.
    char *candidates[] = {(char *)token, NULL};
    if (mp_match_lang(candidates, token) > 0)
        return true;

    const size_t tag_count = sizeof(extra_tags) / sizeof(extra_tags[0]);
    for (size_t n = 0; n < tag_count; n++) {
        if (!strcmp(extra_tags[n], token))
            return true;
    }

    return false;
}

bstr mp_guess_lang_from_filename(bstr name, int *lang_start, enum track_flags *flags)
{
name = bstr_strip(bstr_strip_ext(name));
Expand Down
3 changes: 3 additions & 0 deletions misc/language.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H

#include <stdbool.h>

#include "misc/bstr.h"
#include "common/common.h"

// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
bstr mp_guess_lang_from_filename(bstr name, int *lang_start, enum track_flags *flags);
// Returns true if token gets a non-zero score from mp_match_lang() or equals
// one of a fixed set of subtitle tags (such as "sdh" or "forced").
// NULL and empty strings yield false.
bool mp_language_is_suffix_token(const char *token);

#endif /* MP_LANGUAGE_H */
96 changes: 96 additions & 0 deletions misc/string_similarity.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/

#include <ctype.h>
#include <string.h>

#include "mpv_talloc.h"

#include "common/common.h"
#include "misc/bstr.h"
#include "misc/language.h"
#include "misc/path_utils.h"
#include "misc/string_similarity.h"

// File-local shorthand: defers entirely to mp_language_is_suffix_token().
static bool is_suffix_token(const char *token)
{
    const bool strippable = mp_language_is_suffix_token(token);
    return strippable;
}

// Build a comparison key from a file path.
//
// Steps:
//   1. take the basename of `path` and strip its extension;
//   2. lowercase it (tolower(), so this follows the C locale rules);
//   3. split it into runs of alphanumeric bytes (everything else separates);
//   4. drop trailing tokens for which is_suffix_token() is true;
//   5. concatenate the remaining tokens with no separator.
//
// If nothing is left after step 4 (no tokens at all, or every token was a
// suffix token), the lowercased basename from step 2 is returned unchanged,
// punctuation included.
//
// ta_ctx: talloc parent for the returned string.
// path:   NUL-terminated path; passed straight to mp_basename().
// Returns a newly allocated string owned by ta_ctx.
//
// Only the returned string is attached to ta_ctx; all intermediate buffers
// are kept in a private context and released before returning, so repeated
// calls do not pile up garbage in the caller's context (or leak when ta_ctx
// is NULL).
char *mp_normalize_base_name(void *ta_ctx, const char *path)
{
    void *tmp = talloc_new(NULL);

    struct bstr base = bstr_strip_ext(bstr0(mp_basename(path)));
    char *lowered = talloc_strndup(tmp, base.start, base.len);
    for (int i = 0; lowered[i]; i++)
        lowered[i] = tolower((unsigned char)lowered[i]);

    // Tokenize on non-alphanumeric bytes.
    char **tokens = NULL;
    int ntok = 0;
    char *p = lowered;
    while (*p) {
        while (*p && !isalnum((unsigned char)*p))
            p++;
        if (!*p)
            break;
        char *start = p;
        while (*p && isalnum((unsigned char)*p))
            p++;
        MP_TARRAY_APPEND(tmp, tokens, ntok,
                         talloc_strndup(tmp, start, p - start));
    }

    // Language codes and flags are expected at the end of the name only.
    while (ntok > 0 && is_suffix_token(tokens[ntok - 1]))
        ntok--;

    char *out = talloc_strdup(ta_ctx, "");
    for (int i = 0; i < ntok; i++)
        out = talloc_asprintf_append_buffer(out, "%s", tokens[i]);

    if (!out[0]) {
        // Nothing survived; fall back to the lowercased basename rather than
        // returning an empty key.
        talloc_free(out);
        out = talloc_strdup(ta_ctx, lowered);
    }

    talloc_free(tmp);
    return out;
}

// Levenshtein edit distance between two NUL-terminated strings, counted in
// bytes: the minimum number of single-byte insertions, deletions and
// substitutions that turn `a` into `b`.
//
// If either string is empty the result is the length of the other one.
// Otherwise two rows of (strlen(b) + 1) ints are allocated for the dynamic
// programming table and freed before returning.
int mp_levenshtein_dist(const char *a, const char *b)
{
    const int len_a = (int)strlen(a);
    const int len_b = (int)strlen(b);

    if (!len_a)
        return len_b;
    if (!len_b)
        return len_a;

    int *above = talloc_array(NULL, int, len_b + 1);
    int *row = talloc_array(NULL, int, len_b + 1);

    // Row 0: turning the empty prefix of `a` into b[0..col) costs `col`.
    for (int col = 0; col <= len_b; col++)
        above[col] = col;

    for (int r = 1; r <= len_a; r++) {
        const char ch = a[r - 1];
        row[0] = r;

        for (int col = 1; col <= len_b; col++) {
            int best = above[col - 1] + (ch == b[col - 1] ? 0 : 1); // substitute
            const int removal = above[col] + 1;
            const int insertion = row[col - 1] + 1;
            if (removal < best)
                best = removal;
            if (insertion < best)
                best = insertion;
            row[col] = best;
        }

        // The row just filled becomes the "previous" row of the next pass.
        int *swap = above;
        above = row;
        row = swap;
    }

    const int result = above[len_b];
    talloc_free(above);
    talloc_free(row);
    return result;
}

// Similarity of two NUL-terminated strings as
//     1 - levenshtein(a, b) / max(strlen(a), strlen(b))
// so identical strings give 1.0 and strings with nothing in common give 0.0.
// Two empty strings are defined to be fully similar (1.0).
double mp_similarity_ratio(const char *a, const char *b)
{
    int longest = (int)strlen(a);
    const int other = (int)strlen(b);
    if (other > longest)
        longest = other;

    if (longest == 0)
        return 1.0;

    const int distance = mp_levenshtein_dist(a, b);
    return 1.0 - (double)distance / (double)longest;
}
24 changes: 24 additions & 0 deletions misc/string_similarity.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/

#include <stddef.h>

// Returns a newly allocated string (talloc child of ta_ctx) holding a
// comparison key for path: the basename without extension, lowercased, with
// non-alphanumeric bytes removed and trailing language/flag tokens dropped.
// If that would leave nothing, the lowercased basename is returned instead.
char *mp_normalize_base_name(void *ta_ctx, const char *path);
// Returns 1.0 - mp_levenshtein_dist(a, b) / max(strlen(a), strlen(b)).
// Two empty strings yield 1.0.
double mp_similarity_ratio(const char *a, const char *b);
// Byte-wise Levenshtein edit distance between two NUL-terminated strings.
int mp_levenshtein_dist(const char *a, const char *b);
2 changes: 1 addition & 1 deletion options/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ static const m_option_t mp_opts[] = {
{"autoload-files", OPT_BOOL(autoload_files)},

{"sub-auto", OPT_CHOICE(sub_auto,
{"no", -1}, {"exact", 0}, {"fuzzy", 1}, {"all", 2})},
{"no", -1}, {"exact", 0}, {"fuzzy", 1}, {"all", 2}, {"closest", 3})},
{"sub-auto-exts", OPT_STRINGLIST(sub_auto_exts), .flags = UPDATE_SUB_EXTS},
{"audio-file-auto", OPT_CHOICE(audiofile_auto,
{"no", -1}, {"exact", 0}, {"fuzzy", 1}, {"all", 2})},
Expand Down
Loading