Skip to content

Commit 65e935b

Browse files
authored
Perform a regex substitution in the substitute plugin (#5357)
This utilises regex substitution in the substitute plugin. The previous approach only used regex to match the pattern, then replaced it with a static string. This change allows more complex substitutions, where the output depends on the input.

### Example use case

Say we want to keep only the first artist of a multi-artist credit, as in the following list:

```
Neil Young & Crazy Horse -> Neil Young
Michael Hurley, The Holy Modal Rounders, Jeffrey Frederick & The Clamtones -> Michael Hurley
James Yorkston and the Athletes -> James Yorkston
```

This would previously have required three separate rules, one for each resulting artist. By using a regex substitution, we can get the desired behaviour in a single rule:

```yaml
substitute:
  ^(.*?)(,| &| and).*: \1
```

(Capture the text until the first `,`, ` &` or ` and`, then use that capture group as the output.)

### Notes

I've kept the previous behaviour of only applying the first matching rule, but I'm not 100% sure it's the ideal approach. I can imagine both cases where you want to apply several rules in sequence and cases where you want to stop after the first match.

Note: as merged, the diff below no longer stops at the first match. `tmpl_substitute` applies every rule in definition order, each one operating on the output of the previous rule (see `test_rules_applied_in_sequence`). Matching also changed from an anchored `match` that replaced the whole string to `sub`, which replaces only the matched portion wherever it occurs (see `test_partial_substitution`).
2 parents 979f123 + 8e0558b commit 65e935b

File tree

5 files changed

+125
-18
lines changed

5 files changed

+125
-18
lines changed

beets/test/helper.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -503,12 +503,8 @@ def unload_plugins(self) -> None:
503503
Album._queries = getattr(Album, "_original_queries", {})
504504

505505
@contextmanager
506-
def configure_plugin(self, config: list[Any] | dict[str, Any]):
507-
if isinstance(config, list):
508-
beets.config[self.plugin] = config
509-
else:
510-
for key, value in config.items():
511-
beets.config[self.plugin][key] = value
506+
def configure_plugin(self, config: Any):
507+
beets.config[self.plugin].set(config)
512508
self.load_plugins(self.plugin)
513509

514510
yield

beetsplug/substitute.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ def tmpl_substitute(self, text):
3434
"""Do the actual replacing."""
3535
if text:
3636
for pattern, replacement in self.substitute_rules:
37-
if pattern.match(text.lower()):
38-
return replacement
37+
text = pattern.sub(replacement, text)
3938
return text
4039
else:
4140
return ""
@@ -47,10 +46,8 @@ def __init__(self):
4746
substitute rules.
4847
"""
4948
super().__init__()
50-
self.substitute_rules = []
5149
self.template_funcs["substitute"] = self.tmpl_substitute
52-
53-
for key, view in self.config.items():
54-
value = view.as_str()
55-
pattern = re.compile(key.lower())
56-
self.substitute_rules.append((pattern, value))
50+
self.substitute_rules = [
51+
(re.compile(key, flags=re.IGNORECASE), value)
52+
for key, value in self.config.flatten().items()
53+
]

docs/changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ New features:
3131
* Beets now uses ``platformdirs`` to determine the default music directory.
3232
This location varies between systems -- for example, users can configure it
3333
on Unix systems via ``user-dirs.dirs(5)``.
34+
* :doc:`/plugins/substitute`: Allow the replacement string to use capture groups
35+
from the match. It is thus possible to create more general rules, applying to
36+
many different artists at once.
3437

3538
Bug fixes:
3639

docs/plugins/substitute.rst

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,34 @@ the ``rewrite`` plugin modifies the metadata, this plugin does not.
1111

1212
Enable the ``substitute`` plugin (see :ref:`using-plugins`), then make a ``substitute:`` section in your config file to contain your rules.
1313
Each rule consists of a case-insensitive regular expression pattern, and a
14-
replacement value. For example, you might use:
14+
replacement string. For example, you might use:
15+
16+
.. code-block:: yaml
17+
18+
substitute:
19+
.*jimi hendrix.*: Jimi Hendrix
20+
21+
The replacement can be an expression utilising the matched regex, allowing us
22+
to create more general rules. Say for example, we want to sort all albums by
23+
multiple artists into the directory of the first artist. We can thus capture
24+
everything before the first ``","``, ``" &"`` or ``" and"``, and use this capture
25+
group in the output, discarding the rest of the string.
26+
27+
.. code-block:: yaml
1528
1629
substitute:
17-
.*jimi hendrix.*: Jimi Hendrix
30+
^(.*?)(,| &| and).*: \1
31+
32+
This would handle all the below cases in a single rule:
33+
34+
Bob Dylan and The Band -> Bob Dylan
35+
Neil Young & Crazy Horse -> Neil Young
36+
James Yorkston, Nina Persson & The Second Hand Orchestra -> James Yorkston
1837

1938

2039
To apply the substitution, you have to call the function ``%substitute{}`` in the paths section. For example:
21-
40+
41+
.. code-block:: yaml
42+
2243
paths:
23-
default: %substitute{$albumartist}/$year - $album%aunique{}/$track - $title
44+
default: '%substitute{$albumartist}/$year - $album%aunique{}/$track - $title'

test/plugins/test_substitute.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# This file is part of beets.
2+
# Copyright 2024, Nicholas Boyd Isacsson.
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining
5+
# a copy of this software and associated documentation files (the
6+
# "Software"), to deal in the Software without restriction, including
7+
# without limitation the rights to use, copy, modify, merge, publish,
8+
# distribute, sublicense, and/or sell copies of the Software, and to
9+
# permit persons to whom the Software is furnished to do so, subject to
10+
# the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be
13+
# included in all copies or substantial portions of the Software.
14+
15+
"""Test the substitute plugin regex functionality."""
16+
17+
from beets.test.helper import PluginTestCase
18+
from beetsplug.substitute import Substitute
19+
20+
21+
class SubstitutePluginTest(PluginTestCase):
22+
plugin = "substitute"
23+
preload_plugin = False
24+
25+
def run_substitute(self, config, cases):
26+
with self.configure_plugin(config):
27+
for input, expected in cases:
28+
assert Substitute().tmpl_substitute(input) == expected
29+
30+
def test_simple_substitute(self):
31+
self.run_substitute(
32+
{
33+
"a": "x",
34+
"b": "y",
35+
"c": "z",
36+
},
37+
[("a", "x"), ("b", "y"), ("c", "z")],
38+
)
39+
40+
def test_case_insensitivity(self):
41+
self.run_substitute({"a": "x"}, [("A", "x")])
42+
43+
def test_unmatched_input_preserved(self):
44+
self.run_substitute({"a": "x"}, [("c", "c")])
45+
46+
def test_regex_to_static(self):
47+
self.run_substitute(
48+
{".*jimi hendrix.*": "Jimi Hendrix"},
49+
[("The Jimi Hendrix Experience", "Jimi Hendrix")],
50+
)
51+
52+
def test_regex_capture_group(self):
53+
self.run_substitute(
54+
{"^(.*?)(,| &| and).*": r"\1"},
55+
[
56+
("King Creosote & Jon Hopkins", "King Creosote"),
57+
(
58+
"Michael Hurley, The Holy Modal Rounders, Jeffrey Frederick & "
59+
+ "The Clamtones",
60+
"Michael Hurley",
61+
),
62+
("James Yorkston and the Athletes", "James Yorkston"),
63+
],
64+
)
65+
66+
def test_partial_substitution(self):
67+
self.run_substitute({r"\.": ""}, [("U.N.P.O.C.", "UNPOC")])
68+
69+
def test_rules_applied_in_definition_order(self):
70+
self.run_substitute(
71+
{
72+
"a": "x",
73+
"[ab]": "y",
74+
"b": "z",
75+
},
76+
[
77+
("a", "x"),
78+
("b", "y"),
79+
],
80+
)
81+
82+
def test_rules_applied_in_sequence(self):
83+
self.run_substitute(
84+
{"a": "b", "b": "c", "d": "a"},
85+
[
86+
("a", "c"),
87+
("b", "c"),
88+
("d", "a"),
89+
],
90+
)

0 commit comments

Comments
 (0)