Skip to content

Commit 0e99e3a

Browse files
authored
Fix autotagger results penalisation due to the data source field (beetbox#6077)
This PR refactors the metadata source plugin architecture to fix incorrect data source penalty calculations during import matching.

**Key Changes:**

- **Fixed distance calculation**: Removed `track_distance()` and `album_distance()` methods from `MetadataSourcePlugin`. The data source mismatch penalty is now calculated directly in `beets/autotag/distance.py`, like most of the other field-based penalties.
- **Plugin registration**: Metadata source plugins now register themselves in the `_instance_by_data_source` dict on instantiation, enabling efficient penalty lookups.
- **Renamed configuration options**:
  - `source_weight` → `data_source_mismatch_penalty` (plugin-level)
  - `match.distance_weights.source` → `match.distance_weights.data_source` (global matching)
- **Documentation**: Added a `yaml` block with default configuration values to each metadata source documentation page. The renamed penalty field is included in each block to underline that it is available for every data source.
2 parents fe98841 + 3b38045 commit 0e99e3a

File tree

17 files changed

+323
-176
lines changed

17 files changed

+323
-176
lines changed

beets/autotag/distance.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,12 @@ def add_string(self, key: str, str1: str | None, str2: str | None):
345345
dist = string_dist(str1, str2)
346346
self.add(key, dist)
347347

348+
def add_data_source(self, before: str | None, after: str | None) -> None:
    """Add a ``data_source`` penalty when the two sources differ.

    Nothing is added when ``before`` equals ``after``. Otherwise the
    penalty looked up for ``after`` is added if ``before`` is truthy, or
    if more than one metadata source plugin is loaded.
    """
    if before == after:
        return

    # Without a previously recorded source, a penalty is only added when
    # several metadata source plugins are loaded.
    if not before:
        if len(metadata_plugins.find_metadata_source_plugins()) <= 1:
            return

    self.add("data_source", metadata_plugins.get_penalty(after))
353+
348354

349355
@cache
350356
def get_track_length_grace() -> float:
@@ -408,8 +414,7 @@ def track_distance(
408414
if track_info.medium and item.disc:
409415
dist.add_expr("medium", item.disc != track_info.medium)
410416

411-
# Plugins.
412-
dist.update(metadata_plugins.track_distance(item, track_info))
417+
dist.add_data_source(item.get("data_source"), track_info.data_source)
413418

414419
return dist
415420

@@ -525,7 +530,6 @@ def distance(
525530
for _ in range(len(items) - len(mapping)):
526531
dist.add("unmatched_tracks", 1.0)
527532

528-
# Plugins.
529-
dist.update(metadata_plugins.album_distance(items, album_info, mapping))
533+
dist.add_data_source(likelies["data_source"], album_info.data_source)
530534

531535
return dist

beets/config_default.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ match:
166166
missing_tracks: medium
167167
unmatched_tracks: medium
168168
distance_weights:
169-
source: 2.0
169+
data_source: 2.0
170170
artist: 3.0
171171
album: 3.0
172172
media: 1.0

beets/metadata_plugins.py

Lines changed: 35 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99

1010
import abc
1111
import re
12-
import warnings
12+
from functools import cache, cached_property
1313
from typing import TYPE_CHECKING, Generic, Literal, Sequence, TypedDict, TypeVar
1414

1515
import unidecode
16+
from confuse import NotFoundError
1617
from typing_extensions import NotRequired
1718

1819
from beets.util import cached_classproperty
@@ -23,36 +24,14 @@
2324
if TYPE_CHECKING:
2425
from collections.abc import Iterable
2526

26-
from confuse import ConfigView
27-
28-
from .autotag import Distance
2927
from .autotag.hooks import AlbumInfo, Item, TrackInfo
3028

3129

30+
@cache
3231
def find_metadata_source_plugins() -> list[MetadataSourcePlugin]:
33-
"""Returns a list of MetadataSourcePlugin subclass instances
34-
35-
Resolved from all currently loaded beets plugins.
36-
"""
37-
38-
all_plugins = find_plugins()
39-
metadata_plugins: list[MetadataSourcePlugin | BeetsPlugin] = []
40-
for plugin in all_plugins:
41-
if isinstance(plugin, MetadataSourcePlugin):
42-
metadata_plugins.append(plugin)
43-
elif hasattr(plugin, "data_source"):
44-
# TODO: Remove this in the future major release, v3.0.0
45-
warnings.warn(
46-
f"{plugin.__class__.__name__} is used as a legacy metadata source. "
47-
"It should extend MetadataSourcePlugin instead of BeetsPlugin. "
48-
"Support for this will be removed in the v3.0.0 release!",
49-
DeprecationWarning,
50-
stacklevel=2,
51-
)
52-
metadata_plugins.append(plugin)
53-
54-
# typeignore: BeetsPlugin is not a MetadataSourcePlugin (legacy support)
55-
return metadata_plugins # type: ignore[return-value]
32+
"""Return a list of all loaded metadata source plugins."""
33+
# TODO: Make this an isinstance(MetadataSourcePlugin, ...) check in v3.0.0
34+
return [p for p in find_plugins() if hasattr(p, "data_source")] # type: ignore[misc]
5635

5736

5837
@notify_info_yielded("albuminfo_received")
@@ -95,46 +74,17 @@ def track_for_id(_id: str) -> TrackInfo | None:
9574
return None
9675

9776

98-
def track_distance(item: Item, info: TrackInfo) -> Distance:
99-
"""Returns the track distance for an item and trackinfo.
100-
101-
Returns a Distance object is populated by all metadata source plugins
102-
that implement the :py:meth:`MetadataSourcePlugin.track_distance` method.
103-
"""
104-
from beets.autotag.distance import Distance
105-
106-
dist = Distance()
107-
for plugin in find_metadata_source_plugins():
108-
dist.update(plugin.track_distance(item, info))
109-
return dist
110-
111-
112-
def album_distance(
113-
items: Sequence[Item],
114-
album_info: AlbumInfo,
115-
mapping: dict[Item, TrackInfo],
116-
) -> Distance:
117-
"""Returns the album distance calculated by plugins."""
118-
from beets.autotag.distance import Distance
119-
120-
dist = Distance()
121-
for plugin in find_metadata_source_plugins():
122-
dist.update(plugin.album_distance(items, album_info, mapping))
123-
return dist
124-
125-
126-
def _get_distance(
127-
config: ConfigView, data_source: str, info: AlbumInfo | TrackInfo
128-
) -> Distance:
129-
"""Returns the ``data_source`` weight and the maximum source weight
130-
for albums or individual tracks.
131-
"""
132-
from beets.autotag.distance import Distance
133-
134-
dist = Distance()
135-
if info.data_source == data_source:
136-
dist.add("source", config["source_weight"].as_number())
137-
return dist
77+
@cache
def get_penalty(data_source: str | None) -> float:
    """Return the mismatch penalty configured for ``data_source``.

    The first loaded metadata source plugin whose ``data_source`` equals
    the argument supplies the value. When no plugin matches, the default
    declared on ``MetadataSourcePlugin`` is returned. Results are memoised
    per argument by ``functools.cache``.
    """
    for plugin in find_metadata_source_plugins():
        if plugin.data_source == data_source:
            return plugin.data_source_mismatch_penalty
    return MetadataSourcePlugin.DEFAULT_DATA_SOURCE_MISMATCH_PENALTY
13888

13989

14090
class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
@@ -145,12 +95,29 @@ class MetadataSourcePlugin(BeetsPlugin, metaclass=abc.ABCMeta):
14595
and tracks, and to retrieve album and track information by ID.
14696
"""
14797

98+
DEFAULT_DATA_SOURCE_MISMATCH_PENALTY = 0.5
99+
100+
@cached_classproperty
101+
def data_source(cls) -> str:
102+
"""The data source name for this plugin.
103+
104+
This is inferred from the plugin name.
105+
"""
106+
return cls.__name__.replace("Plugin", "") # type: ignore[attr-defined]
107+
108+
@cached_property
def data_source_mismatch_penalty(self) -> float:
    """Return this plugin's data source mismatch penalty.

    The deprecated ``source_weight`` option is read first. If that lookup
    raises ``NotFoundError``, the ``data_source_mismatch_penalty`` option
    is read instead. The value is cached on the instance after first use.
    """
    try:
        penalty = self.config["source_weight"].as_number()
    except NotFoundError:
        penalty = self.config["data_source_mismatch_penalty"].as_number()
    return penalty
114+
148115
def __init__(self, *args, **kwargs) -> None:
149116
super().__init__(*args, **kwargs)
150117
self.config.add(
151118
{
152119
"search_limit": 5,
153-
"source_weight": 0.5,
120+
"data_source_mismatch_penalty": self.DEFAULT_DATA_SOURCE_MISMATCH_PENALTY, # noqa: E501
154121
}
155122
)
156123

@@ -224,35 +191,6 @@ def tracks_for_ids(self, ids: Sequence[str]) -> Iterable[TrackInfo | None]:
224191

225192
return (self.track_for_id(id) for id in ids)
226193

227-
def album_distance(
228-
self,
229-
items: Sequence[Item],
230-
album_info: AlbumInfo,
231-
mapping: dict[Item, TrackInfo],
232-
) -> Distance:
233-
"""Calculate the distance for an album based on its items and album info."""
234-
return _get_distance(
235-
data_source=self.data_source, info=album_info, config=self.config
236-
)
237-
238-
def track_distance(
239-
self,
240-
item: Item,
241-
info: TrackInfo,
242-
) -> Distance:
243-
"""Calculate the distance for a track based on its item and track info."""
244-
return _get_distance(
245-
data_source=self.data_source, info=info, config=self.config
246-
)
247-
248-
@cached_classproperty
249-
def data_source(cls) -> str:
250-
"""The data source name for this plugin.
251-
252-
This is inferred from the plugin name.
253-
"""
254-
return cls.__name__.replace("Plugin", "") # type: ignore[attr-defined]
255-
256194
def _extract_id(self, url: str) -> str | None:
257195
"""Extract an ID from a URL for this metadata source plugin.
258196

beets/plugins.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import inspect
2121
import re
2222
import sys
23+
import warnings
2324
from collections import defaultdict
2425
from functools import wraps
2526
from importlib import import_module
@@ -160,19 +161,46 @@ class BeetsPlugin(metaclass=abc.ABCMeta):
160161
import_stages: list[ImportStageFunc]
161162

162163
def __init_subclass__(cls) -> None:
163-
# Dynamically copy methods to BeetsPlugin for legacy support
164-
# TODO: Remove this in the future major release, v3.0.0
164+
"""Enable legacy metadata‐source plugins to work with the new interface.
165+
166+
When a plugin subclass of BeetsPlugin defines a `data_source` attribute
167+
but does not inherit from MetadataSourcePlugin, this hook:
168+
169+
1. Skips abstract classes.
170+
2. Warns that the class should extend MetadataSourcePlugin (deprecation).
171+
3. Copies any nonabstract methods from MetadataSourcePlugin onto the
172+
subclass to provide the full plugin API.
173+
174+
This compatibility layer will be removed in the v3.0.0 release.
175+
"""
176+
# TODO: Remove in v3.0.0
165177
if inspect.isabstract(cls):
166178
return
167179

168180
from beets.metadata_plugins import MetadataSourcePlugin
169181

170-
abstractmethods = MetadataSourcePlugin.__abstractmethods__
182+
if issubclass(cls, MetadataSourcePlugin) or not hasattr(
183+
cls, "data_source"
184+
):
185+
return
186+
187+
warnings.warn(
188+
f"{cls.__name__} is used as a legacy metadata source. "
189+
"It should extend MetadataSourcePlugin instead of BeetsPlugin. "
190+
"Support for this will be removed in the v3.0.0 release!",
191+
DeprecationWarning,
192+
stacklevel=3,
193+
)
194+
171195
for name, method in inspect.getmembers(
172-
MetadataSourcePlugin, predicate=inspect.isfunction
196+
MetadataSourcePlugin,
197+
predicate=lambda f: (
198+
inspect.isfunction(f)
199+
and f.__name__ not in MetadataSourcePlugin.__abstractmethods__
200+
and not hasattr(cls, f.__name__)
201+
),
173202
):
174-
if name not in abstractmethods and not hasattr(cls, name):
175-
setattr(cls, name, method)
203+
setattr(cls, name, method)
176204

177205
def __init__(self, name: str | None = None):
178206
"""Perform one-time plugin setup."""
@@ -197,6 +225,37 @@ def __init__(self, name: str | None = None):
197225
if not any(isinstance(f, PluginLogFilter) for f in self._log.filters):
198226
self._log.addFilter(PluginLogFilter(self))
199227

228+
# In order to verify the config we need to make sure the plugin is fully
229+
# configured (plugins usually add the default configuration *after*
230+
# calling super().__init__()).
231+
self.register_listener("pluginload", self.verify_config)
232+
233+
def verify_config(self, *_, **__) -> None:
    """Warn when the deprecated 'source_weight' option is configured.

    Does nothing unless the plugin has a ``data_source`` attribute and
    'source_weight' is present in its configuration. Otherwise every
    configuration source that sets the option for this plugin is reported:
    a source with a filename (the user's config) produces a log warning,
    while a source without one (configuration added by a third-party
    plugin) raises a ``DeprecationWarning``, which is visible to plugin
    developers rather than to users.
    """
    # TODO: Remove in v3.0.0
    if not hasattr(self, "data_source"):
        return
    if "source_weight" not in self.config:
        return

    message = (
        "'source_weight' configuration option is deprecated and will be"
        " removed in v3.0.0. Use 'data_source_mismatch_penalty' instead"
    )
    for source in self.config.root().sources:
        plugin_section = source.get(self.name) or {}
        if "source_weight" not in plugin_section:
            continue
        if source.filename:  # user config
            self._log.warning(message)
        else:  # 3rd-party plugin config
            warnings.warn(message, DeprecationWarning, stacklevel=0)
258+
200259
def commands(self) -> Sequence[Subcommand]:
201260
"""Should return a list of beets.ui.Subcommand objects for
202261
commands that should be added to beets' CLI.

beets/test/helper.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
from beets.util import (
5959
MoveOperation,
6060
bytestring_path,
61-
cached_classproperty,
6261
clean_module_tempdir,
6362
syspath,
6463
)
@@ -495,7 +494,6 @@ def load_plugins(self, *plugins: str) -> None:
495494
# FIXME this should eventually be handled by a plugin manager
496495
plugins = (self.plugin,) if hasattr(self, "plugin") else plugins
497496
self.config["plugins"] = plugins
498-
cached_classproperty.cache.clear()
499497
beets.plugins.load_plugins()
500498

501499
def unload_plugins(self) -> None:

beets/util/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,9 +836,10 @@ def get_most_common_tags(
836836
"country",
837837
"media",
838838
"albumdisambig",
839+
"data_source",
839840
]
840841
for field in fields:
841-
values = [item[field] for item in items if item]
842+
values = [item.get(field) for item in items if item]
842843
likelies[field], freq = plurality(values)
843844
consensus[field] = freq == len(values)
844845

beetsplug/beatport.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,6 @@ def __init__(self):
328328
"apikey": "57713c3906af6f5def151b33601389176b37b429",
329329
"apisecret": "b3fe08c93c80aefd749fe871a16cd2bb32e2b954",
330330
"tokenfile": "beatport_token.json",
331-
"source_weight": 0.5,
332331
}
333332
)
334333
self.config["apikey"].redact = True

beetsplug/discogs.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ def __init__(self):
129129
"apikey": API_KEY,
130130
"apisecret": API_SECRET,
131131
"tokenfile": "discogs_token.json",
132-
"source_weight": 0.5,
133132
"user_token": "",
134133
"separator": ", ",
135134
"index_tracks": False,

docs/changelog.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ Bug fixes:
4545
an import of another :class:`beets.plugins.BeetsPlugin` class. :bug:`6033`
4646
- :doc:`/plugins/fromfilename`: Fix :bug:`5218`, improve the code (refactor
4747
regexps, allow for more cases, add some logging), add tests.
48+
- Metadata source plugins: Fixed data source penalty calculation that was
49+
incorrectly applied during import matching. The ``source_weight``
50+
configuration option has been renamed to ``data_source_mismatch_penalty`` to
51+
better reflect its purpose. :bug:`6066`
4852

4953
For packagers:
5054

@@ -66,12 +70,22 @@ Other changes:
6670
disambiguation stripping.
6771
- When installing ``beets`` via git or locally the version string now reflects
6872
the current git branch and commit hash. :bug:`4448`
73+
- :ref:`match-config`: ``match.distance_weights.source`` configuration has been
74+
renamed to ``match.distance_weights.data_source`` for consistency with the
75+
name of the field it refers to.
6976

7077
For developers and plugin authors:
7178

7279
- Typing improvements in ``beets/logging.py``: ``getLogger`` now returns
7380
``BeetsLogger`` when called with a name, or ``RootLogger`` when called without
7481
a name.
82+
- The ``track_distance()`` and ``album_distance()`` methods have been removed
83+
from ``MetadataSourcePlugin``. Distance calculation for data source mismatches
84+
is now handled automatically by the core matching logic. This change
85+
simplifies the plugin architecture and fixes incorrect penalty calculations.
86+
:bug:`6066`
87+
- Metadata source plugins are now registered globally when instantiated, which
88+
makes their handling slightly more efficient.
7589

7690
2.4.0 (September 13, 2025)
7791
--------------------------

0 commit comments

Comments
 (0)