Skip to content

Commit ab4fad9

Browse files
committed
Take data source into account when deciding duplicate candidates
1 parent e1c05ee commit ab4fad9

File tree

3 files changed

+37
-34
lines changed

3 files changed

+37
-34
lines changed

beets/autotag/hooks.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,16 @@ def __hash__(self) -> int: # type: ignore[override]
5858
class Info(AttrDict[Any]):
5959
"""Container for metadata about a musical entity."""
6060

61+
Identifier = tuple[str | None, str | None]
62+
63+
@property
64+
def id(self) -> str | None:
65+
raise NotImplementedError
66+
67+
@property
68+
def identifier(self) -> Identifier:
69+
return (self.data_source, self.id)
70+
6171
@cached_property
6272
def name(self) -> str:
6373
raise NotImplementedError
@@ -103,6 +113,10 @@ class AlbumInfo(Info):
103113
user items, and later to drive tagging decisions once selected.
104114
"""
105115

116+
@property
117+
def id(self) -> str | None:
118+
return self.album_id
119+
106120
@cached_property
107121
def name(self) -> str:
108122
return self.album or ""
@@ -179,6 +193,10 @@ class TrackInfo(Info):
179193
stand alone for singleton matching.
180194
"""
181195

196+
@property
197+
def id(self) -> str | None:
198+
return self.track_id
199+
182200
@cached_property
183201
def name(self) -> str:
184202
return self.title or ""

beets/autotag/match.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from __future__ import annotations
2020

2121
from enum import IntEnum
22-
from typing import TYPE_CHECKING, Any, NamedTuple, TypeVar
22+
from typing import TYPE_CHECKING, NamedTuple, TypeVar
2323

2424
import lap
2525
import numpy as np
@@ -35,6 +35,11 @@
3535

3636
from beets.library import Item
3737

38+
from .hooks import Info
39+
40+
AnyMatch = TypeVar("AnyMatch", TrackMatch, AlbumMatch)
41+
Candidates = dict[Info.Identifier, AnyMatch]
42+
3843
# Global logger.
3944
log = logging.getLogger("beets")
4045

@@ -179,33 +184,33 @@ def _recommendation(
179184
return rec
180185

181186

182-
AnyMatch = TypeVar("AnyMatch", TrackMatch, AlbumMatch)
183-
184-
185187
def _sort_candidates(candidates: Iterable[AnyMatch]) -> Sequence[AnyMatch]:
186188
"""Sort candidates by distance."""
187189
return sorted(candidates, key=lambda match: match.distance)
188190

189191

190192
def _add_candidate(
191193
items: Sequence[Item],
192-
results: dict[Any, AlbumMatch],
194+
results: Candidates[AlbumMatch],
193195
info: AlbumInfo,
194196
):
195197
"""Given a candidate AlbumInfo object, attempt to add the candidate
196198
to the output dictionary of AlbumMatch objects. This involves
197199
checking the track count, ordering the items, checking for
198200
duplicates, and calculating the distance.
199201
"""
200-
log.debug("Candidate: {0.artist} - {0.album} ({0.album_id})", info)
202+
log.debug(
203+
"Candidate: {0.artist} - {0.album} ({0.album_id}) from {0.data_source}",
204+
info,
205+
)
201206

202207
# Discard albums with zero tracks.
203208
if not info.tracks:
204209
log.debug("No tracks.")
205210
return
206211

207212
# Prevent duplicates.
208-
if info.album_id and info.album_id in results:
213+
if info.album_id and info.identifier in results:
209214
log.debug("Duplicate.")
210215
return
211216

@@ -233,7 +238,7 @@ def _add_candidate(
233238
return
234239

235240
log.debug("Success. Distance: {}", dist)
236-
results[info.album_id] = hooks.AlbumMatch(
241+
results[info.identifier] = hooks.AlbumMatch(
237242
dist, info, dict(item_info_pairs), extra_items, extra_tracks
238243
)
239244

@@ -268,15 +273,15 @@ def tag_album(
268273
log.debug("Tagging {} - {}", cur_artist, cur_album)
269274

270275
# The output result; keys are (data_source, album_id) identifiers.
271-
candidates: dict[Any, AlbumMatch] = {}
276+
candidates: Candidates[AlbumMatch] = {}
272277

273278
# Search by explicit ID.
274279
if search_ids:
275280
for search_id in search_ids:
276281
log.debug("Searching for album ID: {}", search_id)
277282
for _info in metadata_plugins.albums_for_ids(search_id):
278283
_add_candidate(items, candidates, _info)
279-
if opt_candidate := candidates.get(_info.album_id):
284+
if opt_candidate := candidates.get(_info.identifier):
280285
plugins.send("album_matched", match=opt_candidate)
281286

282287
# Use existing metadata or text search.
@@ -320,7 +325,7 @@ def tag_album(
320325
items, search_artist, search_name, va_likely
321326
):
322327
_add_candidate(items, candidates, matched_candidate)
323-
if opt_candidate := candidates.get(matched_candidate.album_id):
328+
if opt_candidate := candidates.get(matched_candidate.identifier):
324329
plugins.send("album_matched", match=opt_candidate)
325330

326331
log.debug("Evaluating {} candidates.", len(candidates))
@@ -345,7 +350,7 @@ def tag_item(
345350
"""
346351
# Holds candidates found so far: keys are (data_source, track_id)
347352
# identifiers; values are TrackMatch objects.
348-
candidates = {}
353+
candidates: Candidates[TrackMatch] = {}
349354
rec: Recommendation | None = None
350355

351356
# First, try matching by the external source ID.
@@ -355,7 +360,7 @@ def tag_item(
355360
log.debug("Searching for track ID: {}", trackid)
356361
for info in metadata_plugins.tracks_for_ids(trackid):
357362
dist = track_distance(item, info, incl_artist=True)
358-
candidates[info.track_id] = hooks.TrackMatch(dist, info)
363+
candidates[info.identifier] = hooks.TrackMatch(dist, info)
359364

360365
# If this is a good match, then don't keep searching.
361366
rec = _recommendation(_sort_candidates(candidates.values()))
@@ -381,7 +386,7 @@ def tag_item(
381386
item, search_artist, search_name
382387
):
383388
dist = track_distance(item, track_info, incl_artist=True)
384-
candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info)
389+
candidates[track_info.identifier] = hooks.TrackMatch(dist, track_info)
385390

386391
# Sort by distance and return with recommendation.
387392
log.debug("Found {} candidates.", len(candidates))

test/autotag/test_match.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -162,41 +162,21 @@ def check_proposal(self, proposal):
162162
assert len(sources) == 2
163163
assert set(sources) == {"Discogs", "Deezer"}
164164

165-
@pytest.mark.xfail(
166-
reason="Same ID from different sources is considered a duplicate (#6181)",
167-
raises=AssertionError,
168-
strict=True,
169-
)
170165
def test_search_album_ids(self, shared_album_id):
171166
_, _, proposal = match.tag_album([Item()], search_ids=[shared_album_id])
172167

173168
self.check_proposal(proposal)
174169

175-
@pytest.mark.xfail(
176-
reason="Same ID from different sources is considered a duplicate (#6181)",
177-
raises=AssertionError,
178-
strict=True,
179-
)
180170
def test_search_album_current_id(self, shared_album_id):
181171
_, _, proposal = match.tag_album([Item(mb_albumid=shared_album_id)])
182172

183173
self.check_proposal(proposal)
184174

185-
@pytest.mark.xfail(
186-
reason="The last match wins",
187-
raises=AssertionError,
188-
strict=True,
189-
)
190175
def test_search_track_ids(self, shared_track_id):
191176
proposal = match.tag_item(Item(), search_ids=[shared_track_id])
192177

193178
self.check_proposal(proposal)
194179

195-
@pytest.mark.xfail(
196-
reason="The last match wins",
197-
raises=AssertionError,
198-
strict=True,
199-
)
200180
def test_search_track_current_id(self, shared_track_id):
201181
proposal = match.tag_item(Item(mb_trackid=shared_track_id))
202182

0 commit comments

Comments
 (0)