diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py
index d6e14c175f..cd73f021d8 100644
--- a/beetsplug/lyrics.py
+++ b/beetsplug/lyrics.py
@@ -190,17 +190,24 @@ def format_url(url: str, params: JSONDict | None) -> str:
         return f"{url}?{urlencode(params)}"
 
     def get_text(
-        self, url: str, params: JSONDict | None = None, **kwargs
+        self,
+        url: str,
+        params: JSONDict | None = None,
+        force_utf8: bool = False,
+        **kwargs,
     ) -> str:
         """Return text / HTML data from the given URL.
 
-        Set the encoding to None to let requests handle it because some sites
-        set it incorrectly.
+        By default the encoding is reset to None so requests detects it from
+        the content; ``force_utf8`` decodes as UTF-8 instead (used for Genius).
         """
         url = self.format_url(url, params)
         self.debug("Fetching HTML from {}", url)
         r = self.get(url, **kwargs)
-        r.encoding = None
+        if force_utf8:
+            r.encoding = "utf-8"
+        else:
+            r.encoding = None
         return r.text
 
     def get_json(self, url: str, params: JSONDict | None = None, **kwargs):
@@ -544,6 +551,16 @@ class Genius(SearchBackend):
     def headers(self) -> dict[str, str]:
         return {"Authorization": f"Bearer {self.config['genius_api_key']}"}
 
+    def get_text(
+        self,
+        url: str,
+        params: JSONDict | None = None,
+        force_utf8: bool = True,
+        **kwargs,
+    ) -> str:
+        """Force UTF-8 encoding for Genius to avoid MacRoman misdetection."""
+        return super().get_text(url, params, force_utf8=force_utf8, **kwargs)
+
     def search(self, artist: str, title: str) -> Iterable[SearchResult]:
         search_data: GeniusAPI.Search = self.get_json(
             self.SEARCH_URL,
@@ -557,7 +574,10 @@ def search(self, artist: str, title: str) -> Iterable[SearchResult]:
     def scrape(cls, html: str) -> str | None:
         if m := cls.LYRICS_IN_JSON_RE.search(html):
             html_text = cls.remove_backslash(m[0]).replace(r"\n", "\n")
-            return cls.get_soup(html_text).get_text().strip()
+            lyrics = cls.get_soup(html_text).get_text().strip()
+            # Lyrics come from embedded JSON, so escaped quotes may still remain
+            lyrics = re.sub(r'\\+"', '"', lyrics)
+            return lyrics
 
         return None
 
diff --git a/test/plugins/lyrics_pages.py b/test/plugins/lyrics_pages.py
index 15cb812a10..83f14e07b7 100644
--- a/test/plugins/lyrics_pages.py
+++ b/test/plugins/lyrics_pages.py
@@ -576,4 +576,172 @@ def backend(self) -> str:
         """,
         marks=[pytest.mark.xfail(reason="Tekstowo seems to be broken again")],
     ),
+    LyricsPage.make(
+        "https://genius.com/Caparezza-argenti-vive-lyrics",
+        """
+        Ciao Dante, ti ricordi di me? Sono Filippo Argenti
+        Il vicino di casa che nella Commedia ponesti tra questi violenti
+        Sono quello che annega nel fango, pestato dai demoni intorno
+        Cos'è, vuoi provocarmi, sommo? Puoi solo provocarmi sonno!
+        """,
+        artist="Caparezza",
+        track_title="Argenti vive",
+        marks=[xfail_on_ci("Genius returns 403 FORBIDDEN in CI")],
+    ),
+    LyricsPage.make(
+        "https://genius.com/Arctic-monkeys-r-u-mine-lyrics",
+        """
+        I go crazy 'cause here isn't where I wanna be
+        And satisfaction feels like a distant memory
+        And I can't help myself, all I
+        Wanna hear her say is "Are you mine?"
+        """,
+        artist="Arctic Monkeys",
+        track_title="R U Mine?",
+        marks=[xfail_on_ci("Genius returns 403 FORBIDDEN in CI")],
+    ),
+    LyricsPage.make(
+        "https://www.azlyrics.com/lyrics/derivakat/bountyhunter.html",
+        """
+        [Romanized:]
+
+        Living legend made of myths and made of stories
+        But I'm present, find the mark and I'll take the glory
+        Hit the dead-end, cornered like an animal, it just
+        Takes one second, I'm nothing if not practical
+        But I don't really care if you're right or you're wrong
+        Bùguǎn nǐ de wùhuì or you're just a disgrace
+        Get in the zone when I put on my song
+        Bìrán de xiànshí when I'm up in your face
+        Zhēn tài kě qíng kùn zài lóng lǐ de nǐ
+        But you'll find in the end that I'll leave you erased
+        (If they call?) I'm the one
+        (Got a job?) Get it done
+        All alone, you and me, I'll be leaving no trace
+
+        No place where I won't go to
+        No limits I won't break through
+        No godless deed I won't do
+        If they got problems, then they know to
+
+        Call the bounty hunter
+        Yeah, they got my number
+        I'm the problem solver
+        Leave you six feet under
+        You won't see another
+        Winter, spring or summer
+        Wǒ huì ràng nǐ xiāoshī
+        No way to recover (Hey!)
+
+        Run it up
+        Night or day (Hey!)
+        Hunt you down
+        Run away
+
+        No mercy from this mercenary
+        Take away your sanctuary
+        Shìlì jiù xiàng 20/20
+        Way I see you'll end up buried
+        Don't deviate
+        Zhǎodào nǐ so I terminate
+        Wúlùn, wǒ huì make you pay
+        I'll double tap you like bang, bang
+
+        (Dead lilac) A ghost you only heard in rumor
+        (Dead lilac) But only if you heard me sooner
+        (Dead lilac) Losing your chances and losing your time
+        (Dead lilac) shìjiè wèilái méi nǐ de cúnzài (Yeah)
+
+        No place where I won't go to (I won't go to)
+        No limits I won't break through (I won't break through)
+        No godless deed I won't do (That I won't do)
+        If they got problems, then they know to
+
+        Call the bounty hunter (Hey)
+        Yeah, they got my number
+        I'm the problem solver (Hey)
+        Leave you six feet under
+        You won't see another (Hey)
+        Winter, spring or summer
+        Wǒ huì ràng nǐ xiāoshī (Hey)
+        No way to recover (Hey!)
+
+        Run it up (Run it up)
+        Night or day (Hey!) (Day)
+        Hunt you down (Hunt you down)
+        Run away
+
+        [English/Chinese:]
+
+        Living legend made of myths and made of stories
+        But I'm present, find the mark and I'll take the glory
+        Hit the dead-end, cornered like an animal, it just
+        Takes one second, I'm nothing if not practical
+        But I don't really care if you're right or you're wrong
+        不管你的误会 or you're just a disgrace
+        Get in the zone when I put on my song
+        必然的现实 when I'm up in your face
+        真太可情 困在笼里的你
+        But you'll find in the end that I'll leave you erased
+        (If they call?) I'm the one
+        (Got a job?) Get it done
+        All alone, you and me, I'll be leaving no trace
+
+        No place where I won't go to
+        No limits I won't break through
+        No godless deed I won't do
+        If they got problems, then they know to
+
+        Call the bounty hunter
+        Yeah, they got my number
+        I'm the problem solver
+        Leave you six feet under
+        You won't see another
+        Winter, spring or summer
+        我会让你消失
+        No way to recover (Hey!)
+
+        Run it up
+        Night or day (Hey!)
+        Hunt you down
+        Run away
+
+        No mercy from this mercenary
+        Take away your sanctuary
+        视力就像 20/20
+        Way I see you'll end up buried
+        Don't deviate
+        找到你 so I terminate
+        无论, 我会 make you pay
+        I'll double tap you like bang, bang
+
+        (Dead lilac) A ghost you only heard in rumor
+        (Dead lilac) But only if you heard me sooner
+        (Dead lilac) Losing your chances and losing your time
+        (Dead lilac) 世界未来没你的存在 (Yeah)
+
+        No place where I won't go to (I won't go to)
+        No limits I won't break through (I won't break through)
+        No godless deed I won't do (That I won't do)
+        If they got problems, then they know to
+
+        Call the bounty hunter (Hey)
+        Yeah, they got my number
+        I'm the problem solver (Hey)
+        Leave you six feet under
+        You won't see another (Hey)
+        Winter, spring or summer
+        我会让你消失 (Hey)
+        No way to recover (Hey!)
+
+        Run it up (Run it up)
+        Night or day (Hey!) (Day)
+        Hunt you down (Hunt you down)
+        Run away
+        """,
+        artist="Derivakat",
+        track_title="Bounty Hunter",
+        url_title="Derivakat - Bounty Hunter Lyrics | AZLyrics.com",
+        marks=[xfail_on_ci("AZLyrics is blocked by Cloudflare")],
+    ),
 ]