@@ -17,8 +17,6 @@ class LetrasSource : public LyricSourceRemote
1717 const GUID& id () const final { return src_guid; }
1818 std::tstring_view friendly_name () const final { return _T (" letras.com" ); }
1919
20- std::string extract_lyrics_from_page (pfc::string8 page_content) const ;
21-
2220 std::vector<LyricDataRaw> search (const LyricSearchParams& params, abort_callback& abort) final ;
2321 bool lookup (LyricDataRaw& data, abort_callback& abort) final ;
2422};
@@ -56,22 +54,6 @@ static std::string transform_artist_for_url(const std::string_view artist)
5654 return transform_tag_for_url (artist);
5755}
5856
59- std::string LetrasSource::extract_lyrics_from_page (pfc::string8 page_content) const
60- {
61- std::string lyric_text;
62- pugi::xml_document doc;
63- load_html_document (page_content.c_str (), doc);
64-
65- pugi::xpath_query query_lyricdivs (" //div[@class='lyric-original']" );
66- pugi::xpath_node_set lyricdivs = query_lyricdivs.evaluate_node_set (doc);
67- if (!lyricdivs.empty ())
68- {
69- add_all_text_to_string (lyric_text, lyricdivs.first ().node ());
70- }
71-
72- return lyric_text;
73- }
74-
7557std::vector<LyricDataRaw> LetrasSource::search (const LyricSearchParams& params, abort_callback& abort)
7658{
7759 http_request::ptr request = http_client::get ()->create_request (" GET" );
@@ -94,25 +76,45 @@ std::vector<LyricDataRaw> LetrasSource::search(const LyricSearchParams& params,
9476 return {};
9577 }
9678
97- const std::string lyric_text = extract_lyrics_from_page (content);
98- if (lyric_text.empty ())
79+ LyricDataRaw result = {};
80+ result.source_id = id ();
81+ result.source_path = url;
82+
83+ pugi::xml_document doc;
84+ load_html_document (content.c_str (), doc);
85+
86+ std::string lyric_text;
87+ pugi::xpath_query query_lyricdivs (" //div[@class='lyric-original']" );
88+ pugi::xpath_node_set lyricdivs = query_lyricdivs.evaluate_node_set (doc);
89+ if (!lyricdivs.empty ())
90+ {
91+ add_all_text_to_string (lyric_text, lyricdivs.first ().node ());
92+ result.text_bytes = string_to_raw_bytes (trim_surrounding_whitespace (lyric_text));
93+ }
94+
95+ pugi::xpath_query query_title_elem (" //div[@id='js-lyricHeader']//div[@class='title-content']//h1" );
96+ pugi::xpath_node_set title_element = query_title_elem.evaluate_node_set (doc);
97+ if (!title_element.empty ())
98+ {
99+ add_all_text_to_string (result.title , title_element.first ().node ());
100+ }
101+
102+ pugi::xpath_query query_artist_elem (" //div[@id='js-lyricHeader']//div[@class='title-content']//h2" );
103+ pugi::xpath_node_set artist_element = query_artist_elem.evaluate_node_set (doc);
104+ if (!artist_element.empty ())
105+ {
106+ add_all_text_to_string (result.artist , artist_element.first ().node ());
107+ }
108+
109+ if (result.text_bytes .empty () || result.artist .empty () || result.title .empty ())
99110 {
100111 throw new std::runtime_error (" Failed to parse lyrics, the page format may have changed" );
101112 }
102113 else
103114 {
104115 LOG_INFO (" Successfully retrieved lyrics from %s" , url.c_str ());
105- const std::string_view trimmed_text = trim_surrounding_whitespace (lyric_text);
106-
107- LyricDataRaw result = {};
108- result.source_id = id ();
109- result.source_path = url;
110- result.artist = params.artist ;
111- result.album = params.album ;
112- result.title = params.title ;
113- result.text_bytes = string_to_raw_bytes (trimmed_text);
114116
115- const LyricData parsed = parsers::lrc::parse (result, trimmed_text );
117+ const LyricData parsed = parsers::lrc::parse (result, lyric_text );
116118 result.type = parsed.IsTimestamped () ? LyricType::Synced : LyricType::Unsynced;
117119 return {std::move (result)};
118120 }
0 commit comments