33import sys
44from multiprocessing import Pool , freeze_support
55
6- import matplotlib .pyplot as plot
76import requests
87from bs4 import BeautifulSoup
98from unidecode import unidecode
109from wordcloud import WordCloud
1110
12- from constants import ARTIST_RE , CLEAN_PUNC_RE , COMBINED_STOPWORDS , HTML_TAG_RE , LYRIC_CLASS , SECTION_RE
11+ from artistwordcloud .constants import (
12+ ARTIST_RE ,
13+ CLEAN_PUNC_RE ,
14+ COMBINED_STOPWORDS ,
15+ HTML_TAG_RE ,
16+ LYRIC_CLASS ,
17+ SECTION_RE ,
18+ )
1319
1420
15- def remove_fluff (element ) -> str :
21+ def _remove_fluff (element ) -> str :
1622 """
1723 Removes html tags, section names, and additional non-lyric text from lyrics
1824 """
@@ -32,27 +38,37 @@ def build_artist_page(artist_name: str) -> str:
3238 return base_url + constructed_url + "/songs"
3339
3440
35- def build_song_links ( artist_page : str , artist_name : str ) -> list :
def find_api(page: str, name: str) -> str:
    """
    Finds the api key for an artist on their artist page

    Fetches ``page``, collects every ``artists/<digits>`` reference in it and
    asks the Genius API about each one. The numeric id of the first candidate
    whose normalized name contains the normalized ``name`` is returned.
    Returns an empty string when no candidate matches.
    """
    response = requests.get(page)
    # Normalize the requested name once; it is the same for every candidate.
    wanted = unidecode(re.sub(r"\W", "", name.lower()))
    # A page may reference the same id several times. dict.fromkeys drops the
    # repeats while keeping first-seen order, so each id is queried only once.
    candidates = dict.fromkeys(re.findall(r"artists/[0-9]+", response.text))
    for candidate in candidates:
        content = requests.get(f"https://genius.com/api/{candidate}").json()
        found = unidecode(
            re.sub(r"\W", "", content["response"]["artist"]["name"].lower())
        )
        if wanted in found:
            return re.sub(r"artists/", "", candidate)
    return ""
56+
57+
58+ def _build_song_links (artist_page : str , artist_name : str ) -> list :
59+ """
60+ Compiles a list of song links associated to a particular artist
61+ Pulls data from Genius's API
62+ """
63+ api_string = find_api (artist_page , artist_name )
4964 if api_string == "" :
5065 raise ValueError ()
5166 print (
5267 "Collecting links...\n Depending on the size of the artist's library, this may take a while..."
5368 )
5469 content = requests .get (
55- f"https://genius.com/api/artists/{ api_string } /songs?page=1&per_page=20&sort=popularity&text_format=html" ).json ()
70+ f"https://genius.com/api/artists/{ api_string } /songs?page=1&per_page=20&sort=popularity&text_format=html"
71+ ).json ()
5672 link_list = []
5773 while True :
5874 for entry in content ["response" ]["songs" ]:
@@ -68,7 +84,7 @@ def build_song_links(artist_page: str, artist_name: str) -> list:
6884 return link_list
6985
7086
71- def process_lyrics (url : str ) -> str :
87+ def _process_lyrics (url : str ) -> str :
7288 """
7389 Processes the lyrics for a particular webpage and returns them as a nicely formatted string
7490 This function is called by convert_lyrics as part of a multiprocessing pool
@@ -77,12 +93,12 @@ def process_lyrics(url: str) -> str:
7793 soup = BeautifulSoup (response .text , "html.parser" )
7894 lyrics_elements = soup .find_all ("div" , class_ = LYRIC_CLASS )
7995 portions = [
80- remove_fluff (item .decode_contents ().lower ()) for item in lyrics_elements
96+ _remove_fluff (item .decode_contents ().lower ()) for item in lyrics_elements
8197 ]
8298 return " " .join (re .sub (r"\s+" , " " , portion ) for portion in portions )
8399
84100
85- def convert_lyrics (song_links : list [str ]) -> str :
101+ def _convert_lyrics (song_links : list [str ]) -> str :
86102 """
87103 Processes the content of the webpage links lists into neatly formatted lyrics strings.
88104 """
@@ -91,17 +107,39 @@ def convert_lyrics(song_links: list[str]) -> str:
91107 print ("This may take a while..." )
92108 # Multiprocess lyrics
93109 with Pool () as pool :
94- data_set = pool .map (process_lyrics , song_links )
110+ data_set = pool .map (_process_lyrics , song_links )
95111 pool .close ()
96112 return " " .join (data_set )
97113
98114
99- def build_cloud (data_set : str ) -> None :
def _build_cloud(data_set: str, artist_name: str = "") -> None:
    """
    Processes the string into a word cloud, which is saved as a .png file.
    Files are named after the artist as they appear in the Genius links

    data_set is the lyric text to render. artist_name is the artist used to
    build the output file name; when it is left empty the module-level
    ``artist`` variable is used instead, which is only bound when this module
    runs as a script.
    An OSError raised while generating or saving is reported on stdout rather
    than propagated.
    """
    print("Generating word cloud...")
    if not artist_name:
        # Existing callers pass only the lyrics and rely on the global set by
        # the command-line loop; keep that behavior as the fallback.
        artist_name = artist
    output_name = re.sub(
        ARTIST_RE, "", unidecode(artist_name).replace(" ", "-").lower()
    )
    try:
        WordCloud(
            width=1080,
            height=1080,
            background_color="black",
            stopwords=COMBINED_STOPWORDS,
            min_font_size=8,
            max_words=150,
            relative_scaling=0.7,
        ).generate(unidecode(data_set)).to_file(f"{output_name}.png")
        print(f"Saved word cloud as {output_name}.png!")
    except OSError:
        print(
            f"Could not save {output_name}.png\nYou may not have access to write in this directory."
        )
137+
138+
139+ def _export_cloud (data_set : str ) -> WordCloud :
140+ """
141+ Processes the string into a word cloud, returning the cloud
142+ """
105143 wordcloud = WordCloud (
106144 width = 1080 ,
107145 height = 1080 ,
@@ -111,23 +149,23 @@ def build_cloud(data_set: str) -> None:
111149 max_words = 150 ,
112150 relative_scaling = 0.7 ,
113151 ).generate (unidecode (data_set ))
114- plot . figure ( figsize = ( 8 , 8 ), facecolor = None )
115- plot . imshow ( wordcloud )
116- plot . axis ( "off" )
117- plot . tight_layout ( pad = 0 )
118- output_name = re . sub ( ARTIST_RE , '' , unidecode (artist ). replace ( ' ' , '-' ). lower () )
152+ return wordcloud
153+
154+
def cloud_hook(artist_name: str) -> "WordCloud | None":
    """
    Builds a word cloud for an artist and returns it to the caller

    The artist name is transliterated with unidecode, the artist's song links
    are collected, and the combined lyrics are turned into a WordCloud.
    Returns None when a ValueError is raised along the way, for example when
    the artist cannot be found on Genius.
    """
    decode_artist = unidecode(artist_name)
    try:
        links = _build_song_links(build_artist_page(decode_artist), decode_artist)
        return _export_cloud(_convert_lyrics(links))
    except ValueError:
        return None
124162
125163
126164if __name__ == "__main__" :
127165 freeze_support ()
128166 cmd_args = sys .argv [1 :]
129167 try :
130- os .mkdir ("./OutputClouds/" )
168+ os .mkdir (".. /OutputClouds/" )
131169 except FileExistsError :
132170 pass
133171 if len (cmd_args ) == 0 :
@@ -136,8 +174,11 @@ def build_cloud(data_set: str) -> None:
136174 try :
137175 if artist == "" :
138176 break
139- song_list = build_song_links (build_artist_page (unidecode (artist )), unidecode (artist ))
140- build_cloud (convert_lyrics (song_list ))
177+ decoded_artist = unidecode (artist )
178+ song_list = _build_song_links (
179+ build_artist_page (decoded_artist ), decoded_artist
180+ )
181+ _build_cloud (_convert_lyrics (song_list ))
141182 except ValueError :
142183 artist = input (
143184 f"Artist { artist } could not be found on Genius.\n "
@@ -149,8 +190,11 @@ def build_cloud(data_set: str) -> None:
149190 for artist in artists :
150191 try :
151192 print (f"\n \n Current artist: { artist } " )
152- song_list = build_song_links (build_artist_page (unidecode (artist )), unidecode (artist ))
153- build_cloud (convert_lyrics (song_list ))
193+ decoded_artist = unidecode (artist )
194+ song_list = _build_song_links (
195+ build_artist_page (decoded_artist ), decoded_artist
196+ )
197+ _build_cloud (_convert_lyrics (song_list ))
154198 except ValueError :
155199 print (
156200 f"Artist { artist } could not be found on Genius. "
0 commit comments