-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions.py
More file actions
71 lines (58 loc) · 2.12 KB
/
functions.py
File metadata and controls
71 lines (58 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import requests # To make get calls to API
from bs4 import BeautifulSoup # Helps make parsing html files much easier
import re, string, unicodedata
import nltk
import contractions
import inflect
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer, WordNetLemmatizer
# Genius API client access token. It is sent as the Bearer credential in the
# Authorization header built by request_artist_info. Per the original note,
# it was granted through Genius's website.
# NOTE(review): this is a credential committed in source — consider loading it
# from an environment variable or secret store, and rotating this value.
client_access_token = "_mDTViAvedsTbhpMuhLml-2gkMw1DU8tKQRnqmFkQDXT2aypVvCg67wDEr4eqBIs"
# Request one page of Genius API search results for an artist name
def request_artist_info(artist_name, page, timeout=10):
    """Fetch one page of Genius search results for ``artist_name``.

    The search text and paging options are sent as URL query parameters
    (``params=``) rather than as a request body: a body on a GET request is
    non-standard and may be ignored by servers or intermediaries.

    Args:
        artist_name: Text sent as the ``q`` search parameter.
        page: Result page number to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The ``requests.Response`` from the search call, unmodified. The HTTP
        status is not checked here.
    """
    search_url = 'https://api.genius.com/search'
    headers = {'Authorization': 'Bearer ' + client_access_token}
    params = {'q': artist_name, 'per_page': 10, 'page': page}
    return requests.get(
        search_url, params=params, headers=headers, timeout=timeout
    )
# Collect Genius.com song URLs for an artist from the search results
def request_song_url(artist_name, song_cap):
    """Collect up to ``song_cap`` Genius song URLs for ``artist_name``.

    Search results are fetched page by page through ``request_artist_info``.
    A hit is kept when ``artist_name`` appears (case-insensitively) in the
    hit's primary artist name.

    Args:
        artist_name: Artist to search for.
        song_cap: Maximum number of URLs to return. Values <= 0 yield an
            empty list without making any request.

    Returns:
        A list of at most ``song_cap`` song URL strings, in the order the
        search results were returned.
    """
    wanted_artist = artist_name.lower()
    songs = []
    page = 1

    while len(songs) < song_cap:
        response = request_artist_info(artist_name, page)
        hits = response.json()['response']['hits']
        if not hits:
            # The search has run out of results. Stop here; otherwise an
            # artist with fewer than song_cap songs would page forever.
            break

        for hit in hits:
            result = hit['result']
            if wanted_artist in result['primary_artist']['name'].lower():
                songs.append(result['url'])
                if len(songs) >= song_cap:
                    break

        page += 1

    print('Found {} songs by {}'.format(len(songs), artist_name))
    return songs
def clean_lyrics(test_str):
    """Return ``test_str`` with bracketed and parenthesised text removed.

    Everything between ``[``...``]`` and ``(``...``)`` is dropped, including
    the delimiters themselves. Nesting is tracked separately for each kind of
    delimiter. An opening delimiter that is never closed suppresses the rest
    of the text; a closing delimiter with no matching opener is kept as an
    ordinary character when no other group is open.
    """
    # Current nesting depth, keyed by opening delimiter.
    depth = {'[': 0, '(': 0}
    # Maps each closing delimiter to the opener whose depth it reduces.
    opener_for = {']': '[', ')': '('}
    kept = []

    for ch in test_str:
        if ch in depth:
            depth[ch] += 1
            continue

        opener = opener_for.get(ch)
        if opener is not None and depth[opener] > 0:
            depth[opener] -= 1
            continue

        # Keep the character only when no group of either kind is open.
        if not any(depth.values()):
            kept.append(ch)

    return ''.join(kept)