-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcollect_from_api.py
More file actions
119 lines (81 loc) · 3.49 KB
/
collect_from_api.py
File metadata and controls
119 lines (81 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
### Collect match, team, and player data from https://api.footystats.org ###
import requests
import pandas as pd
import json
import time
import os
## CONSTANTS ##
URL = "https://api.footystats.org"  # non-changing base url
# League to collect; the name must match an entry of the /league-list endpoint exactly.
# examples = "England Premier League", "Germany Bundesliga", "Spain La Liga", etc.
leagueName = "USA MLS"
# API key sent as the 'key' query parameter of every request.
# Set the FOOTYSTATS_API_KEY environment variable to supply a key without
# editing this file; the literal below is only the backward-compatible fallback.
# NOTE(review): a key committed to source control should be treated as leaked --
# consider rotating it and dropping the fallback.
apiKey = os.environ.get(
    'FOOTYSTATS_API_KEY',
    '3c87c3773c4ff88f7cbdc8f44e4ced63130a5756031056c90b9025b5ffe62407',
)
## FUNCTIONS ##
# Find the season ids for the specified league; the exact league name must be looked up beforehand
def getSeasonIds(leagueName):
    """Return the season ids of the league called ``leagueName``.

    Queries the /league-list endpoint and scans its 'data' entries for the
    first one whose 'name' equals ``leagueName`` exactly.

    Args:
        leagueName: Exact league name to look for.

    Returns:
        A list with the 'id' of every entry in that league's 'season' list,
        or an empty list when no league has that name.
    """
    reply = requests.get(URL + '/league-list', params={'key': apiKey})
    for entry in reply.json()['data']:
        if entry['name'] != leagueName:
            continue
        # First match wins: no need to look at the rest of the league list.
        return [season['id'] for season in entry['season']]
    return []
def getMatchIds(season_id):
    """Return the ids of all matches of one season.

    Requests the /league-matches endpoint with up to 500 matches per page and
    keeps requesting pages until the last one reported by the response's
    'pager' block, so seasons with more matches than fit on one page are not
    silently truncated.

    Args:
        season_id: Season id as returned by getSeasonIds.

    Returns:
        A list with the 'id' of every match in the season, in response order.
    """
    matchIds = []
    page = 1
    while True:
        response = requests.get(
            URL + '/league-matches',
            params={'key': apiKey, 'season_id': season_id, 'max_per_page': 500, 'page': page},
        )
        payload = response.json()
        matchIds.extend(match['id'] for match in payload['data'])
        # Same pager field getPlayers relies on; a response without it is
        # treated as a single page, which is what the one-shot request assumed.
        lastPage = (payload.get('pager') or {}).get('max_page') or 1
        if page >= lastPage:
            break
        page += 1
    return matchIds
def getMatchDetails(matchIds):
    """Fetch the detailed record of every match in ``matchIds``.

    Calls the /match endpoint once per id and waits two seconds before each
    call to stay under the API's per-minute request limit.

    Args:
        matchIds: Iterable of match ids.

    Returns:
        A list holding the 'data' member of each response, in input order.
    """
    collected = []
    for matchId in matchIds:
        # Throttle: the API allows roughly 30-60 calls per minute.
        time.sleep(2)
        payload = requests.get(
            URL + '/match',
            params={'key': apiKey, 'match_id': matchId},
        ).json()
        collected.append(payload['data'])
    return collected
def getTeams(seasonId):
    """Fetch the teams of one season from the /league-teams endpoint.

    Args:
        seasonId: Season id as returned by getSeasonIds.

    Returns:
        The 'data' member of the JSON response.
    """
    query = {'key': apiKey, 'season_id': seasonId}
    return requests.get(URL + '/league-teams', params=query).json()['data']
def getPlayers(seasonId):
    """Fetch every player of one season from the paginated /league-players endpoint.

    The first page is requested once and used both for its player records and
    for the page count in its 'pager' block; only the remaining pages are then
    requested. This saves one rate-limited call per season compared with
    probing for the page count and fetching page 1 again.

    Args:
        seasonId: Season id as returned by getSeasonIds.

    Returns:
        A list with the player records of all pages, in page order.
    """
    def fetchPage(page):
        # One page of the player list, as decoded JSON.
        response = requests.get(
            URL + '/league-players',
            params={'key': apiKey, 'season_id': seasonId, 'page': page},
        )
        return response.json()

    # NOTE(review): assumes an explicit page=1 returns the same payload as the
    # request without a page parameter -- confirm against the API docs.
    firstPage = fetchPage(1)
    pages = firstPage['pager']['max_page']
    if pages < 1:
        # Nothing to collect; 'data' is deliberately not touched in this case.
        return []

    allPlayers = list(firstPage['data'])
    for page in range(2, pages + 1):  # must manually loop through pages
        # extend() appends in place instead of re-copying the list every page.
        allPlayers.extend(fetchPage(page)['data'])
    return allPlayers
def produceCSV(jsonData, fileName):
    """Flatten JSON records and write them as a CSV file in the league's folder.

    The file is written to ./footy_api_csv_data/<leagueName>/<fileName>, where
    leagueName is the module-level league setting. Missing directories are
    created on the way.

    Args:
        jsonData: Records accepted by pandas.json_normalize (a dict or a list
            of dicts).
        fileName: Name of the CSV file to create inside the league folder.

    Returns:
        True once the file has been written.
    """
    df = pd.json_normalize(jsonData)
    outdir = "./footy_api_csv_data/{}".format(leagueName)
    # makedirs also creates the missing ./footy_api_csv_data parent, which a
    # plain mkdir rejects with FileNotFoundError on a first run; exist_ok
    # replaces the separate existence check.
    os.makedirs(outdir, exist_ok=True)
    fullname = os.path.join(outdir, fileName)
    df.to_csv(fullname, index=False)
    return True
## MAIN CODE ##
# Resolve every season id of the configured league, then export each season.
seasonIds = getSeasonIds(leagueName)

for season_id in seasonIds:
    # Download everything for the season before writing any file.
    matchIds = getMatchIds(season_id)
    matchDetails = getMatchDetails(matchIds)
    teams = getTeams(season_id)
    players = getPlayers(season_id)

    # Produce CSV's: one file per dataset, each named after the season id.
    for dataset, nameTemplate in (
        (matchDetails, "{}.csv"),
        (teams, "{}_teams.csv"),
        (players, "{}_players.csv"),
    ):
        produceCSV(dataset, nameTemplate.format(season_id))

    print("finished season {}".format(season_id))

print("Done creating csv's for all {} seasons of the {}".format(len(seasonIds), leagueName))