-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
134 lines (107 loc) · 4.71 KB
/
app.py
File metadata and controls
134 lines (107 loc) · 4.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import logging
# Root logger setup: emit INFO and above, each record prefixed with its
# timestamp and severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def _format_player_name(full_name):
    """Return the lowercase, hyphen-separated form of a display name.

    Periods and apostrophes are removed and every whitespace-separated token
    is kept, e.g. "P.J. Tucker" -> "pj-tucker", "De'Aaron Fox" ->
    "deaaron-fox" and "Kelly Oubre Jr." -> "kelly-oubre-jr". Returns an
    empty string when no characters remain after cleaning.
    """
    cleaned = full_name.replace('.', '').replace("'", '').lower()
    # split() with no argument collapses any run of whitespace, so stray or
    # repeated spaces never produce empty tokens or doubled hyphens.
    return '-'.join(cleaned.split())


def fetch_nba_players(season=2024, *, timeout=30):
    """
    Fetch NBA players from basketball-reference.com.

    Downloads the per-game statistics page for ``season``, extracts every
    linked player name from the ``per_game_stats`` table, writes the
    de-duplicated (full name, formatted name) pairs to ``nba_players.csv``
    and returns the formatted names.

    Args:
        season: Season year used in the page URL
            (``NBA_<season>_per_game.html``). Defaults to 2024.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of names in ``firstname-lastname`` format, ordered by the
        players' full names. An empty list is returned (and the problem is
        logged) when the request fails, the table is missing, no player
        rows are found, or any other processing error occurs.
    """
    try:
        url = f"https://www.basketball-reference.com/leagues/NBA_{season}_per_game.html"
        # Browser-like User-Agent header sent with the request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Without a timeout requests may block forever on a stalled connection
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        player_table = soup.find('table', id='per_game_stats')
        if not player_table:
            raise ValueError("Could not find player statistics table")

        players = []
        rows = player_table.find_all('tr', class_=lambda x: x != 'thead')
        for row in rows:
            name_cell = row.find('td', {'data-stat': 'player'})
            # Rows without a linked player-name cell are skipped
            if not (name_cell and name_cell.a):
                continue
            full_name = name_cell.a.text
            formatted_name = _format_player_name(full_name)
            if not formatted_name:
                # A blank link text must not abort the whole scrape
                continue
            players.append({
                'full_name': full_name,
                'formatted_name': formatted_name
            })

        if not players:
            # An empty DataFrame has no 'full_name' column to sort on, so
            # report the real problem instead of an obscure KeyError.
            logging.error("Error processing data: no player rows found in table")
            return []

        # The same name can appear in several rows; keep one entry per player
        df = pd.DataFrame(players).drop_duplicates().sort_values('full_name')
        # Save to CSV for reference
        df.to_csv('nba_players.csv', index=False)
        logging.info("Found %d NBA players", len(df))
        return df['formatted_name'].tolist()
    except requests.exceptions.RequestException as e:
        logging.error("Error fetching data: %s", e)
        return []
    except Exception as e:
        # Best-effort contract: callers receive [] rather than an exception
        logging.error("Error processing data: %s", e)
        return []
def display_sample_names(players, num_samples=10):
    """Print the alphabetically first ``num_samples`` names as a spot check.

    Does nothing when ``players`` is empty.
    """
    if not players:
        return
    logging.info("\nSample of formatted names:")
    preview = sorted(players)[:num_samples]
    for name in preview:
        print(name)
def main():
    """Fetch the player list, preview it, and save it as a Python module.

    When at least one player is returned by ``fetch_nba_players`` the names
    are logged/printed as a sample and written, sorted, to
    ``nba_players_list.py`` as an ``NBA_PLAYERS`` list literal. When none are
    returned an error is logged and no file is written. The elapsed time is
    logged in both cases.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    players = fetch_nba_players()
    if players:
        logging.info(f"\nTotal players found: {len(players)}")
        display_sample_names(players)

        # Player names may contain non-ASCII letters. UTF-8 is Python's
        # default source encoding, so write the module explicitly as UTF-8
        # instead of relying on the platform's default encoding.
        with open('nba_players_list.py', 'w', encoding='utf-8') as f:
            f.write("NBA_PLAYERS = [\n")
            f.writelines(f" \"{player}\",\n" for player in sorted(players))
            f.write("]\n")

        logging.info("\nPlayer list has been saved to 'nba_players_list.py'")
        logging.info("Full player data has been saved to 'nba_players.csv'")
    else:
        logging.error("No players were found")

    execution_time = time.perf_counter() - start_time
    logging.info(f"\nExecution time: {execution_time:.2f} seconds")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()