-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinsert_data_helper.py
More file actions
79 lines (66 loc) · 3.09 KB
/
insert_data_helper.py
File metadata and controls
79 lines (66 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from wiki_scraping import *
from datetime import datetime
def gather_player_info(player_name, player_info):
    """Collect the biographical fields for one player from an infobox mapping.

    Each field is looked up under its dedicated key first ('Full name',
    'Date of birth', 'Place of birth', 'Height'). When the date or place
    lookup fails, the combined 'Born' entry is parsed instead.

    Args:
        player_name: Name the player is already known by. It is returned
            unchanged and also used as the full name when 'Full name'
            cannot be read.
        player_info: Mapping of infobox labels to raw cell values
            (presumably scraped from a biography table -- confirm against
            the caller).

    Returns:
        A 6-tuple ``(known_name, full_name, date_of_birth, place_of_birth,
        nationality, height)``. ``date_of_birth`` is a ``datetime`` or
        ``None``; place, nationality and height are ``None`` when they
        cannot be determined.
    """
    known_name = player_name
    # Offset just past the ISO date inside the 'Born' text. The place-of-birth
    # fallback slices from here so the date is not parsed as part of the place.
    date_end_index = 0

    try:
        full_name = process_text(player_info['Full name'])
    except Exception as e:
        logging.warning(f"different-biography table {e}.")
        full_name = known_name

    try:
        date_str = process_text(player_info['Date of birth'])
        date_match = re.search(r'(\d{1,2} \w+ \d{4})', date_str)
        date_of_birth = datetime.strptime(date_match.group(1), '%d %B %Y') if date_match else None
    except Exception as e:
        logging.warning(f"date of birth error {e}")
        try:
            born_text = process_text(player_info['Born'])
            date_match = re.search(r'(\d{4}-\d{2}-\d{2})', born_text)
            if date_match:
                # Record the offset before parsing so the place fallback can
                # still skip the date even if it turns out to be invalid.
                date_end_index = date_match.end()
                date_of_birth = datetime.strptime(date_match.group(1), '%Y-%m-%d')
            else:
                date_of_birth = None
        except (KeyError, ValueError):
            # KeyError: no 'Born' entry either. ValueError: the text looked
            # like an ISO date but is not a real calendar date.
            logging.warning("Lol, Gabriel the Saint clause")
            date_of_birth = None

    try:
        place_of_birth = process_text(player_info['Place of birth'])
        # The last comma-separated component is treated as the country.
        nationality = place_of_birth.split(',')[-1].strip()
        place_of_birth = place_of_birth.rsplit(',', 1)[0].strip()
        # Normalise spacing after the remaining commas.
        place_of_birth = re.sub(r',\s*', ', ', place_of_birth)
    except Exception as e:
        logging.warning(f"place of birth error {e}")
        try:
            born_tail = process_text(player_info['Born'])[date_end_index:]
            # Strip so the value matches the primary path (no leading space).
            nationality = born_tail.split(',')[-1].strip()
            # First run of text free of commas/parentheses that precedes a comma.
            place_match = re.search(r'([^,()]+)\s*,', born_tail)
            place_of_birth = place_match.group(1).strip() if place_match else None
        except Exception:
            # Best effort: any failure here means the place is simply unknown.
            place_of_birth = None
            nationality = None

    try:
        height = extract_height_in_metres(player_info["Height"], player_name)
    except KeyError:
        height = None

    return known_name, full_name, date_of_birth, place_of_birth, nationality, height
def gather_position_info(player_info):
    """Return the list of playing positions found in an infobox mapping.

    The position text is looked up under the first of 'Position',
    'Position(s)' or 'Positions' that is present, passed through
    ``process_text`` and split on commas.

    Args:
        player_info: Mapping of infobox labels to raw cell values.

    Returns:
        A pair ``(positions, position_ids)``. ``positions`` is the list of
        comma-separated position strings; ``position_ids`` is always an empty
        list here. Both are empty when no position key is present.
    """
    position_keys = ("Position", "Position(s)", "Positions")
    # Default to None so a missing key yields the empty result instead of an
    # UnboundLocalError on the check below.
    position_key = next((key for key in position_keys if key in player_info), None)
    if position_key is None:
        return [], []

    position_text = process_text(player_info[position_key])
    positions = re.split(r'\s*,\s*', position_text)
    position_ids = []
    return positions, position_ids
def gather_club_info(season_recap):
    """Unpack one club-season row and clean up its four counting fields.

    Args:
        season_recap: Iterable of exactly seven items, in the order
            season, team, league, league apps, league goals, all apps,
            all goals.

    Returns:
        A 7-tuple in the same order, with the four apps/goals entries
        replaced by the result of ``fix_numbers`` (defined elsewhere).
    """
    (
        season,
        team,
        league,
        raw_league_apps,
        raw_league_goals,
        raw_all_apps,
        raw_all_goals,
    ) = season_recap

    # Appearances are processed before goals, one value at a time.
    league_apps = fix_numbers(raw_league_apps)
    all_apps = fix_numbers(raw_all_apps)
    league_goals = fix_numbers(raw_league_goals)
    all_goals = fix_numbers(raw_all_goals)

    return (season, team, league, league_apps, league_goals, all_apps, all_goals)
def gather_intl_info(year_recap):
    """Unpack one international-year row and clean up its four counting fields.

    Args:
        year_recap: Iterable of exactly six items, in the order
            year, nation, competitive apps, competitive goals, caps, goals.

    Returns:
        A 6-tuple in the same order, with the four apps/goals entries
        replaced by the result of ``fix_numbers`` (defined elsewhere).
    """
    (
        year,
        nation,
        raw_competitive_apps,
        raw_competitive_goals,
        raw_caps,
        raw_goals,
    ) = year_recap

    # Appearance counts are processed before goal counts, one value at a time.
    competitive_apps = fix_numbers(raw_competitive_apps)
    caps = fix_numbers(raw_caps)
    competitive_goals = fix_numbers(raw_competitive_goals)
    goals = fix_numbers(raw_goals)

    return (year, nation, competitive_apps, competitive_goals, caps, goals)