-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbar-chart-data.py
More file actions
114 lines (99 loc) · 4.7 KB
/
bar-chart-data.py
File metadata and controls
114 lines (99 loc) · 4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
### Dependencies
import pandas as pd
### Configuration
INPUT_CSV = 'datasets/positive-cases/conposcovidloc.csv'
OUTPUT_CSV = 'bar-chart-race-output.csv'
ICON_BASE_URL = 'https://raw.githubusercontent.com/Eldrick19/ontario-covid19-analysis/master/img/bar-chart-race/'
GROUP_COLUMNS = ['Age_Group', 'Client_Gender']
DATE_COLUMN = 'Accurate_Episode_Date'
# Several age groups share one icon, so they map to the same file-name stem.
ICON_STEM_BY_AGE_GROUP = {
    '<20': 'minor',
    '20s': '20',
    '30s': '30',
    '40s': 'middleaged',
    '50s': 'middleaged',
    '60s': 'elderly',
    '70s': 'elderly',
    '80s': 'elderly',
    '90s': 'elderly',
}
ICON_SUFFIX_BY_GENDER = {'FEMALE': 'f', 'MALE': 'm'}
# Gender labels whose rows are removed from the exported table.
EXCLUDED_GENDERS = ['UNKNOWN', 'OTHER', '(blank)']


### Get Age-Gender Combinations
def get_age_gender_combinations(cases: pd.DataFrame) -> pd.DataFrame:
    """Return the unique (Age_Group, Client_Gender) pairs found in *cases*.

    The result has exactly the two columns ``Age_Group`` and
    ``Client_Gender``, is sorted by gender and then by age group, and carries
    a fresh 0..n-1 index. An empty input yields an empty two-column frame.
    """
    combinations = cases[GROUP_COLUMNS].drop_duplicates()
    combinations = combinations.sort_values(by=['Client_Gender', 'Age_Group'])
    return combinations.reset_index(drop=True)


### Distribute Combination Icons
def icon_url(age_group, gender) -> str:
    """Return the icon URL for one age/gender pair, or '' if there is none."""
    stem = ICON_STEM_BY_AGE_GROUP.get(age_group)
    suffix = ICON_SUFFIX_BY_GENDER.get(gender)
    if stem is None or suffix is None:
        return ''
    return ICON_BASE_URL + stem + '-' + suffix + '.png'


def add_icon_urls(combinations: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *combinations* with an ``IMG_URL`` column appended."""
    with_icons = combinations.copy()
    with_icons['IMG_URL'] = [
        icon_url(age_group, gender)
        for age_group, gender in zip(
            with_icons['Age_Group'], with_icons['Client_Gender']
        )
    ]
    return with_icons


### Add date columns
def add_cumulative_counts(
    combinations: pd.DataFrame, cases: pd.DataFrame
) -> pd.DataFrame:
    """Append one column per episode date holding cumulative case counts.

    A row of *cases* is counted only when its age group, gender and episode
    date are all strings; any other row (e.g. a missing value) is skipped.
    Date columns are added in ascending order of the date string. Each cell
    is the number of counted cases for that age/gender pair with an episode
    date on or before the column's date. Pairs with no counted cases get 0
    in every date column. *combinations* itself is not modified.
    """
    required_columns = GROUP_COLUMNS + [DATE_COLUMN]
    is_complete = pd.Series(
        [
            all(isinstance(value, str) for value in values)
            for values in zip(*(cases[column] for column in required_columns))
        ],
        index=cases.index,
        dtype=bool,
    )
    complete = cases.loc[is_complete]

    skipped = len(cases) - len(complete)
    if skipped:
        print('Null value found in ' + str(skipped) + ' row(s); skipped.')
    if complete.empty:
        return combinations.copy()

    # Rows: age/gender pairs. Columns: dates in sorted order. Cells: new
    # cases on that date, turned into running totals by the cumulative sum.
    daily = complete.groupby(required_columns).size().unstack(
        DATE_COLUMN, fill_value=0
    )
    cumulative = daily.cumsum(axis=1)
    date_columns = list(cumulative.columns)
    cumulative = cumulative.reset_index()
    cumulative.columns.name = None

    merged = combinations.merge(cumulative, on=GROUP_COLUMNS, how='left')
    # Pairs without any counted case come back as NaN from the left merge;
    # restore them to integer zeros so every date column stays integral.
    merged = merged.fillna({column: 0 for column in date_columns})
    return merged.astype({column: 'int64' for column in date_columns})


### Data clean-up & Formatting
def drop_excluded_genders(table: pd.DataFrame) -> pd.DataFrame:
    """Return *table* without rows whose gender is in EXCLUDED_GENDERS."""
    return table[~table['Client_Gender'].isin(EXCLUDED_GENDERS)]


def build_bar_chart_race_table(cases: pd.DataFrame) -> pd.DataFrame:
    """Build the bar-chart-race table from the raw positive-case records.

    *cases* must provide the columns ``Age_Group``, ``Client_Gender`` and
    ``Accurate_Episode_Date``. The result has one row per age/gender pair
    (excluded genders removed) with the columns ``Age_Group``,
    ``Client_Gender``, ``IMG_URL`` followed by one cumulative-count column
    per episode date.
    """
    combinations = get_age_gender_combinations(cases)
    with_icons = add_icon_urls(combinations)
    with_counts = add_cumulative_counts(with_icons, cases)
    return drop_excluded_genders(with_counts)


def main() -> None:
    """Read the case CSV, build the table and export it to CSV."""
    ### Import data from CSV
    df = pd.read_csv(INPUT_CSV)
    print('Loading...')
    table = build_bar_chart_race_table(df)
    ### Export to CSV
    table.to_csv(OUTPUT_CSV, index=False)
    print('Done!')


if __name__ == '__main__':
    main()