|
| 1 | +import streamlit as st |
| 2 | +import pandas as pd |
| 3 | +import preprocessor,helper |
| 4 | +import plotly.express as px |
| 5 | +import matplotlib.pyplot as plt |
| 6 | +import seaborn as sns |
| 7 | +import plotly.figure_factory as ff |
| 8 | + |
| 9 | +##def main(): |
| 10 | + |
# --- Data loading -----------------------------------------------------------
# Read the athlete results and the NOC/region lookup from the working
# directory, then let the project's preprocessor combine them. Every section
# below reads from the resulting `df`.
df = pd.read_csv('athlete_events.csv')
region_df = pd.read_csv('noc_regions.csv')
df = preprocessor.preprocess(df, region_df)

# --- Sidebar ----------------------------------------------------------------
# The selected label decides which of the page sections below is rendered.
# NOTE(review): the radio is created before the sidebar title and logo, so it
# presumably appears above them -- confirm this ordering is intended.
menu_options = (
    'Medal Tally',
    'Overall Analysis',
    'Country-wise Analysis',
    'Athlete wise Analysis',
)
user_menu = st.sidebar.radio('Select an Option', menu_options)

st.sidebar.title("Olympics Analysis")
st.sidebar.image(
    'https://e7.pngegg.com/pngimages/1020/402/png-clipart-2024-summer-olympics-brand-circle-area-olympic-rings-olympics-logo-text-sport.png'
)
if user_menu == 'Medal Tally':
    # Sidebar filters for the medal table. Both option lists come from
    # helper.country_year_list; 'Overall' is the value this section treats as
    # "no filter" for either selector.
    st.sidebar.header("Medal Tally")
    years, country = helper.country_year_list(df)

    selected_year = st.sidebar.selectbox("Select Year", years)
    selected_country = st.sidebar.selectbox("Select Country", country)

    medal_tally = helper.fetch_medal_tally(df, selected_year, selected_country)

    # Show exactly one heading per filter combination. The sentinel must be
    # compared as 'Overall' (capitalised) in every branch: comparing against
    # a lowercase "overall" is always unequal, which made a second,
    # contradictory heading render whenever the country was 'Overall'.
    all_years = selected_year == 'Overall'
    all_countries = selected_country == 'Overall'
    if all_years and all_countries:
        st.title("Overall Tally")
    elif all_countries:
        st.title("Medal Tally in " + str(selected_year) + " Olympics ")
    elif all_years:
        st.title(selected_country + " Overall Performance ")
    else:
        st.title(selected_country + " Performance in " + str(selected_year) + " Olympics ")

    st.table(medal_tally)
| 41 | + |
if user_menu == 'Overall Analysis':
    # --- Headline counts ----------------------------------------------------
    # NOTE(review): the "- 1" presumably excludes one edition that should not
    # be counted -- confirm which year it is meant to drop.
    editions = df['Year'].unique().shape[0] - 1
    cities = df['City'].unique().shape[0]
    sports = df['Sport'].unique().shape[0]
    events = df['Event'].unique().shape[0]
    athletes = df['Name'].unique().shape[0]
    # nunique() ignores missing values, whereas unique() would count NaN as
    # one extra "nation". The country selector in this file likewise drops
    # missing regions before listing them.
    nations = df['region'].nunique()

    st.title(" Top Statistics")
    # Two rows of three tiles each: a header label with the count beneath it.
    stat_rows = (
        ((" Edition ", editions), (" Hosts ", cities), (" Sports ", sports)),
        ((" Events ", events), (" Nations ", nations), (" Athletes ", athletes)),
    )
    for row in stat_rows:
        for column, (label, value) in zip(st.columns(3), row):
            with column:
                st.header(label)
                st.title(value)

    # --- Trends per edition -------------------------------------------------
    # helper.data_over_time is called once per column; the result is plotted
    # with 'Edition' on the x axis and the column's own name on the y axis.
    trend_charts = (
        ('region', " Participating Nations over the years "),
        ('Event', " Events over the years "),
        ('Name', " Athletes over the years "),
    )
    for column_name, heading in trend_charts:
        trend_df = helper.data_over_time(df, column_name)
        fig = px.line(trend_df, x='Edition', y=column_name)
        st.title(heading)
        st.plotly_chart(fig)

    # --- Events per sport per year ------------------------------------------
    st.title(" No. of Events over time(Every Sport)")
    fig, ax = plt.subplots(figsize=(20, 20))
    # Keep one row per (Year, Sport, Event) so the pivot counts each event
    # once instead of once per participating athlete.
    unique_events = df.drop_duplicates(['Year', 'Sport', 'Event'])
    events_per_sport = (
        unique_events
        .pivot_table(index='Sport', columns='Year', values='Event', aggfunc='count')
        .fillna(0)
        .astype('int')
    )
    # Draw on the axes created above rather than relying on pyplot's
    # implicit "current axes".
    sns.heatmap(events_per_sport, annot=True, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it once it
    # has been rendered so figures do not pile up across script reruns.
    plt.close(fig)

    # --- Most successful athletes -------------------------------------------
    st.title("Most Successful Athletes")
    sport_list = sorted(df['Sport'].unique().tolist())
    sport_list.insert(0, 'Overall')  # first option = no sport filter

    selected_sport = st.selectbox("Select a Sport ", sport_list)
    top_athletes = helper.most_successful(df, selected_sport)
    st.table(top_athletes)
| 107 | + |
if user_menu == 'Country-wise Analysis':
    st.sidebar.title('Country-wise Analysis')

    # Selectable countries: every non-missing region, alphabetically sorted.
    country_list = sorted(df['region'].dropna().unique().tolist())
    selected_country = st.sidebar.selectbox('Select a Country', country_list)

    # Medal count per year for the chosen country, as a line chart.
    country_df = helper.yearwise_medal_tally(df, selected_country)
    fig = px.line(country_df, x='Year', y='Medal')
    st.title(selected_country + " Medal Tally over the years ")
    st.plotly_chart(fig)

    # Heatmap of the table returned by helper.country_event_heatmap.
    # (Heading typo fixed: "excels int the" -> "excels in the".)
    st.title(selected_country + " excels in the following sports")
    pt = helper.country_event_heatmap(df, selected_country)
    fig, ax = plt.subplots(figsize=(20, 20))
    # Draw on the axes created above rather than pyplot's implicit current
    # axes.
    sns.heatmap(pt, annot=True, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it once it
    # has been rendered so figures do not pile up across script reruns.
    plt.close(fig)

    st.title(" Top 10 athletes " + selected_country)
    top10_df = helper.most_successful_countrywise(df, selected_country)
    st.table(top10_df)
| 132 | + |
if user_menu == 'Athlete wise Analysis':
    # One row per (Name, region) so an athlete is not counted once per event
    # entered.
    athlete_df = df.drop_duplicates(subset=['Name', 'region'])

    # --- Age distribution: everyone vs. each medal class --------------------
    x1 = athlete_df['Age'].dropna()
    x2 = athlete_df[athlete_df['Medal'] == 'Gold']['Age'].dropna()
    x3 = athlete_df[athlete_df['Medal'] == 'Silver']['Age'].dropna()
    x4 = athlete_df[athlete_df['Medal'] == 'Bronze']['Age'].dropna()

    fig = ff.create_distplot(
        [x1, x2, x3, x4],
        ['Overall Age', 'Gold Medalist', 'Silver Medalist', 'Bronze Medalist'],
        show_hist=False,
        show_rug=False,
    )
    fig.update_layout(autosize=False, width=1000, height=600)
    st.plotly_chart(fig)

    # --- Age distribution of gold medallists, per sport ---------------------
    famous_sports = ['Basketball', 'Judo', 'Football', 'Tug-Of-War', 'Athletics',
                     'Swimming', 'Badminton', 'Sailing', 'Gymnastics',
                     'Art Competitions', 'Handball', 'Weightlifting', 'Wrestling',
                     'Water Polo', 'Hockey', 'Rowing', 'Fencing',
                     'Shooting', 'Boxing', 'Taekwondo', 'Cycling', 'Diving', 'Canoeing',
                     'Tennis', 'Golf', 'Softball', 'Archery',
                     'Volleyball', 'Synchronized Swimming', 'Table Tennis', 'Baseball',
                     'Rhythmic Gymnastics', 'Rugby Sevens',
                     'Beach Volleyball', 'Triathlon', 'Rugby', 'Polo', 'Ice Hockey']
    gold_ages = []
    plotted_sports = []
    for sport in famous_sports:
        sport_df = athlete_df[athlete_df['Sport'] == sport]
        ages = sport_df[sport_df['Medal'] == 'Gold']['Age'].dropna()
        # create_distplot fits a density curve per group, which cannot be
        # estimated from an empty group or from a single repeated value.
        # Skip such sports instead of letting one of them abort the chart.
        if ages.nunique() < 2:
            continue
        gold_ages.append(ages)
        plotted_sports.append(sport)

    st.title("Distribution of Age wrt Sports(Gold Medalist)")
    if gold_ages:  # nothing to draw if every sport was skipped
        fig = ff.create_distplot(gold_ages, plotted_sports, show_hist=False, show_rug=False)
        fig.update_layout(autosize=False, width=1000, height=600)
        st.plotly_chart(fig)

    # --- Height vs. weight scatter, optionally filtered by sport ------------
    sport_list = sorted(df['Sport'].unique().tolist())
    sport_list.insert(0, 'Overall')  # first option = no sport filter

    st.title("Height Vs Weight ")
    selected_sport = st.selectbox(' Select a Sport ', sport_list)
    temp_df = helper.weight_v_height(df, selected_sport)
    fig, ax = plt.subplots()
    # Colour encodes the medal, marker style encodes sex. Draw on the axes
    # created above rather than pyplot's implicit current axes.
    sns.scatterplot(
        x='Weight',
        y='Height',
        data=temp_df,
        hue=temp_df['Medal'],
        style=temp_df['Sex'],
        s=60,
        ax=ax,
    )
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it once it
    # has been rendered so figures do not pile up across script reruns.
    plt.close(fig)

    # --- Participation by sex over time -------------------------------------
    st.title(' Men VS Women Participation over the years')
    final = helper.men_vs_women(df)
    fig = px.line(final, x='Year', y=["Male", "Female"])
    fig.update_layout(autosize=False, width=1000, height=600)
    st.plotly_chart(fig)
| 185 | + |
| 186 | + |
| 187 | + |
| 188 | + |
| 189 | + |
0 commit comments