diff --git a/.gitignore b/.gitignore index 05572c5..be611ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ catboost_info/ +venv/ +.venv/ streamlit\__pycache__ \ No newline at end of file diff --git a/streamlit/__pycache__/functions.cpython-312.pyc b/streamlit/__pycache__/functions.cpython-312.pyc index 23cffd0..7153492 100644 Binary files a/streamlit/__pycache__/functions.cpython-312.pyc and b/streamlit/__pycache__/functions.cpython-312.pyc differ diff --git a/streamlit/__pycache__/main_analysis.cpython-312.pyc b/streamlit/__pycache__/main_analysis.cpython-312.pyc index c827a5d..5ba7207 100644 Binary files a/streamlit/__pycache__/main_analysis.cpython-312.pyc and b/streamlit/__pycache__/main_analysis.cpython-312.pyc differ diff --git a/streamlit/functions.py b/streamlit/functions.py index 89eef48..13c1519 100644 --- a/streamlit/functions.py +++ b/streamlit/functions.py @@ -20,6 +20,21 @@ results = pd.read_csv("https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/results.csv") +# Load the data from local CSV files +# data = pd.read_csv('df2020.csv') +# df2018 = pd.read_csv('df2018.csv') +# full_data2018 = pd.read_csv('survey_results_sample_2018.csv') +# full_data2019 = pd.read_csv('survey_results_sample_2019.csv') +# full_df2020 = pd.read_csv('survey_results_sample_2020.csv') +# df2019 = pd.read_csv('df2019.csv') + +# # Filter the 2020 data +# df2020 = data[(data['SalaryUSD'] < 200000)] + +# # Load results for job satisfaction from the local file +# results = pd.read_csv("results.csv") + + ####################################### # VISUALISATION STARTS ####################################### @@ -35,7 +50,7 @@ def plot_boxplot(data, x, y, title): ######################################################################### -def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700): +def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700, key=None): df_counts = df[column_name].value_counts().head(top_n).reset_index() df_counts.columns = [column_name, 'Count'] @@ -46,7 +61,7 @@ def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700): fig.update_layout(xaxis_title=column_name, yaxis_title='Number of Developers') fig.update_layout(height=height, width=width) - return st.plotly_chart(fig) + return st.plotly_chart(fig, key=key) def plot_pie_plotly(df, column_name,top_n=10, height=400, width=400 ): diff --git a/streamlit/home.py b/streamlit/home.py index 2d7568b..0aa06bb 100644 --- a/streamlit/home.py +++ b/streamlit/home.py @@ -16,14 +16,24 @@ def load_data(url): return pd.read_csv(url) # Loading data files from the 'streamlit' directory -df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv') -df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv') -full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv') -full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv') -full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv') -df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv') -df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv') -df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv') +# df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv') +# df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv') +# full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv') +# full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv') +# full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv') +# df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv') +# df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv') +# df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv') + +df = load_data('df2020.csv') +df2018 = load_data('df2018.csv') +full_data2018 = load_data('survey_results_sample_2018.csv') +full_data2019 = load_data('survey_results_sample_2019.csv') +full_df2020 = load_data('survey_results_sample_2020.csv') +df2019 = load_data('df2019.csv') +df2021 = load_data('df2021.csv') +df2022 = load_data('df2022.csv') + # Filter the 2020 dataframe df2020 = df[df['SalaryUSD'] < 200000] @@ -136,7 +146,7 @@ def plot_value_counts(column_name): if year == '2018': main.main_analysis(df2018) - main.main_analysis_2(df2018) + main.main_analysis_2(df2018, year) visual, analysis = st.columns((3, 1)) with visual: @@ -209,7 +219,7 @@ def plot_value_counts(column_name): elif year == '2019': main.main_analysis(df2019) - main.main_analysis_2(df2019) + main.main_analysis_2(df2019, year) visual, analysis = st.columns((3, 1)) with visual: @@ -228,7 +238,7 @@ def plot_value_counts(column_name): elif year == '2020': main.main_analysis(df2020) - main.main_analysis_2(df2020) + main.main_analysis_2(df2020, year) visual, analysis = st.columns((3, 1)) with visual: @@ -247,7 +257,7 @@ def plot_value_counts(column_name): elif year == '2021': main.main_analysis(df2021) - main.common_analysis_2021_2022(df2021) + main.common_analysis_2021_2022(df2021, year) visual, analysis = st.columns((3, 1)) with visual: fig = func.plot_valuecounts_plotly(df2021,'NEWStuck') @@ -281,7 +291,7 @@ def plot_value_counts(column_name): else: main.main_analysis(df2022) - main.common_analysis_2021_2022(df2022) + main.common_analysis_2021_2022(df2022, year) fig = func.compare_language_columns_and_plot(df2022, 'OpSysPersonal use', 'OpSysProfessional use') diff --git a/streamlit/main_analysis.py b/streamlit/main_analysis.py index d178aec..dca25e2 100644 --- a/streamlit/main_analysis.py +++ b/streamlit/main_analysis.py @@ -103,7 +103,7 @@ def main_analysis(df): with visual: st.title("Ethnicity VS Participation") - ff.plot_bar_plotly(df, 'Ethnicity') + ff.plot_bar_plotly(df, 'Ethnicity', key='ethnicity_plot') with analysis: Ethnicity_text = """ @@ -176,7 +176,7 @@ def main_analysis(df): if ds is not None: with visual: st.title("Country Wise Data Scientists Participation") - ff.plot_bar_plotly(ds, "Country") + ff.plot_bar_plotly(ds, "Country", key='country_plot') with analysis: data_scientist_participation_text = """ @@ -196,7 +196,7 @@ def main_analysis(df): ##### To Speed Up the Web Page, Main Analysis is divided into 2 ###### -def main_analysis_2(df): +def main_analysis_2(df, year): visual2, analysis2 = st.columns((3,1)) if df is df2019: @@ -262,7 +262,8 @@ def main_analysis_2(df): if ds is not None: with visual2: st.title("Country Wise Data Scientists Participation") - ff.plot_bar_plotly(ds, "Country") + # Use the year parameter instead of year_variable + ff.plot_bar_plotly(ds, "Country", key=f'country_plot_{year}') with analysis2: data_scientist_participation_text = """ @@ -368,7 +369,7 @@ def common_analysis_2021_2022(df): st.markdown(employment_text, unsafe_allow_html=True) with visual3: - ff.plot_bar_plotly(df, 'DevType', top_n=10, height=500, width=1000) + ff.plot_bar_plotly(df, 'DevType', top_n=10, height=500, width=1000, key=f'devtype_plot_{year_variable}') with analysis3: devtype_text = """ @@ -429,4 +430,3 @@ def common_analysis_2021_2022(df): """ st.markdown(webframe_text, unsafe_allow_html=True) -