# -*- coding: utf-8 -*-
"""Data analysis.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1rgZ1KKaswBXKXYF_m5svCLsk6eBzLBT9
"""

# NOTE: the notebook cell `!pip install streamlit` is IPython shell syntax and
# is a SyntaxError in a plain .py module, so it is kept only as a comment.
# Install the dependency from a shell instead:
#     pip install streamlit

import pandas as pd
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt

# Columns the scatter plot reads from the survey CSV.
_REQUIRED_COLUMNS = ['Country', 'YearsCodePro', 'ConvertedCompYearly', 'DevType']


def _prepare_survey_data(data):
    """Return the rows of *data* that can be plotted.

    Keeps only the required columns, drops rows with any missing value, then
    converts ``YearsCodePro`` to numbers and drops rows where that conversion
    fails (non-numeric entries become NaN via ``errors='coerce'``).
    """
    subset = data[_REQUIRED_COLUMNS].dropna()
    # .assign builds a new frame instead of writing into a derived one.
    subset = subset.assign(
        YearsCodePro=pd.to_numeric(subset['YearsCodePro'], errors='coerce')
    )
    return subset.dropna(subset=['YearsCodePro'])


def load_and_scatterplot(year):
    """Load one year's survey sample and render a scatter plot in Streamlit.

    Reads ``Survey_results_sample_{year}.csv`` from the current working
    directory and plots ``YearsCodePro`` against ``ConvertedCompYearly``,
    with marker colour taken from ``DevType`` and marker style from
    ``Country``.

    Args:
        year: Value interpolated into the CSV file name and the plot titles.

    Returns:
        None in every case. Output is rendered through Streamlit calls;
        problems (missing file, empty file, missing columns, no plottable
        rows) are reported with ``st.error`` / ``st.warning`` rather than
        raised.
    """
    file_path = f"Survey_results_sample_{year}.csv"

    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        st.error(f"File for the year {year} not found!")
        return None
    except pd.errors.EmptyDataError:
        st.error(f"File for the year {year} is empty!")
        return None

    # Report a missing column to the user instead of crashing with KeyError.
    missing = [col for col in _REQUIRED_COLUMNS if col not in data.columns]
    if missing:
        st.error(
            f"File for the year {year} is missing required columns: "
            f"{', '.join(missing)}"
        )
        return None

    filtered_data = _prepare_survey_data(data)
    if filtered_data.empty:
        st.warning(f"No complete survey rows to plot for the year {year}.")
        return None

    # Create a scatter plot for YearsCodePro vs ConvertedCompYearly, color-coded by DevType
    st.write(f"Scatter Plot: Years of Professional Coding Experience vs Yearly Compensation for {year}")

    # Use an explicit figure rather than pyplot's global "current figure":
    # global state is shared between Streamlit sessions, and a figure that is
    # never closed stays in memory after every rerun of the script.
    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        sns.scatterplot(
            data=filtered_data,
            x='YearsCodePro',
            y='ConvertedCompYearly',
            hue='DevType',
            style='Country',
            palette='deep',
            s=100,
            alpha=0.6,
            ax=ax,
        )

        # Anchor the legend outside the right edge so it does not cover points.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Developer Type')

        ax.set_title(f'YearsCodePro vs ConvertedCompYearly ({year}), colored by DevType', fontsize=16)
        ax.set_xlabel('Years of Professional Coding Experience', fontsize=14)
        ax.set_ylabel('Yearly Compensation (USD)', fontsize=14)

        st.pyplot(fig)
    finally:
        # Release the figure even if plotting or rendering fails.
        plt.close(fig)

    return None