# Analyze the increase in popularity of a language in the current year
# due to developers' interest in the previous year. (#301)

import pandas as pd

# Column names differ between survey editions: recent exports use the
# "HaveWorkedWith"/"WantToWorkWith" spelling, older ones use
# "WorkedWith"/"DesireNextYear".  Candidates are tried in order.
# NOTE(review): confirm against the headers of the CSVs in Data/.
WORKED_WITH_COLUMNS = ("LanguageHaveWorkedWith", "LanguageWorkedWith")
DESIRED_COLUMNS = ("LanguageWantToWorkWith", "LanguageDesireNextYear")


def resolve_column(df, candidates):
    """Return the first name in ``candidates`` that is a column of ``df``.

    Raises:
        KeyError: if none of the candidate names is present.
    """
    for name in candidates:
        if name in df.columns:
            return name
    raise KeyError(f"None of the columns {list(candidates)} found in the survey data")


def preprocess_data(df):
    """Return a copy of ``df`` with missing language answers replaced by ''.

    Only the language columns that actually exist in ``df`` are touched, so
    the function works for both generations of column names.  The input frame
    is left unmodified, which keeps the raw data available after this step.
    """
    cleaned = df.copy()
    for name in WORKED_WITH_COLUMNS + DESIRED_COLUMNS:
        if name in cleaned.columns:
            cleaned[name] = cleaned[name].fillna('')
    return cleaned


def count_languages(df, column):
    """Count how many responses mention each language in ``column``.

    ``column`` holds ';'-separated language lists.  Missing answers and empty
    tokens (e.g. produced by ``fillna('')``) are skipped so that "no answer"
    is never counted as a language.

    Returns:
        pandas.Series indexed by language name, sorted by descending count.
    """
    languages = df[column].dropna().astype(str).str.split(';').explode().str.strip()
    languages = languages[languages != '']
    return languages.value_counts()


def counts_to_frame(counts, value_name):
    """Turn a ``value_counts`` Series into a ['Language', value_name] frame.

    Naming the index explicitly (instead of renaming whatever
    ``reset_index()`` happens to produce) makes the result independent of the
    pandas version: pandas 2.x changed the names ``value_counts`` assigns.
    """
    return counts.rename_axis('Language').reset_index(name=value_name)


def merge_popularity(count_2022_df, desire_2022_df, count_2023_df):
    """Combine the three per-language tables and derive the change metrics.

    Adds:
        Increase: ``Count_2023 - Count_2022`` (raw response counts).
        Interest_to_Popularity: ``Increase / Desire_2022``; NaN when nobody
            expressed interest in 2022, instead of dividing by zero.

    Returns the merged frame sorted by ``Increase`` in descending order.
    """
    merged = (
        count_2022_df
        .merge(desire_2022_df, on='Language', how='outer')
        .merge(count_2023_df, on='Language', how='outer')
        .fillna(0)  # a language absent from one table simply has a count of 0
    )
    merged['Increase'] = merged['Count_2023'] - merged['Count_2022']
    desire = merged['Desire_2022']
    # where() blanks out zero denominators so the ratio is NaN rather than +/-inf.
    merged['Interest_to_Popularity'] = merged['Increase'] / desire.where(desire != 0)
    return merged.sort_values(by='Increase', ascending=False)


# A notebook kernel executes cells as "__main__", so this section still runs
# there; the guard only keeps the helpers importable without the CSV files.
if __name__ == "__main__":
    # Load the CSV files
    file_2022 = pd.read_csv(r"Data/survey_results_public_2022.csv")
    file_2023 = pd.read_csv(r"Data/survey_results_public_2023.csv")

    # Preprocess the data
    data_2022 = preprocess_data(file_2022)
    data_2023 = preprocess_data(file_2023)

    # Count languages for both years
    lang_count_2022 = count_languages(data_2022, resolve_column(data_2022, WORKED_WITH_COLUMNS))
    lang_desire_2022 = count_languages(data_2022, resolve_column(data_2022, DESIRED_COLUMNS))
    lang_count_2023 = count_languages(data_2023, resolve_column(data_2023, WORKED_WITH_COLUMNS))

    # Convert to DataFrame for easier comparison
    lang_count_2022_df = counts_to_frame(lang_count_2022, 'Count_2022')
    lang_desire_2022_df = counts_to_frame(lang_desire_2022, 'Desire_2022')
    lang_count_2023_df = counts_to_frame(lang_count_2023, 'Count_2023')

    # Merge the dataframes, calculate the increase and sort by it
    # NOTE(review): these are raw counts; if the two surveys have different
    # numbers of respondents, shares may be a fairer comparison.
    merged_df = merge_popularity(lang_count_2022_df, lang_desire_2022_df, lang_count_2023_df)

    print(merged_df[['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']])