From b2dc5453db3f70e7e7e3d8216d0be0346b8b2389 Mon Sep 17 00:00:00 2001 From: Anushka Saxena Date: Tue, 2 Jul 2024 23:11:44 +0530 Subject: [PATCH] =?UTF-8?q?Analyze=20the=20increase=20in=20popularity=20of?= =?UTF-8?q?=20a=20language=20in=20the=20current=20year=20due=20to=20develo?= =?UTF-8?q?per=E2=80=99s=20interest=20in=20the=20previous=20year.=20#301?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Stackoverflow_Survey_Analysis.ipynb | 60 +++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/Stackoverflow_Survey_Analysis.ipynb b/Stackoverflow_Survey_Analysis.ipynb index 4441017..32843f6 100644 --- a/Stackoverflow_Survey_Analysis.ipynb +++ b/Stackoverflow_Survey_Analysis.ipynb @@ -21899,25 +21899,61 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "# Analyze the increase in popularity of a language in the current year due to developer’s interest in the previous year. 
#301"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Candidate column names, tried in order; the first one present is used.\n",
+    "# NOTE(review): the 2022/2023 public exports appear to use the first name of\n",
+    "# each pair and older exports the second - confirm against the CSV headers.\n",
+    "WORKED_WITH_COLUMNS = ('LanguageHaveWorkedWith', 'LanguageWorkedWith')\n",
+    "DESIRED_COLUMNS = ('LanguageWantToWorkWith', 'LanguageDesireNextYear')\n",
+    "\n",
+    "\n",
+    "def find_column(df, candidates):\n",
+    "    \"\"\"Return the first name in `candidates` that is a column of `df`.\n",
+    "\n",
+    "    Raises KeyError when none of the candidates is present.\n",
+    "    \"\"\"\n",
+    "    for name in candidates:\n",
+    "        if name in df.columns:\n",
+    "            return name\n",
+    "    raise KeyError(f'none of {candidates} found in the survey columns')\n",
+    "\n",
+    "\n",
+    "def preprocess_data(df):\n",
+    "    \"\"\"Return a copy of `df` whose language columns have NaN replaced by ''.\n",
+    "\n",
+    "    The input frame is not modified, so re-running the cell is idempotent.\n",
+    "    \"\"\"\n",
+    "    present = [c for c in WORKED_WITH_COLUMNS + DESIRED_COLUMNS if c in df.columns]\n",
+    "    return df.assign(**{c: df[c].fillna('') for c in present})\n",
+    "\n",
+    "\n",
+    "def count_languages(df, column):\n",
+    "    \"\"\"Count the rows that mention each language in a ';'-separated column.\n",
+    "\n",
+    "    Returns a Series indexed by language name, most frequent first.\n",
+    "    \"\"\"\n",
+    "    languages = df[column].fillna('').str.split(';').explode().str.strip()\n",
+    "    # A blank answer splits into [''], which must not be counted as a language.\n",
+    "    return languages[languages != ''].value_counts()\n",
+    "\n",
+    "\n",
+    "def counts_to_frame(counts, value_name):\n",
+    "    \"\"\"Turn a value_counts Series into a ['Language', value_name] frame.\n",
+    "\n",
+    "    The axis and value names are set explicitly because the column names\n",
+    "    produced by value_counts().reset_index() changed in pandas 2.0.\n",
+    "    \"\"\"\n",
+    "    return counts.rename_axis('Language').reset_index(name=value_name)\n",
+    "\n",
+    "\n",
+    "# Load the CSV files\n",
+    "file_2022 = pd.read_csv('Data/survey_results_public_2022.csv')\n",
+    "file_2023 = pd.read_csv('Data/survey_results_public_2023.csv')\n",
+    "\n",
+    "# Preprocess the data (file_2022 / file_2023 keep the raw values)\n",
+    "data_2022 = preprocess_data(file_2022)\n",
+    "data_2023 = preprocess_data(file_2023)\n",
+    "\n",
+    "# Count languages for both years\n",
+    "lang_count_2022 = count_languages(data_2022, find_column(data_2022, WORKED_WITH_COLUMNS))\n",
+    "lang_desire_2022 = count_languages(data_2022, find_column(data_2022, DESIRED_COLUMNS))\n",
+    "lang_count_2023 = count_languages(data_2023, find_column(data_2023, WORKED_WITH_COLUMNS))\n",
+    "\n",
+    "# Convert to DataFrame for easier comparison\n",
+    "lang_count_2022_df = counts_to_frame(lang_count_2022, 'Count_2022')\n",
+    "lang_desire_2022_df = counts_to_frame(lang_desire_2022, 'Desire_2022')\n",
+    "lang_count_2023_df = counts_to_frame(lang_count_2023, 'Count_2023')\n",
+    "\n",
+    "# Merge the dataframes; a language absent from one source counts as 0 there\n",
+    "count_columns = ['Count_2022', 'Desire_2022', 'Count_2023']\n",
+    "merged_df = (\n",
+    "    lang_count_2022_df\n",
+    "    .merge(lang_desire_2022_df, on='Language', how='outer')\n",
+    "    .merge(lang_count_2023_df, on='Language', how='outer')\n",
+    "    .fillna({name: 0 for name in count_columns})\n",
+    "    .astype({name: int for name in count_columns})\n",
+    ")\n",
+    "\n",
+    "# Calculate the increase in popularity.\n",
+    "# NOTE: these are raw respondent counts, not shares; if the two surveys differ\n",
+    "# in size, part of any increase reflects sample size rather than popularity.\n",
+    "merged_df['Increase'] = merged_df['Count_2023'] - merged_df['Count_2022']\n",
+    "# Keep the ratio as NaN (not +/-inf) when nobody desired the language in 2022\n",
+    "desired_2022 = merged_df['Desire_2022'].where(merged_df['Desire_2022'] > 0)\n",
+    "merged_df['Interest_to_Popularity'] = merged_df['Increase'] / desired_2022\n",
+    "\n",
+    "# Sort by the increase in popularity\n",
+    "merged_df = merged_df.sort_values(by='Increase', ascending=False)\n",
+    "\n",
+    "merged_df[['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']]"
+   ]
   },
   {
    "cell_type": "markdown",