# In this exercise, you'll use Pandas to analyze real global temperature anomaly data from NASA, helping to understand trends in climate change over time.

# The dataset is provided by the GISS Team, 2024: GISS Surface Temperature Analysis (GISTEMP), version 4. NASA Goddard Institute for Space Studies. Dataset at https://data.giss.nasa.gov/gistemp/.

import matplotlib.pyplot as plt
import pandas as pd

# First, we load the NASA GISTEMP dataset for global temperature anomalies.
url = "https://data.giss.nasa.gov/gistemp/tabledata_v4/GLB.Ts+dSST.csv"
# skiprows=1 skips the title line that precedes the real header row.
# GISTEMP writes '***' for months that have no value yet; na_values turns
# those markers into NaN so the month columns are parsed as floats.
temp_anomaly_data = pd.read_csv(url, skiprows=1, na_values='***')

# a) Display the first 5 rows and keep only 'Year' and month columns
temp_anomaly_data = temp_anomaly_data.drop(
    columns=['J-D', 'D-N', 'DJF', 'MAM', 'JJA', 'SON']
)
print(temp_anomaly_data.head())

# b) Calculate and print the average temperature anomaly for each year
# Reshape from wide (one column per month) to long (one row per year/month).
# value_name='Anomaly' names the value column; without it pandas calls the
# column 'value' and every later reference to 'Anomaly' would fail.
melted_data = pd.melt(
    temp_anomaly_data,
    id_vars=['Year'],
    var_name='Month',
    value_name='Anomaly',
)
melted_data = melted_data.drop(columns=['Month'])
# Convert 'Anomaly' column to float (must happen after the melt that creates it)
melted_data['Anomaly'] = melted_data['Anomaly'].astype(float)
print(melted_data.head())

# mean() skips NaN, so a year with missing months is averaged over the
# months that do have data.
yearly_avg = melted_data.groupby('Year')['Anomaly'].mean()
print("\nAverage temperature anomaly for each year:")
print(yearly_avg)

# c) Find the year with the highest and lowest temperature anomaly
max_year = yearly_avg.idxmax()
min_year = yearly_avg.idxmin()
print(f"\nYear with highest anomaly: {max_year} ({yearly_avg.loc[max_year]:.2f})")
print(f"Year with lowest anomaly: {min_year} ({yearly_avg.loc[min_year]:.2f})")

# d) Create 'Anomaly_Category' column


def categorize_anomaly(value):
    """Classify a temperature anomaly as 'Cool', 'Neutral' or 'Warm'.

    Args:
        value: A single anomaly value (a number), or a missing value
            (NaN / None).

    Returns:
        'Cool' if value is below -0.2, 'Warm' if it is above 0.2,
        'Neutral' for anything in between (both bounds inclusive), and
        None when the value is missing.
    """
    # NaN fails both comparisons below, so without this guard a month with
    # no data would be silently labelled 'Neutral'.
    if pd.isna(value):
        return None
    if value < -0.2:
        return 'Cool'
    if value > 0.2:
        return 'Warm'
    return 'Neutral'


# Label every month's anomaly using categorize_anomaly.
melted_data['Anomaly_Category'] = melted_data['Anomaly'].apply(categorize_anomaly)

# e) Calculate the percentage of 'Warm' months for each decade
# Floor each year to the start of its decade (e.g. 1987 -> 1980).
melted_data['Decade'] = (melted_data['Year'] // 10) * 10
# Only months that actually have a value count towards the percentage;
# otherwise missing months would inflate the denominator.
observed = melted_data.dropna(subset=['Anomaly'])
warm_percentage = (
    observed['Anomaly_Category'].eq('Warm').groupby(observed['Decade']).mean() * 100
)
print("\nPercentage of 'Warm' months for each decade:")
print(warm_percentage)

# f) Save the DataFrame to Excel
# NOTE: to_excel needs an Excel writer engine (e.g. openpyxl) to be installed.
melted_data.to_excel('temp_anomaly_data.xlsx', index=False)
print("\nData saved to 'temp_anomaly_data.xlsx'")