diff --git a/app_modules/functions_app.py b/app_modules/functions_app.py index 4c14de71..85474c90 100644 --- a/app_modules/functions_app.py +++ b/app_modules/functions_app.py @@ -1,6 +1,6 @@ import pandas as pd -from .helper_modules.geodata import add_state_abbreviations, merge_with_fips, merge_with_geolocation -from .query_gbq import query_table +from app_modules.helper_modules.geodata import add_state_abbreviations, merge_with_fips, merge_with_geolocation +from app_modules.query_gbq import query_table from google.api_core.exceptions import GoogleAPIError diff --git a/app_modules/visualizations_app.py b/app_modules/visualizations_app.py index 313e93f0..93a13657 100644 --- a/app_modules/visualizations_app.py +++ b/app_modules/visualizations_app.py @@ -6,7 +6,7 @@ import plotly.graph_objects as go from plotly.subplots import make_subplots import requests -from .functions_app import ( +from app_modules.functions_app import ( prep_bird_flu_data, prep_wild_bird_data, prep_egg_price_data, @@ -14,6 +14,53 @@ ) +# === New Function: CREATE GEOSPATIAL PLOT === +def create_geospatial(data): + """ + Creates a geospatial plot using latitude, longitude, and flock size. + """ + fig = px.scatter_mapbox( + data, + lat="lat", + lon="lng", + size="Flock Size", + color="Flock Size", + color_continuous_scale="Viridis", + mapbox_style="carto-positron", + zoom=3, + title="Geospatial Visualization" + ) + return fig + + +# === New Function: CREATE TIME SERIES PLOT === +def create_time_series(egg_price_df, stock_price_df): + """ + Creates a time series plot comparing egg prices and stock prices. + """ + fig = make_subplots(specs=[[{"secondary_y": True}]]) + + fig.add_trace( + go.Scatter(x=egg_price_df.index, y=egg_price_df["Avg_Price"], name="Egg Price"), + secondary_y=False, + ) + + fig.add_trace( + go.Scatter(x=stock_price_df.index, y=stock_price_df["Close/Last"], name="Stock Price"), + secondary_y=True, + ) + + fig.update_layout( + title="Egg Prices vs Stock Prices", + xaxis_title="Date", + height=500, + ) + + fig.update_yaxes(title_text="Egg Price (USD)", secondary_y=False) + fig.update_yaxes(title_text="Stock Price (USD)", secondary_y=True) + + return fig + # === 1. EGG PRICE vs STOCK PRICE TIME SERIES === def show_price_comparison(egg_df, stock_df, stock_name="Selected Stock"): @@ -67,20 +114,19 @@ def show_bird_flu_trends(): # === 3. COMBINED OVERVIEW === def show_combined_dashboard(): - - #Loading data + # Loading data egg_df = prep_egg_price_data("egg_prices") calm_df, _, _ = prep_stock_price_data("calmaine") for col in ["Close_Last", "Open", "High", "Low"]: calm_df[col] = calm_df[col].replace(r'[\$,]', '', regex=True).astype(float) flu_df = prep_bird_flu_data("bird_flu") - #prepping birdflu + # Prepping bird flu flu_df.rename(columns={"Outbreak Date": "Date"}, inplace=True) flu_df = flu_df.groupby("Date")["Flock Size"].sum().reset_index() flu_df = flu_df.set_index("Date").resample("M").sum().reset_index() - # resample stocks + # Resample stocks calm_df = calm_df.set_index("Date").resample("M").mean().reset_index() fig = make_subplots(specs=[[{"secondary_y": True}]]) @@ -106,127 +152,3 @@ def show_combined_dashboard(): fig.update_yaxes(title_text="Stock Price (USD)", secondary_y=True) st.plotly_chart(fig, use_container_width=True) - -def show_wild_bird_map(wild_bird_df, bird_flu_df): - """ - Displays a cumulative-progressive map: - State color = chicken deaths (Sum of Flock Size) - Circles = wild bird infections (Sum of Wild Birds) - Data accumulates progressively from Jan 2022. - Requires https://raw.githubusercontent.com/advanced-computing/chicken_egg/main/app_data/us_states.geojson - """ - - # Cargar el GeoJSON de estados - url = "https://raw.githubusercontent.com/advanced-computing/chicken_egg/main/app_data/us_states.geojson" - response = requests.get(url) - geojson = response.json() - - # Normalizar nombres de estado en tus datos - wild_df = wild_bird_df.copy() - flock_df = bird_flu_df.copy() - wild_df["State"] = wild_df["State"].str.title() - flock_df["State"] = flock_df["State"].str.title() - - # Filtrar GeoJSON para incluir solo estados en los datasets - geojson_states = [f["properties"]["NAME"] for f in geojson["features"]] - used_states = set(wild_df["State"].unique()).union(set(flock_df["State"].unique())) - valid_states = sorted(set(geojson_states).intersection(used_states)) - geojson["features"] = [f for f in geojson["features"] if f["properties"]["NAME"] in valid_states] - - # Preparar columnas de mes - wild_df['Month'] = pd.to_datetime(wild_df['Date Detected'], errors='coerce').dt.to_period("M").dt.to_timestamp() - flock_df['Month'] = pd.to_datetime(flock_df['Outbreak Date'], errors='coerce').dt.to_period("M").dt.to_timestamp() - - # Agrupar por mes y estado - wild_grouped = wild_df.groupby(['Month', 'State']).size().reset_index(name='Wild Count') - flock_grouped = flock_df.groupby(['Month', 'State']).agg({ - 'Flock Size': 'sum', - 'lat': 'mean', - 'lng': 'mean' - }).reset_index() - - # Unir ambos - merged = pd.merge(flock_grouped, wild_grouped, on=['Month', 'State'], how='left') - merged['Wild Count'] = merged['Wild Count'].fillna(0) - merged['Month_str'] = merged['Month'].dt.strftime("%b %Y") - - # Timeline - months_sorted = sorted(merged['Month'].dropna().unique()) - month_strs = [pd.to_datetime(m).strftime("%b %Y") for m in months_sorted] - - selected_label = st.select_slider("🗓️ Progressive Timeline (Cumulative to...)", options=month_strs, value=month_strs[0]) - selected_cutoff = pd.to_datetime(selected_label) - - # Acumulado hasta la fecha seleccionada - cumulative_view = merged[merged['Month'] <= selected_cutoff] - - if cumulative_view.empty: - st.info("No data available up to this date.") - return - - # Agrupar por estado acumulado - view = cumulative_view.groupby("State").agg({ - "Flock Size": "sum", - "Wild Count": "sum", - "lat": "mean", - "lng": "mean" - }).reset_index() - - # Hover info - view["Hover"] = ( - "State: " + view["State"] + - "
Wild Infections: " + view["Wild Count"].astype(int).astype(str) + - "
Flock Deaths: " + view["Flock Size"].astype(int).astype(str) - ) - - # Pintar estados por Flock Size (choropleth) - fig = px.choropleth_mapbox( - view, - geojson=geojson, - locations="State", - featureidkey="properties.NAME", - color="Flock Size", - color_continuous_scale="YlOrRd", - range_color=(0, view["Flock Size"].max()), - mapbox_style="carto-darkmatter", - zoom=3, - center={"lat": 37.8, "lon": -96}, - opacity=0.6, - labels={"Flock Size": "Flock Deaths"}, - height=600 - ) - - # Añadir globos por Wild Count - fig.add_scattermapbox( - lat=view["lat"], - lon=view["lng"], - mode="markers", - marker=px.scatter_mapbox( - view, - lat="lat", - lon="lng", - size="Wild Count", - size_max=50 - ).data[0].marker, - text=view["State"] + "
Wild Cases: " + view["Wild Count"].astype(int).astype(str), - hoverinfo="text", - name="Wild Bird Infections" - ) - - fig.update_layout( - title=f"📍 Wild Bird Infections & Chicken Deaths – Cumulative until {selected_label}", - margin={"r": 0, "t": 50, "l": 0, "b": 0}, - paper_bgcolor="#111111", - font_color="white", - legend_title_text="Chicken Deaths (State Color)", - ) - - # Descripción superior - st.markdown(""" - **Map Explanation** - - **State Color**: Number of chickens lost due to outbreaks (Flock Size) - - **Bubble Size**: Number of wild bird infections detected - - Use the slider to view how both have progressed from Jan 2022 until now. - """) - - st.plotly_chart(fig, use_container_width=True) diff --git a/app_tests/test_data_prep.py b/app_tests/test_data_prep.py index 887f4773..872e40e1 100644 --- a/app_tests/test_data_prep.py +++ b/app_tests/test_data_prep.py @@ -4,12 +4,14 @@ from app_modules.functions_app import ( prep_bird_flu_data, prep_egg_price_data, - prep_stock_price_data + prep_stock_price_data, + prep_wild_bird_data, # Added import for prep_wild_bird_data ) from app_tests.test_helper_data_prep import ( create_stock_ex, create_egg_price_ex, create_bird_flu_ex, + create_wild_bird_ex, # Added import for create_wild_bird_ex ) @pytest.mark.parametrize( @@ -18,6 +20,7 @@ (prep_stock_price_data, pd.DataFrame({"Open": [100, 101, 102]}), KeyError), # Missing 'Close/Last' (prep_egg_price_data, pd.DataFrame({"Price": [2.5, 3.0, 3.2]}), ValueError), # Missing 'Year' (prep_bird_flu_data, pd.DataFrame({"Flock Size": [10, 20]}), KeyError), # Missing 'State' + (prep_wild_bird_data, pd.DataFrame({"County": ["A", "B"]}), KeyError), # Missing 'State' ] ) def test_prep_functions_raise_errors(func, df, expected_exception): @@ -51,3 +54,20 @@ def test_bird_flu_flock_size_is_numeric(): bird_flu_df = pd.read_csv(StringIO(create_bird_flu_ex())) df = prep_bird_flu_data(bird_flu_df) assert pd.api.types.is_numeric_dtype(df["Flock Size"]), "Flock Size should be numeric." + +def test_wild_bird_data_has_lat_lng(): + """ + Test that prep_wild_bird_data returns a DataFrame with 'lat' and 'lng' columns. + """ + wild_bird_df = pd.read_csv(StringIO(create_wild_bird_ex())) + df = prep_wild_bird_data(wild_bird_df) + assert "lat" in df.columns, "DataFrame must have 'lat' column." + assert "lng" in df.columns, "DataFrame must have 'lng' column." + +def test_wild_bird_data_missing_columns(): + """ + Test that prep_wild_bird_data raises an error if required columns are missing. + """ + wild_bird_df = pd.DataFrame({"County": ["A", "B"]}) # Missing 'State' + with pytest.raises(KeyError): + prep_wild_bird_data(wild_bird_df) diff --git a/app_tests/test_helper_data_prep.py b/app_tests/test_helper_data_prep.py index 05d990f1..24e5e5dd 100644 --- a/app_tests/test_helper_data_prep.py +++ b/app_tests/test_helper_data_prep.py @@ -25,3 +25,13 @@ def create_bird_flu_ex(): Alabama,Montgomery,1000,32.3668052,-86.2999689 Georgia,Clarke,2000,33.9519347,-83.357567 """ + +def create_wild_bird_ex(): + """ + Returns example data similar to wild bird data + """ + return """State,County,Observation Count,lat,lng +Alabama,Montgomery,50,32.3668052,-86.2999689 +Georgia,Clarke,75,33.9519347,-83.357567 +Florida,Miami-Dade,100,25.7616798,-80.1917902 +""" diff --git a/app_tests/test_viz.py b/app_tests/test_viz.py index 750d5b3b..8c566cb6 100644 --- a/app_tests/test_viz.py +++ b/app_tests/test_viz.py @@ -1,6 +1,14 @@ import pytest import pandas as pd -from app_modules.visualizations_app import create_geospatial, create_time_series +from app_modules.visualizations_app import ( + create_geospatial, + create_time_series, + show_price_comparison, + show_bird_flu_trends, + show_combined_dashboard, +) +import streamlit as st + def test_create_geospatial(): sample_data = pd.DataFrame({ @@ -13,6 +21,7 @@ def test_create_geospatial(): assert fig and len(fig.data) > 0, "Geospatial plot should contain data points." + def test_create_time_series(): egg_price_df = pd.DataFrame({ 'Date': pd.date_range(start='2020-01-01', periods=5), @@ -27,3 +36,61 @@ def test_create_time_series(): fig = create_time_series(egg_price_df, stock_price_df) assert fig and len(fig.data) == 2, "Time series should contain two lines." + + +def test_show_price_comparison(mocker): + egg_price_df = pd.DataFrame({ + 'Date': pd.date_range(start='2020-01-01', periods=5), + 'Avg_Price': [1, 2, 3, 4, 5] + }).set_index('Date') + + stock_price_df = pd.DataFrame({ + 'Date': pd.date_range(start='2020-01-01', periods=5), + 'Close_Last': [10, 20, 30, 40, 50] + }) + + mocker.patch("streamlit.plotly_chart") # Mock Streamlit's plotly_chart + show_price_comparison(egg_price_df, stock_price_df, stock_name="Test Stock") + + st.plotly_chart.assert_called_once() # Ensure the chart was rendered + + +def test_show_bird_flu_trends(mocker): + mocker.patch("app_modules.visualizations_app.prep_bird_flu_data", return_value=pd.DataFrame({ + 'Outbreak Date': pd.date_range(start='2020-01-01', periods=5), + 'Flock Size': [10, 20, 30, 40, 50] + })) + + mocker.patch("streamlit.plotly_chart") # Mock Streamlit's plotly_chart + show_bird_flu_trends() + + st.plotly_chart.assert_called_once() # Ensure the chart was rendered + + +def test_show_combined_dashboard(mocker): + mocker.patch("app_modules.visualizations_app.prep_egg_price_data", return_value=pd.DataFrame({ + 'Date': pd.date_range(start='2020-01-01', periods=5), + 'Avg_Price': [1, 2, 3, 4, 5] + })) + + mocker.patch("app_modules.visualizations_app.prep_stock_price_data", return_value=( + pd.DataFrame({ + 'Date': pd.date_range(start='2020-01-01', periods=5), + 'Close_Last': [10, 20, 30, 40, 50], + 'Open': [5, 15, 25, 35, 45], + 'High': [15, 25, 35, 45, 55], + 'Low': [0, 10, 20, 30, 40] + }), + None, + None + )) + + mocker.patch("app_modules.visualizations_app.prep_bird_flu_data", return_value=pd.DataFrame({ + 'Outbreak Date': pd.date_range(start='2020-01-01', periods=5), + 'Flock Size': [10, 20, 30, 40, 50] + })) + + mocker.patch("streamlit.plotly_chart") # Mock Streamlit's plotly_chart + show_combined_dashboard() + + st.plotly_chart.assert_called() # Ensure charts were rendered