diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index c1d6ec6..9418da8 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: - python-version: [3.8, 3.11] + python-version: [3.9, 3.11] steps: - name: Checkout code diff --git a/app.py b/app.py index a3ac632..11314c5 100755 --- a/app.py +++ b/app.py @@ -128,6 +128,7 @@ def layout(): dcc.Store(id="stored-data-subnational"), dcc.Store(id="stored-data-func-econ"), dcc.Store(id="stored-data-subnat-boundaries"), + dcc.Store(id="stored-data-insights"), ] ) @@ -180,6 +181,14 @@ def fetch_data_once(data): } return no_update +@app.callback(Output("stored-data-insights", "data"), Input("stored-data-insights", "data")) +def fetch_insight_data_once(data): + if data is None: + df = db.get_expenditure_insight() + return { + "expenditure_insights": df.to_dict("records"), + } + return no_update @app.callback( Output("stored-data-func-econ", "data"), Input("stored-data-func-econ", "data") diff --git a/components/__init__.py b/components/__init__.py index 13fa6af..ebd1b57 100644 --- a/components/__init__.py +++ b/components/__init__.py @@ -1 +1,2 @@ from .year_slider import slider, get_slider_config +from .narrative_generator import get_segment_narrative diff --git a/components/narrative_generator.py b/components/narrative_generator.py new file mode 100644 index 0000000..7d01993 --- /dev/null +++ b/components/narrative_generator.py @@ -0,0 +1,36 @@ +""" +Thin wrapper around the shared trend-narrative package. + +The function signature (insight_df) is kept for compatibility with home.py. +All logic now lives in: https://github.com/yukinko-iwasaki/trend-narrative +""" + +from trend_narrative import ( + get_segment_narrative as _get_segment_narrative, + consolidate_segments, + millify, +) + + +def get_segment_narrative(insight_df): + """Generate a narrative string from a pre-filtered insight DataFrame. + + Parameters + ---------- + insight_df : pd.DataFrame + Single-row (or empty) DataFrame with columns: + ``metric_name``, ``segments``, ``cv_value``. + + Returns + ------- + str + Plain-English trend narrative, or empty string when no data. + """ + if insight_df is None or insight_df.empty: + return "" + + metric = insight_df["metric_name"].iloc[0] + segments = insight_df["segments"].iloc[0] + cv = insight_df["cv_value"].iloc[0] + + return _get_segment_narrative(segments=segments, cv_value=cv, metric=metric) diff --git a/pages/education.py b/pages/education.py index 92d86aa..b6ace0c 100644 --- a/pages/education.py +++ b/pages/education.py @@ -28,6 +28,7 @@ render_func_subnat_rank, ) from components.disclaimer_div import disclaimer_tooltip +from components import get_segment_narrative db = QueryService.get_instance() @@ -449,18 +450,20 @@ def total_edu_figure(df): return fig -def education_narrative(data, country): +def education_narrative(data, country, insight_df): spending = pd.DataFrame(data["edu_public_expenditure"]) spending = filter_country_sort_year(spending, country) spending.dropna(subset=["real_expenditure", "central_expenditure"], inplace=True) start_year = spending.year.min() end_year = spending.year.max() - start_value = spending[spending.year == start_year].real_expenditure.values[0] - end_value = spending[spending.year == end_year].real_expenditure.values[0] - spending_growth_rate = (end_value - start_value) / start_value - trend = "increased" if end_value > start_value else "decreased" - text = f"Between {start_year} and {end_year} after adjusting for inflation, total public spending on education in {country} has {trend} from ${millify(start_value)} to ${millify(end_value)}, reflecting a growth rate of {spending_growth_rate:.0%}. " + + trend_narrative = get_segment_narrative(insight_df) + if trend_narrative: + trend_narrative = trend_narrative[0].lower() + trend_narrative[1:] + text = f"After accounting for inflation, {trend_narrative} " + else: + text = "" spending["real_central_expenditure"] = ( spending.real_expenditure / spending.expenditure * spending.central_expenditure @@ -521,8 +524,9 @@ def education_narrative(data, country): Output("education-narrative", "children"), Input("stored-data-education-total", "data"), Input("country-select", "value"), + Input("stored-data-insights", "data"), ) -def render_overview_total_figure(data, country): +def render_overview_total_figure(data, country, insights_data): if data is None: return None @@ -535,8 +539,14 @@ def render_overview_total_figure(data, country): generate_error_prompt("DATA_UNAVAILABLE"), ) + insights_df = pd.DataFrame(insights_data["expenditure_insights"]) + insight_df = insights_df[ + (insights_df["country_name"] == country) & + (insights_df["dimension_filter"] == "Education") + ] + fig = total_edu_figure(df) - return fig, education_narrative(data, country) + return fig, education_narrative(data, country, insight_df) def public_private_narrative(df, country): diff --git a/pages/health.py b/pages/health.py index be57138..4652cd4 100644 --- a/pages/health.py +++ b/pages/health.py @@ -26,6 +26,7 @@ render_func_subnat_rank, ) from components.disclaimer_div import disclaimer_tooltip +from components import get_segment_narrative db = QueryService.get_instance() @@ -455,18 +456,20 @@ def total_health_figure(df): return fig -def health_narrative(data, country): +def health_narrative(data, country, insight_df): spending = pd.DataFrame(data["health_public_expenditure"]) spending = filter_country_sort_year(spending, country) spending.dropna(subset=["real_expenditure", "central_expenditure"], inplace=True) start_year = spending.year.min() end_year = spending.year.max() - start_value = spending[spending.year == start_year].real_expenditure.values[0] - end_value = spending[spending.year == end_year].real_expenditure.values[0] - spending_growth_rate = (end_value - start_value) / start_value - trend = "increased" if end_value > start_value else "decreased" - text = f"Between {start_year} and {end_year} after adjusting for inflation, total public spending on health in {country} has {trend} from ${millify(start_value)} to ${millify(end_value)}, reflecting a growth rate of {spending_growth_rate:.0%}. " + + trend_narrative = get_segment_narrative(insight_df) + if trend_narrative: + trend_narrative = trend_narrative[0].lower() + trend_narrative[1:] + text = f"After accounting for inflation, {trend_narrative} " + else: + text = "" spending["real_central_expenditure"] = ( spending.real_expenditure / spending.expenditure * spending.central_expenditure @@ -527,8 +530,9 @@ def health_narrative(data, country): Output("health-narrative", "children"), Input("stored-data-health-total", "data"), Input("country-select", "value"), + Input("stored-data-insights", "data"), ) -def render_overview_total_figure(data, country): +def render_overview_total_figure(data, country, insights_data): if data is None: return None @@ -541,8 +545,14 @@ def render_overview_total_figure(data, country): generate_error_prompt("DATA_UNAVAILABLE"), ) + insights_df = pd.DataFrame(insights_data["expenditure_insights"]) + insight_df = insights_df[ + (insights_df["country_name"] == country) & + (insights_df["dimension_filter"] == "Health") + ] + fig = total_health_figure(df) - return fig, health_narrative(data, country) + return fig, health_narrative(data, country, insight_df) def public_private_narrative(df, country): diff --git a/pages/home.py b/pages/home.py index bf30024..76ef7df 100755 --- a/pages/home.py +++ b/pages/home.py @@ -15,7 +15,7 @@ require_login, ) -from components import slider, get_slider_config, pefa, budget_increment_analysis +from components import slider, get_slider_config, pefa, budget_increment_analysis, get_segment_narrative from components.disclaimer_div import disclaimer_tooltip from constants import COFOG_CATS, FUNC_COLORS, MAP_DISCLAIMER from queries import QueryService @@ -497,34 +497,16 @@ def per_capita_figure(df): return fig -def overview_narrative(df): +def overview_narrative(df, insight_df): country = df.country_name.iloc[0] - earliest = df[df.year == df.year.min()].iloc[0].to_dict() + trend_narrative = get_segment_narrative(insight_df) + if trend_narrative: + trend_narrative = trend_narrative[0].lower() + trend_narrative[1:] + text = f"After accounting for inflation, {trend_narrative} " + else: + text = "" latest = df[df.year == df.year.max()].iloc[0].to_dict() - start_year = earliest["year"] end_year = latest["year"] - latest_year_with_real_exp = df[df.real_expenditure.notnull()].year.max() - latest_real_exp = df[df.year == latest_year_with_real_exp].iloc[0].to_dict() - - total_percent_diff = ( - 100 - * (latest_real_exp["real_expenditure"] - earliest["real_expenditure"]) - / earliest["real_expenditure"] - ) - total_trend = "increased" if total_percent_diff > 0 else "decreased" - - per_capita_percent_diff = ( - 100 - * ( - latest_real_exp["per_capita_real_expenditure"] - - earliest["per_capita_real_expenditure"] - ) - / earliest["per_capita_real_expenditure"] - ) - per_capita_trend = "increased" if per_capita_percent_diff > 0 else "decreased" - - text = f"After accounting for inflation, total public spending has {total_trend} by {total_percent_diff:.1f}% and per capita spending has {per_capita_trend} by {per_capita_percent_diff:.1f}% between {start_year} and {latest_year_with_real_exp}. " - decentral_mean = df.expenditure_decentralization.mean() * 100 decentral_latest = latest["expenditure_decentralization"] * 100 decentral_text = f"On average, {decentral_mean:.1f}% of total public spending is executed by local/regional government. " @@ -539,6 +521,7 @@ def overview_narrative(df): return text + def functional_figure(df): categories = sorted(df.func.unique(), reverse=True) @@ -952,11 +935,14 @@ def update_heading(country): Output("overview-narrative", "children"), Input("stored-data", "data"), Input("country-select", "value"), + Input("stored-data-insights", "data") ) -def render_overview_total_figure(data, country): +def render_overview_total_figure(data, country, insights_data): all_countries = pd.DataFrame(data["expenditure_w_poverty_by_country_year"]) df = filter_country_sort_year(all_countries, country) - return total_figure(df), per_capita_figure(df), overview_narrative(df) + insights_df = pd.DataFrame(insights_data["expenditure_insights"]) + insights_df = insights_df[(insights_df["country_name"] == country) & (insights_df['dimension_filter'] == "Total")] + return total_figure(df), per_capita_figure(df), overview_narrative(df, insights_df) @callback( diff --git a/queries.py b/queries.py index a42888a..fb89aee 100644 --- a/queries.py +++ b/queries.py @@ -247,3 +247,10 @@ def get_user_credentials(self): """ df = self.execute_query(query) return dict(zip(df["username"], df["salted_password"])) + + def get_expenditure_insight(self): + query = f""" + SELECT * + FROM prd_mega.{BOOST_SCHEMA}.expenditure_insights + """ + return self.fetch_data(query) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5e0ff41..f53a067 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ flask_login bcrypt dotenv plotly==5.22.0 -colormath \ No newline at end of file +colormath +trend-narrative \ No newline at end of file