From 19bc481d2608b64f7c9d773ff5255443d4a95d41 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Wed, 24 Sep 2025 14:02:25 -0400 Subject: [PATCH 01/15] +Ignore summary section on DRC Data worksheets - see HEA-740 --- pipelines/assets/livelihood_activity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index ec46276..242b7da 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -116,6 +116,7 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> "Revenus moins dépenses", "Revenu moins dépense", "revenu moins dépenses", # 2023 Mali BSSs + "revenu mois dépenses", # 2024 DRC BSSs ], header_rows=HEADER_ROWS, ) From 759b9176991f7c858fd1decdede9f9d44e5b91f1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Wed, 24 Sep 2025 21:48:10 -0400 Subject: [PATCH 02/15] Fix unicode output in JSON previews - see HEA-573 --- pipelines/assets/baseline.py | 4 ++-- pipelines/assets/fixtures.py | 8 ++++---- pipelines/assets/livelihood_activity.py | 6 ++++-- pipelines/assets/other_cash_income.py | 4 +++- pipelines/assets/wealth_characteristic.py | 6 ++++-- pipelines/assets/wild_foods.py | 4 +++- pipelines/resources.py | 2 +- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py index c170f15..78eb47f 100644 --- a/pipelines/assets/baseline.py +++ b/pipelines/assets/baseline.py @@ -266,7 +266,7 @@ def baseline_instances( } try: - preview = json.dumps(result, indent=4) + preview = json.dumps(result, indent=4, ensure_ascii=False) except TypeError as e: raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e @@ -359,7 +359,7 @@ def community_instances(context: AssetExecutionContext, config: BSSMetadataConfi result = {"Community": community_df.to_dict(orient="records")} try: - preview = json.dumps(result, indent=4) + preview = json.dumps(result, indent=4, ensure_ascii=False) except TypeError as e: raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e diff --git a/pipelines/assets/fixtures.py b/pipelines/assets/fixtures.py index 13be77b..5e0574c 100644 --- a/pipelines/assets/fixtures.py +++ b/pipelines/assets/fixtures.py @@ -220,7 +220,7 @@ def validate_instances( metadata = {f"num_{key.lower()}": len(value) for key, value in instances.items()} metadata["total_instances"] = sum(len(value) for value in instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4, ensure_ascii=False)}\n```") return instances, metadata @@ -287,7 +287,7 @@ def get_fixture_from_instances(instance_dict: dict[str, list[dict]]) -> tuple[li metadata[f'num_{str(model._meta).split(".")[-1]}'] += 1 metadata["total_instances"] = len(fixture) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```") return fixture, metadata @@ -300,7 +300,7 @@ def import_fixture(fixture: list[dict]) -> dict: # We need to use a .verbose_json file extension for Django to use the correct serializer. 
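+    # By default json.dumps() escapes non-ASCII characters as \uXXXX sequences (ensure_ascii=True),
+    # so ensure_ascii=False is passed below to keep accented labels such as "Synthèse" readable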
with tempfile.NamedTemporaryFile(mode="w+", suffix=".verbose_json") as f: # Write the fixture to a temporary file so that Django can access it - f.write(json.dumps(fixture)) + f.write(json.dumps(fixture, indent=4, ensure_ascii=False)) f.seek(0) call_command(verbose_load_data.Command(), f.name, verbosity=2, format="verbose_json", stdout=output_buffer) @@ -309,7 +309,7 @@ def import_fixture(fixture: list[dict]) -> dict: for instance in fixture: metadata[f'num_{instance["model"].split(".")[-1]}'] += 1 metadata["total_instances"] = len(fixture) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```") metadata["output"] = MetadataValue.md(f"```\n{output_buffer.getvalue()}\n```") return metadata diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index 242b7da..b20d184 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -1059,7 +1059,7 @@ def get_instances_from_dataframe( ) * 100 ), - "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"), + "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"), } if not unrecognized_labels.empty: metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False)) @@ -1114,7 +1114,9 @@ def livelihood_activity_valid_instances( valid_instances, metadata = validate_instances(context, livelihood_activity_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py index b6d0c48..edf3957 100644 --- a/pipelines/assets/other_cash_income.py +++ b/pipelines/assets/other_cash_income.py @@ -171,7 +171,9 @@ def other_cash_income_valid_instances( valid_instances, metadata = validate_instances(context, other_cash_income_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/wealth_characteristic.py b/pipelines/assets/wealth_characteristic.py index 5a0d12f..9641f5f 100644 --- a/pipelines/assets/wealth_characteristic.py +++ b/pipelines/assets/wealth_characteristic.py @@ -450,7 +450,7 @@ def wealth_characteristic_instances( ) * 100 ), - "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"), + "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"), } if not unrecognized_labels.empty: metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False)) @@ -473,7 +473,9 @@ def wealth_characteristic_valid_instances( valid_instances, 
metadata = validate_instances(context, wealth_characteristic_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py index c1f3a71..4ab7ece 100644 --- a/pipelines/assets/wild_foods.py +++ b/pipelines/assets/wild_foods.py @@ -181,7 +181,9 @@ def wild_foods_valid_instances( valid_instances, metadata = validate_instances(context, wild_foods_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/resources.py b/pipelines/resources.py index 36c3109..3260241 100644 --- a/pipelines/resources.py +++ b/pipelines/resources.py @@ -70,7 +70,7 @@ def dump_to_path(self, context: OutputContext, obj: Any, path: "UPath"): self.unlink(path) with path.open("w") as file: - file.write(json.dumps(obj, indent=4)) + file.write(json.dumps(obj, indent=4, ensure_ascii=False)) def load_from_path(self, context: InputContext, path: "UPath") -> Any: with path.open("r") as file: From 81b8ebdde87d1d73d7b3819bcee3c45f678245e3 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 25 Sep 2025 12:21:17 -0400 Subject: [PATCH 03/15] Add income, expenditure and kcals totals to WealthGroup in wealth_characteristic_instances - see HEA-573 --- pipelines/__init__.py | 2 + pipelines/assets/livelihood_activity.py | 18 ++++++ pipelines/assets/wealth_characteristic.py | 76 ++++++++++++++++++++--- 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index 7c49004..3a36531 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -23,6 +23,7 @@ livelihood_activity_instances, livelihood_activity_label_dataframe, livelihood_activity_valid_instances, + livelihood_summary_dataframe, summary_livelihood_activity_labels_dataframe, ) from .assets.other_cash_income import ( @@ -82,6 +83,7 @@ baseline_instances, community_instances, livelihood_activity_dataframe, + livelihood_summary_dataframe, livelihood_activity_label_dataframe, all_livelihood_activity_labels_dataframe, summary_livelihood_activity_labels_dataframe, diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index b20d184..5608288 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -122,6 +122,24 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> ) +@asset(partitions_def=bss_instances_partitions_def) +def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> Output[pd.DataFrame]: + """ + DataFrame of the Livelihood Activity Summary from a BSS + + The summary is at the end of the Data worksheet, after the main Livelihood Activities. 
+    It contains the total values for income, expenditure, kcals consumed, etc. for each Wealth Group.
+    """
+    return get_bss_dataframe(
+        config,
+        corrected_files,
+        "Data",
+        start_strings=["food summary: total (%)", "synthèse de nourriture : total (%)"],
+        end_strings=["wealth characteristics", "caractéristiques socio-économiques"],
+        header_rows=HEADER_ROWS,
+    )
+
+
 @asset(partitions_def=bss_instances_partitions_def)
 def livelihood_activity_label_dataframe(
     context: AssetExecutionContext,
diff --git a/pipelines/assets/wealth_characteristic.py b/pipelines/assets/wealth_characteristic.py
index 9641f5f..eec7314 100644
--- a/pipelines/assets/wealth_characteristic.py
+++ b/pipelines/assets/wealth_characteristic.py
@@ -172,6 +172,7 @@ def wealth_characteristic_instances(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
     wealth_characteristic_dataframe,
+    livelihood_summary_dataframe,
 ) -> Output[dict]:
     """
     WealthGroup and WealthGroupCharacteristicValue instances extracted from the BSS.
@@ -382,17 +383,29 @@ def wealth_characteristic_instances(
     wealth_group_df = wealth_group_df[wealth_group_df["wealth_group_category"].notnull()]
     # We also need to add an extra row for each Wealth Group Category with a null Community, to create the
     # Baseline Wealth Groups.
+    baseline_wealth_group_df = wealth_group_df[wealth_group_df["community"] == wealth_group_df.iloc[0]["community"]][
+        [
+            "wealth_group_category_original",
+            "wealth_group_category",
+            "livelihood_zone_baseline",
+        ]
+    ].reset_index(drop=True)
+    baseline_wealth_group_df["community"] = None
+    baseline_wealth_group_df["district"] = ""
+    baseline_wealth_group_df["name"] = ""
+    baseline_wealth_group_df["full_name"] = ""
+    baseline_wealth_group_df["natural_key"] = baseline_wealth_group_df["wealth_group_category"].apply(
+        lambda wealth_group_category: [
+            livelihood_zone_baseline.livelihood_zone_id,
+            livelihood_zone_baseline.reference_year_end_date.isoformat(),
+            wealth_group_category,
+            "",
+        ]
+    )
     wealth_group_df = pd.concat(
         [
             wealth_group_df,
-            wealth_group_df[wealth_group_df["community"] == wealth_group_df.iloc[0]["community"]][
-                [
-                    "wealth_group_category_original",
-                    "wealth_group_category",
-                    "livelihood_zone_baseline",
-                    "community",
-                ]
-            ].assign(community=None),
+            baseline_wealth_group_df,
         ]
     )
 
@@ -434,6 +447,53 @@ def wealth_characteristic_instances(
         wealth_group_df, extra_attributes_df, on=["full_name", "wealth_group_category"], how="left"
     )
 
+    # We also need total income, expenditure and kcals from the summary section on the Data worksheet
+    # First drop any rows that are neither header rows nor totals rows.
The totals rows are identified by
+    # having a label that starts with "Total" or "Synthèse"
+    summary_df = livelihood_summary_dataframe[
+        (livelihood_summary_dataframe.index.isin(HEADER_ROWS))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("total"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("synthèse"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("food summary"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("income summary"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("expenditure summary"))
+    ]
+    # Check we found the expected number of rows
+    if summary_df.shape[0] != 6:
+        raise ValueError(
+            f'Expected 6 rows in summary_df, but found {summary_df.shape[0]}: {", ".join(summary_df.iloc[:, 0].tolist())}'
+        )
+    # Rename the headings in column A for the totals rows
+    summary_df.iloc[3, 0] = "percentage_kcals"
+    summary_df.iloc[4, 0] = "income"
+    summary_df.iloc[5, 0] = "expenditure"
+
+    # Now transpose the dataframe and then join it to the wealth groups so we can access
+    # the real full_name and wealth_group_category
+    summary_df = pd.merge(
+        summary_df.set_index("A").transpose(),
+        get_wealth_group_dataframe(summary_df, livelihood_zone_baseline, "Data", partition_key).set_index(
+            "bss_column"
+        ),
+        left_index=True,
+        right_index=True,
+    )
+
+    # Add the summary attributes to the Wealth Groups
+    wealth_group_df = pd.merge(
+        wealth_group_df,
+        summary_df[["full_name", "wealth_group_category", "income", "expenditure", "percentage_kcals"]],
+        on=["full_name", "wealth_group_category"],
+        how="left",
+    )
+
+    # Calculate the kcals_consumed
+    # Derive it by multiplying percentage_kcals by:
+    # 2100 (kcals per person per day) * 365 (days per year) * average_household_size
+    wealth_group_df["kcals_consumed"] = (
+        wealth_group_df["percentage_kcals"] * 2100 * 365 * wealth_group_df["average_household_size"]
+    )
+
     result = {
         "WealthGroup": wealth_group_df.to_dict(orient="records"),
         "WealthGroupCharacteristicValue": wealth_group_characteristic_values,

From 9c2057f4ed46ad9782703698befb8bf47c7a85a7 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 25 Sep 2025 12:26:01 -0400 Subject: [PATCH 04/15] Fix bug in get_wealth_group_dataframe for WB - see HEA-573 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Francophone BSSs have `à` in row 4 in the WB worksheet in the summary columns (`synthèse | de | à`). And `a` is an alias for the B/O Wealth Group Category (from Aisé). So the previous code was matching that and thinking the data was for B/O instead of part of the WB summary.

--- pipelines/assets/baseline.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py
index 78eb47f..67ebb1a 100644
--- a/pipelines/assets/baseline.py
+++ b/pipelines/assets/baseline.py
@@ -78,17 +78,15 @@ def get_wealth_group_dataframe(
     # In the Summary columns in the Data, Data2, Data3 worksheets, the Wealth
     # Group Category is in Row 4 (District) rather than Row 3 (Wealth Group Category)
     # so do a second lookup to update the blank rows.
-    # If this doesn't find any new values, then it's because in a WB worksheet
-    # there are no extra Wealth Group Categories on Row 4
-    try:
+    # Note that in a WB worksheet there are no extra Wealth Group Categories on Row 4
+    if worksheet_name != "WB":
         wealth_group_df = wealthgroupcategorylookup.do_lookup(
             wealth_group_df, "district", "wealth_group_category", update=True
         )
         # Remove the duplicate wealth_group_category_original column created by the second do_lookup(),
         # which otherwise causes problems when trying to merge dataframes, e.g. when building the wealth_group_df.
         wealth_group_df = wealth_group_df.loc[:, ~wealth_group_df.columns.duplicated()]
-    except ValueError:
-        pass
+
     # Check if there are unrecognized wealth group categories and report
     wealth_group_missing_category_df = wealth_group_df[
         wealth_group_df["wealth_group_category"].isnull()

From 13b8755206bd5976415640dc00bb2fdb22bbe01f Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:06:40 -0500 Subject: [PATCH 05/15] Remove amd64 platform requirement - see HEA-760

--- docker/app/Dockerfile | 2 +- docker/db/Dockerfile | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docker/app/Dockerfile b/docker/app/Dockerfile
index cf47a02..61a383b 100644
--- a/docker/app/Dockerfile
+++ b/docker/app/Dockerfile
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 python:3.12-bookworm as base
+FROM python:3.12-bookworm as base
 
 # set up apt repositories for postgres installation
 RUN curl -s https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | tee /usr/share/keyrings/pgdg.gpg >/dev/null && \
diff --git a/docker/db/Dockerfile b/docker/db/Dockerfile
index dbccb48..ea0a4f0 100644
--- a/docker/db/Dockerfile
+++ b/docker/db/Dockerfile
@@ -1,3 +1,5 @@
-FROM --platform=linux/amd64 postgis/postgis:17-3.5
+# Use a third party multi-arch base image for compatibility with both ARM and AMD architectures
+# until PostGIS fixes https://github.com/postgis/docker-postgis/issues/216
+FROM ghcr.io/baosystems/postgis:17-3.5
 
 COPY create_db.sh /docker-entrypoint-initdb.d/create_db.sh

From d747ea102872c46986bba9350f9640b1d31477b3 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:07:20 -0500 Subject: [PATCH 06/15] Fix create_db.sh - see HEA-760

--- docker/db/create_db.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/db/create_db.sh b/docker/db/create_db.sh
index fd78e59..08ac769 100755
--- a/docker/db/create_db.sh
+++ b/docker/db/create_db.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-psql --set=CLIENT=$CLIENT --set=APP=$APP --set=ENV=$ENV --set=POSTGRES_PASSWORD=$POSTGRES_PASSWORD --set=DAGSTER_PASSWORD=$DAGSTER_PASSWORD --set=CREATE_TEMPLATE=${CREATE_TEMPLATE:-false} -d postgres --echo-all << EOF
+psql --set=CLIENT=$CLIENT --set=APP=$APP --set=ENV=$ENV --set=POSTGRES_PASSWORD=$POSTGRES_PASSWORD --set=CREATE_TEMPLATE=${CREATE_TEMPLATE:-false} -d postgres --echo-all << EOF
 
 \set DATABASE :CLIENT :APP :ENV
 \set OWNER :CLIENT :APP :ENV
@@ -74,7 +74,7 @@ ALTER DEFAULT PRIVILEGES IN SCHEMA :SCHEMA GRANT SELECT ON TABLES TO :OWNER;
 
 \set DAGSTER :CLIENT :APP :ENV
 
-CREATE ROLE :DAGSTER PASSWORD :'DAGSTER_PASSWORD' NOLOGIN CREATEDB NOCREATEROLE NOSUPERUSER;
+CREATE ROLE :DAGSTER PASSWORD :'POSTGRES_PASSWORD' NOLOGIN CREATEDB NOCREATEROLE NOSUPERUSER;
 COMMENT ON ROLE :DAGSTER IS 'Main Dagster pipeline user for :CLIENT :PRJ :ENV';
 GRANT :DAGSTER TO :OWNER;
 GRANT CONNECT, TEMPORARY, CREATE ON DATABASE :DATABASE TO :DAGSTER;

From ec1fe099f09af2a3ce4b411dabf5ead200f5fa42 Mon Sep 17 00:00:00 2001 From:
Roger Hunwicks Date: Sat, 4 Oct 2025 17:09:56 -0500 Subject: [PATCH 07/15] Remove Pyrseas - see HEA-370

We don't use Pyrseas at all these days - it is a legacy from before Django had full-featured SQL migrations.

--- requirements/test.txt | 2 -- 1 file changed, 2 deletions(-)

diff --git a/requirements/test.txt b/requirements/test.txt
index 692ce0a..5a10e48 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,6 +1,4 @@
 -r base.txt
 beautifulsoup4==4.12.2
 coverage==7.2.7
-# Pyrseas==0.10.0 raises "KeyError: ('public', 'spatial_ref_sys')", --schema/--exclude-schema don't fix it.
-Pyrseas==0.9.1
 safety==3.6.1

From c2d658bbe18b4c604e2b2e727c049dbe1a4505c1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:11:56 -0500 Subject: [PATCH 08/15] Remove redundant environment variables - see HEA-370

--- docker-compose.yml | 7 ------- 1 file changed, 7 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index e75dd65..bc76f3e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -110,14 +110,7 @@ services:
       MINIO_ENDPOINT_URL: http://minio:9000
       SUPPORT_EMAIL_ADDRESS: ${SUPPORT_EMAIL_ADDRESS}
       DJANGO_MIGRATE: 1
-      KILUIGI_INTERMEDIATETARGET_BACKEND_CLASS: ${KILUIGI_INTERMEDIATETARGET_BACKEND_CLASS}
-      KILUIGI_INTERMEDIATETARGET_ROOT_PATH: ${KILUIGI_INTERMEDIATETARGET_ROOT_PATH}
-      KILUIGI_FINALTARGET_BACKEND_CLASS: ${KILUIGI_FINALTARGET_BACKEND_CLASS}
-      KILUIGI_FINALTARGET_ROOT_PATH: ${KILUIGI_FINALTARGET_ROOT_PATH}
-      KILUIGI_REPORTTARGET_BACKEND_CLASS: ${KILUIGI_REPORTTARGET_BACKEND_CLASS}
-      KILUIGI_REPORTTARGET_ROOT_PATH: ${KILUIGI_REPORTTARGET_ROOT_PATH}
       GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS}
-      GOOGLE_ADMIN_EMAIL: ${GOOGLE_ADMIN_EMAIL}
     command:
       - --timeout=3600
       - --workers=12

From fe8783579292155e0f27eaf35bbd2f62656231f1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:50:13 -0500 Subject: [PATCH 09/15] Enable remote debugging using VSCode - see HEA-760

This change allows setting LAUNCHER in .env and then using VSCode to attach to the Python process running inside the Docker container(s)

--- README.md | 50 +++++++++++++++++++++++++++++ docker-compose.override.yml | 13 ++++++++ docker/app/run_dagster_daemon.sh | 7 ++-- docker/app/run_dagster_webserver.sh | 7 ++-- docker/app/run_django.sh | 5 ++- env.example | 5 +++ requirements/local.txt | 1 + 7 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7d78bda..2f29e30 100644
--- a/README.md
+++ b/README.md
@@ -23,3 +23,53 @@ baseline
 
 This produces a .puml file that can be rendered using a PlantUML server, either
 within your IDE or using a service like http://www.plantuml.com/.
+
+## Debugging inside Docker Containers
+
+The `LAUNCHER` environment variable sets a wrapper program around the Python process
+(`gunicorn`, `dagster-daemon`, `dagster-webserver`). This can be used to
+enable a debugger inside Docker Containers:
+
+1. Set `LAUNCHER="python3 -m debugpy --listen 0.0.0.0:5678"` in `.env`
+2.
Create Launch Configurations in Visual Studio Code like:
+
+```json
+    {
+        "name": "Python: Attach to app (Docker Container)",
+        "type": "debugpy",
+        "request": "attach",
+        "connect": {
+            "host": "localhost",
+            "port": 5678
+        },
+        "pathMappings": [
+            {
+                "localRoot": "${workspaceFolder:hea}",
+                "remoteRoot": "/usr/src/app"
+            }
+        ],
+        "django": true,
+        "justMyCode": false
+    },
+    {
+        "name": "Python: Attach to dagster-daemon (Docker Container)",
+        "type": "debugpy",
+        "request": "attach",
+        "connect": {
+            "host": "localhost",
+            "port": 5680
+        },
+        "pathMappings": [
+            {
+                "localRoot": "${workspaceFolder:hea}",
+                "remoteRoot": "/usr/src/app"
+            }
+        ],
+        "django": true,
+        "justMyCode": false
+    }
+```
+
+3. Start the Docker containers as normal, and then use the Run and Debug
+pane in Visual Studio Code to launch the configuration that attaches to
+the desired server.
\ No newline at end of file
diff --git a/docker-compose.override.yml b/docker-compose.override.yml
index a1ffcc6..dd8e618 100644
--- a/docker-compose.override.yml
+++ b/docker-compose.override.yml
@@ -15,6 +15,7 @@ services:
     build:
       target: dev
     ports:
+      - "5678:5678"
      - "8000:8000"
      - "8888:8888"
    volumes:
@@ -30,6 +31,9 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g. "debugpy" or "ddtrace"
+      # Disable frozen modules warning
+      PYDEVD_DISABLE_FILE_VALIDATION: 1
      # Put .coverage in a writable directory
      COVERAGE_FILE: log/.coverage
    command:
@@ -41,6 +45,7 @@
    restart: no
    ports:
      - "3000:3000"
+      - "5679:5678"
    volumes:
      - ./:/usr/src/app
      # Separate volumes for writable directories inside the container
@@ -54,9 +59,14 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g. "debugpy" or "ddtrace"
+      # Disable frozen modules warning
+      PYDEVD_DISABLE_FILE_VALIDATION: 1
 
  dagster_daemon:
    restart: no
+    ports:
+      - "5680:5678"
    volumes:
      - ./:/usr/src/app
      # Separate volumes for writable directories inside the container
@@ -70,4 +80,7 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g.
"debugpy" or "ddtrace" + # Disable frozen modules warning + PYDEVD_DISABLE_FILE_VALIDATION: 1 diff --git a/docker/app/run_dagster_daemon.sh b/docker/app/run_dagster_daemon.sh index 8beac63..87c46b3 100755 --- a/docker/app/run_dagster_daemon.sh +++ b/docker/app/run_dagster_daemon.sh @@ -13,5 +13,8 @@ echo Setting up logs touch log/django.log chown -R django:django log/* -echo Starting Dagster -gosu django dagster-daemon run $* \ No newline at end of file +echo Starting Dagster Daemon +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/dagster-daemon run $* \ No newline at end of file diff --git a/docker/app/run_dagster_webserver.sh b/docker/app/run_dagster_webserver.sh index e6b55a1..5e27a35 100755 --- a/docker/app/run_dagster_webserver.sh +++ b/docker/app/run_dagster_webserver.sh @@ -13,5 +13,8 @@ echo Setting up logs touch log/django.log chown -R django:django log/* -echo Starting Dagster -gosu django dagster-webserver -h 0.0.0.0 -p 3000 -m pipelines --path-prefix /${DAGSTER_WEBSERVER_PREFIX} $* \ No newline at end of file +echo Starting Dagster Webserver +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/dagster-webserver -h 0.0.0.0 -p 3000 -m pipelines --path-prefix /${DAGSTER_WEBSERVER_PREFIX} $* \ No newline at end of file diff --git a/docker/app/run_django.sh b/docker/app/run_django.sh index 73112fa..e6f2b48 100755 --- a/docker/app/run_django.sh +++ b/docker/app/run_django.sh @@ -40,7 +40,10 @@ touch log/django_sql.log chown -R django:django log/* echo Starting Gunicorn with DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE} -gosu django gunicorn ${APP}.wsgi:application \ +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/gunicorn ${APP}.wsgi:application \ --name ${APP}${ENV} \ --config $(dirname $(readlink -f "$0"))/gunicorn_config.py \ $* 2>&1 diff --git a/env.example b/env.example index 680e231..3c4775f 100644 --- a/env.example +++ b/env.example @@ -48,3 +48,8 @@ BSS_METADATA_WORKBOOK='gdrive://Database Design/BSS Metadata' # 15XVXFjbom1sScV BSS_METADATA_STORAGE_OPTIONS='{"token": "service_account", "access": "read_only", "creds": ${GOOGLE_APPLICATION_CREDENTIALS}, "root_file_id": "0AOJ0gJ8sjnO7Uk9PVA"}' BSS_FILES_FOLDER='gdrive://Discovery Folder/Baseline Storage Sheets (BSS)' BSS_FILES_STORAGE_OPTIONS='{"token": "service_account", "access": "read_only", "creds": ${GOOGLE_APPLICATION_CREDENTIALS}, "root_file_id": "0AOJ0gJ8sjnO7Uk9PVA"}' + +# LAUNCHER can be used to configure a wrapper program around the Python process +# For example, to add ddtrace or debugpy +# Use the VSCode debugger as a launcher +# LAUNCHER = "python3 -m debugpy --listen 0.0.0.0:5679" \ No newline at end of file diff --git a/requirements/local.txt b/requirements/local.txt index 4e95b36..8cd2465 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -1,2 +1,3 @@ -r test.txt -r lint.txt +debugpy \ No newline at end of file From 8c805dd7806672c8a78714f1ca5d6eb2201a689c Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 13:05:15 -0400 Subject: [PATCH 10/15] Allow LivelihoodSummary labels in ActivityLabel - see HEA-572 --- ...er_activitylabel_activity_type_and_more.py | 71 +++++++++++++++++++ apps/metadata/models.py | 17 +++-- pipelines/jobs/metadata.py | 10 +-- 3 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py diff --git 
a/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py b/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py new file mode 100644 index 0000000..40ca675 --- /dev/null +++ b/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py @@ -0,0 +1,71 @@ +# Generated by Django 5.2.6 on 2025-10-08 22:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("metadata", "0011_alter_activitylabel_additional_identifier"), + ] + + operations = [ + migrations.AlterField( + model_name="activitylabel", + name="activity_type", + field=models.CharField( + choices=[ + ("LivelihoodActivity", "Livelihood Activity"), + ("OtherCashIncome", "Other Cash Income"), + ("WildFoods", "Wild Foods, Fishing or Hunting"), + ("LivelihoodSummary", "Livelihood Summary"), + ], + default="LivelihoodActivity", + help_text="The type of Livelihood Activity the label is for: either a general Livelihood Activity, or an Other Cash Income activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the 'Data3' worksheet, or a label from the 'Summary' section of the 'Data' worksheet.", + max_length=20, + verbose_name="Activity Type", + ), + ), + migrations.AlterField( + model_name="activitylabel", + name="status", + field=models.CharField( + blank=True, + choices=[ + ("Regular Expression", "Processed by Regular Expression"), + ("Override", "Override automatically recognized metadata"), + ("Discussion", "Under Discussion"), + ("Correct BSS", "Correct the BSS"), + ("Ignore", "Ignore this label and associated data in the row"), + ], + max_length=20, + verbose_name="Status", + ), + ), + migrations.AlterField( + model_name="activitylabel", + name="strategy_type", + field=models.CharField( + blank=True, + choices=[ + ("MilkProduction", "Milk Production"), + ("ButterProduction", "Butter Production"), + ("MeatProduction", "Meat Production"), + ("LivestockSale", "Livestock Sale"), + ("CropProduction", "Crop Production"), + ("FoodPurchase", "Food Purchase"), + ("PaymentInKind", "Payment in Kind"), + ("ReliefGiftOther", "Relief, Gift or Other Food"), + ("Hunting", "Hunting"), + ("Fishing", "Fishing"), + ("WildFoodGathering", "Wild Food Gathering"), + ("OtherCashIncome", "Other Cash Income"), + ("OtherPurchase", "Other Purchase"), + ("LivestockProduction", "Livestock Production"), + ], + help_text="The type of livelihood strategy, such as crop production, or wild food gathering.", + max_length=30, + verbose_name="Strategy Type", + ), + ), + ] diff --git a/apps/metadata/models.py b/apps/metadata/models.py index e8bb949..bf0d939 100644 --- a/apps/metadata/models.py +++ b/apps/metadata/models.py @@ -393,11 +393,15 @@ class LabelStatus(models.TextChoices): OVERRIDE = "Override", _("Override automatically recognized metadata") DISCUSSION = "Discussion", _("Under Discussion") CORRECT_BSS = "Correct BSS", _("Correct the BSS") + IGNORE = "Ignore", _("Ignore this label and associated data in the row") class LivelihoodActivityType(models.TextChoices): LIVELIHOOD_ACTIVITY = "LivelihoodActivity", _("Livelihood Activity") # Labels from the 'Data' worksheet OTHER_CASH_INCOME = "OtherCashIncome", _("Other Cash Income") # Labels from the 'Data2' worksheet - WILD_FOODS = "WildFoods", _("Wild Foods") # Labels from the 'Data3' worksheet + WILD_FOODS = "WildFoods", _("Wild Foods, Fishing or Hunting") # Labels from the 'Data3' worksheet + LIVELIHOOD_SUMMARY = "LivelihoodSummary", _( + "Livelihood Summary" + ) # Labels 
from the 'Summary' section of the 'Data' worksheet activity_label = common_models.NameField(max_length=200, verbose_name=_("Activity Label")) activity_type = models.CharField( @@ -406,9 +410,9 @@ class LivelihoodActivityType(models.TextChoices): choices=LivelihoodActivityType.choices, default=LivelihoodActivityType.LIVELIHOOD_ACTIVITY, help_text=_( - "The type of Livelihood Activity, either a general Livelihood Activity, or an Other Cash Income " - "activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the " - "'Data3' worksheet." + "The type of Livelihood Activity the label is for: either a general Livelihood Activity, or an Other Cash " + "Income activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the " + "'Data3' worksheet, or a label from the 'Summary' section of the 'Data' worksheet." ), ) status = models.CharField(blank=True, max_length=20, choices=LabelStatus.choices, verbose_name=_("Status")) @@ -420,7 +424,10 @@ class LivelihoodActivityType(models.TextChoices): strategy_type = models.CharField( max_length=30, blank=True, - choices=LivelihoodStrategyType.choices, + # We add an additional choice for LivestockProduction here, which is only valid when + # activity_type is LivelihoodSummary. LivestockProduction is the total of MeatProduction, + # MilkProduction and ButterProduction, and is used in the Summary section of the Data worksheet only + choices=LivelihoodStrategyType.choices + [("LivestockProduction", _("Livestock Production"))], # type: ignore verbose_name=_("Strategy Type"), help_text=_("The type of livelihood strategy, such as crop production, or wild food gathering."), ) diff --git a/pipelines/jobs/metadata.py b/pipelines/jobs/metadata.py index ee1b681..089428f 100644 --- a/pipelines/jobs/metadata.py +++ b/pipelines/jobs/metadata.py @@ -35,7 +35,7 @@ from metadata.models import ActivityLabel # NOQA: E402 -def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df: pd.DataFrame): +def load_metadata_for_model(context: OpExecutionContext, sheet_name: str, model: models.Model, df: pd.DataFrame): """ Load the metadata from a single worksheet, passed as a DataFrame, into a Django model. """ @@ -112,7 +112,7 @@ def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df existing_instances.values(), fields=record.keys(), ) - context.log.info(f"Updated {num_instances} {model_name} instances") + context.log.info(f"Updated {num_instances} {sheet_name} instances") else: if model_name == "SourceOrganization": @@ -140,7 +140,7 @@ def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df update_fields=[k for k in record if k not in id_fields], unique_fields=id_fields, ) - context.log.info(f"Created or updated {len(instances)} {model_name} instances") + context.log.info(f"Created or updated {len(instances)} {sheet_name} instances") @op @@ -164,7 +164,7 @@ def load_all_metadata(context: OpExecutionContext, config: ReferenceDataConfig): # Iterate over the sheets in the ReferenceData workbook, in reverse order (because the Label sheets that # need Subject Matter Expert input are at beginning, and depend on the sheets at the end). for sheet_name in reversed(sheet_names): - if sheet_name in ["ActivityLabel", "OtherCashIncomeLabel", "WildFoodsLabel"]: + if sheet_name in ["ActivityLabel", "OtherCashIncomeLabel", "WildFoodsLabel", "SummaryLabel"]: model = ActivityLabel else: # Check whether the ReferenceData worksheet matches a Django model. 
@@ -179,7 +179,7 @@ def load_all_metadata(context: OpExecutionContext, config: ReferenceDataConfig): # If we found a model, then update the model from the contents of the Reference Data worksheet df = pd.read_excel(f, sheet_name).fillna("") try: - load_metadata_for_model(context, model, df) + load_metadata_for_model(context, sheet_name, model, df) except Exception as e: raise RuntimeError("Failed to create/update %s" % sheet_name) from e From bf70f686767d81359c285a78a8568a38df631bb2 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 13:05:41 -0400 Subject: [PATCH 11/15] Keep row order in dataframe samples - see HEA-572 --- pipelines/assets/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelines/assets/base.py b/pipelines/assets/base.py index 23760a1..faa1eb1 100644 --- a/pipelines/assets/base.py +++ b/pipelines/assets/base.py @@ -417,7 +417,7 @@ def get_bss_dataframe( df.loc[:, "B":].apply(lambda row: sum((row != 0) & (row != "")), axis="columns").sum() ), "preview": MetadataValue.md(df.head(config.preview_rows).to_markdown()), - "sample": MetadataValue.md(sample_df.sample(sample_rows).to_markdown()), + "sample": MetadataValue.md(sample_df.sample(sample_rows).sort_index().to_markdown()), }, ) @@ -477,7 +477,7 @@ def get_bss_label_dataframe( "num_summaries": int(label_df["in_summary"].sum()), # Escape the ~ in the partition_key, otherwise it is rendered as strikethrough "preview": MetadataValue.md(label_df.head(config.preview_rows).to_markdown().replace("~", "\\~")), - "sample": MetadataValue.md(sample_df.sample(sample_rows).to_markdown().replace("~", "\\~")), + "sample": MetadataValue.md(sample_df.sample(sample_rows).sort_index().to_markdown().replace("~", "\\~")), }, ) @@ -498,7 +498,7 @@ def get_all_bss_labels_dataframe( # Escape the ~ in the partition_key, otherwise it is rendered as strikethrough "preview": MetadataValue.md(df.head(config.preview_rows).to_markdown().replace("~", "\\~")), "sample": MetadataValue.md( - df[df["in_summary"]].sample(config.preview_rows).to_markdown().replace("~", "\\~") + df[df["in_summary"]].sample(config.preview_rows).sort_index().to_markdown().replace("~", "\\~") ), }, ) @@ -587,7 +587,8 @@ def translate_label(label, langs): label_metadata_df = pd.DataFrame.from_records(queryset) # Merge the label metadata into the dataframe - df = df.merge(label_metadata_df, left_on="label", right_on="label", how="left") + if not label_metadata_df.empty: + df = df.merge(label_metadata_df, left_on="label", right_on="label", how="left") # Rename the columns to match what we need in the GSheet when we run jobs.metadata.load_all_metadata df = df.rename( From b8b41b0466b276f9e29a64557081b8fa5d1185a1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 20:31:04 -0400 Subject: [PATCH 12/15] Add pct_income_recognized, etc to Instances metadata - see HEA-572 --- pipelines/__init__.py | 8 +- pipelines/assets/livelihood_activity.py | 316 +++++++++++++++++++++--- pipelines/assets/other_cash_income.py | 20 +- pipelines/assets/wild_foods.py | 20 +- 4 files changed, 302 insertions(+), 62 deletions(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index 3a36531..dc6a54b 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -17,6 +17,7 @@ ) from .assets.livelihood_activity import ( all_livelihood_activity_labels_dataframe, + all_livelihood_summary_labels_dataframe, imported_livelihood_activities, livelihood_activity_dataframe, livelihood_activity_fixture, @@ -25,6 +26,8 @@ 
livelihood_activity_valid_instances, livelihood_summary_dataframe, summary_livelihood_activity_labels_dataframe, + livelihood_summary_label_dataframe, + summary_livelihood_summary_labels_dataframe, ) from .assets.other_cash_income import ( all_other_cash_income_labels_dataframe, @@ -83,10 +86,13 @@ baseline_instances, community_instances, livelihood_activity_dataframe, - livelihood_summary_dataframe, livelihood_activity_label_dataframe, all_livelihood_activity_labels_dataframe, summary_livelihood_activity_labels_dataframe, + livelihood_summary_dataframe, + livelihood_summary_label_dataframe, + all_livelihood_summary_labels_dataframe, + summary_livelihood_summary_labels_dataframe, livelihood_activity_instances, livelihood_activity_valid_instances, livelihood_activity_fixture, diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index fdd2b5b..7878d2a 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -78,6 +78,7 @@ django.setup() from baseline.models import ( # NOQA: E402 + LivelihoodActivity, LivelihoodStrategy, LivelihoodZoneBaseline, MilkProduction, @@ -98,6 +99,7 @@ ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY: "Data", ActivityLabel.LivelihoodActivityType.OTHER_CASH_INCOME: "Data2", ActivityLabel.LivelihoodActivityType.WILD_FOODS: "Data3", + ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY: "Data", } @@ -122,13 +124,49 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> ) +@asset(partitions_def=bss_instances_partitions_def) +def livelihood_activity_label_dataframe( + context: AssetExecutionContext, + config: BSSMetadataConfig, + livelihood_activity_dataframe, +) -> Output[pd.DataFrame]: + """ + Dataframe of Livelihood Activity Label References for a single BSS. + """ + return get_bss_label_dataframe( + context, config, livelihood_activity_dataframe, "livelihood_activity_dataframe", len(HEADER_ROWS) + ) + + +@asset(io_manager_key="dataframe_csv_io_manager") +def all_livelihood_activity_labels_dataframe( + config: BSSMetadataConfig, livelihood_activity_label_dataframe: dict[str, pd.DataFrame] +) -> Output[pd.DataFrame]: + """ + Combined dataframe of the Livelihood Activity labels in use across all BSSs. + """ + return get_all_bss_labels_dataframe(config, livelihood_activity_label_dataframe) + + +@asset(io_manager_key="dataframe_csv_io_manager") +def summary_livelihood_activity_labels_dataframe( + config: BSSMetadataConfig, all_livelihood_activity_labels_dataframe: pd.DataFrame +) -> Output[pd.DataFrame]: + """ + Summary of the Livelihood Activity labels in use across all BSSs. + """ + return get_summary_bss_label_dataframe( + config, all_livelihood_activity_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY + ) + + @asset(partitions_def=bss_instances_partitions_def) def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> Output[pd.DataFrame]: """ DataFrame of the Livelihood Activity Summary from a BSS - The summary is at the end of the Data worksheet, after the main Livelihood Activities. - It contains the total values for income, expenditure, kcals consumed, etc. for each Wealth Group. + The summary is at the beginning of the Data worksheet, before the main Livelihood Activities. + It contains the total values for income, expenditure, kcals consumed, etc. by Strategy Type for each Wealth Group. 
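+    The summary rows have labels beginning with "total"/"synthèse", "food summary", "income summary"
+    or "expenditure summary", e.g. the "food summary: total (%)" label used as a start string below.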
""" return get_bss_dataframe( config, @@ -141,38 +179,38 @@ def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> @asset(partitions_def=bss_instances_partitions_def) -def livelihood_activity_label_dataframe( +def livelihood_summary_label_dataframe( context: AssetExecutionContext, config: BSSMetadataConfig, - livelihood_activity_dataframe, + livelihood_summary_dataframe, ) -> Output[pd.DataFrame]: """ - Dataframe of Livelihood Activity Label References + Dataframe of Livelihood Summary Label References for a single BSS """ return get_bss_label_dataframe( - context, config, livelihood_activity_dataframe, "livelihood_activity_dataframe", len(HEADER_ROWS) + context, config, livelihood_summary_dataframe, "livelihood_summary_dataframe", len(HEADER_ROWS) ) @asset(io_manager_key="dataframe_csv_io_manager") -def all_livelihood_activity_labels_dataframe( - config: BSSMetadataConfig, livelihood_activity_label_dataframe: dict[str, pd.DataFrame] +def all_livelihood_summary_labels_dataframe( + config: BSSMetadataConfig, livelihood_summary_label_dataframe: dict[str, pd.DataFrame] ) -> Output[pd.DataFrame]: """ - Combined dataframe of the Livelihood Activity labels in use across all BSSs. + Combined dataframe of the Livelihood Summary labels in use across all BSSs. """ - return get_all_bss_labels_dataframe(config, livelihood_activity_label_dataframe) + return get_all_bss_labels_dataframe(config, livelihood_summary_label_dataframe) @asset(io_manager_key="dataframe_csv_io_manager") -def summary_livelihood_activity_labels_dataframe( - config: BSSMetadataConfig, all_livelihood_activity_labels_dataframe: pd.DataFrame +def summary_livelihood_summary_labels_dataframe( + config: BSSMetadataConfig, all_livelihood_summary_labels_dataframe: pd.DataFrame ) -> Output[pd.DataFrame]: """ - Summary of the Livelihood Activity labels in use across all BSSs. + Summary of the Livelihood Summary labels in use across all BSSs. """ return get_summary_bss_label_dataframe( - config, all_livelihood_activity_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY + config, all_livelihood_summary_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY ) @@ -314,8 +352,14 @@ def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code The country_code parameter is optional so that this function can be used to test individual labels, but it should be provided when processing a BSS because the Season lookup is country-specific. """ + # Clear caches for the functions, so that we use the lastest data from the database + get_label_attributes.cache_clear() + get_livelihood_activity_label_map.cache_clear() + # Prepare the lookups, so they cache the individual results - classifiedproductlookup = ClassifiedProductLookup() + classifiedproductlookup = ClassifiedProductLookup( + require_match=False # It is possible that there won't be any Product matches, e.g. for LivelihoodSummary labels + ) unitofmeasurelookup = UnitOfMeasureLookup( require_match=False # It is possible that there won't be any Unit of Measure matches, e.g. 
for OtherCashIncome ) @@ -397,7 +441,7 @@ def get_instances_from_dataframe( (df["A"].iloc[num_header_rows:] != "") & (all_label_attributes.iloc[num_header_rows:, 0].isna()) ] .groupby("A") - .apply(lambda x: ", ".join(x.index.astype(str))) + .apply(lambda x: ", ".join(x.index.astype(str)), include_groups=False) ) if unrecognized_labels.empty: unrecognized_labels = pd.DataFrame(columns=["label", "rows"]) @@ -455,6 +499,10 @@ def get_instances_from_dataframe( # Ignore rows that don't contain any relevant data (or which aren't recognized by get_label_attributes) continue + # When we process the values for the LivelihoodActivity records, we need to know the actual attribute + # that the values in this row are for + activity_attribute = label_attributes["attribute"] + if label_attributes["is_start"]: # We are starting a new livelihood activity, so append the previous livelihood strategy # to the list, provided that it has at least one Livelihood Activity where there is some income, @@ -497,6 +545,23 @@ def get_instances_from_dataframe( "Found Livelihood Activities from row %s, but there is no Livelihood Strategy defined." % row ) + # Copy the attribute from the previous livelihood strategy if this is a Livelihood Summary and the + # attribute hasn't been set by the label_attributes. + if ( + activity_type == ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and not activity_attribute + and previous_livelihood_strategy + and previous_livelihood_activities_for_strategy + ): + for attribute in ["income", "expenditure", "percentage_kcals"]: + if attribute in previous_livelihood_activities_for_strategy[0]: + activity_attribute = attribute + break + if not activity_attribute: + raise ValueError( + f"Could not determine attribute for Livelihood Summary strategy from row {row}" + ) + # Copy the product_id for MilkProduction and ButterProduction from the previous livelihood strategy # if necessary. if ( @@ -692,8 +757,10 @@ def get_instances_from_dataframe( # Check the Livelihood Strategy has a Season if one is required. # (e.g. for MilkProduction and ButterProduction). - if livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_SEASON and ( - "season" not in livelihood_strategy or not livelihood_strategy["season"] + if ( + livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_SEASON + and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and ("season" not in livelihood_strategy or not livelihood_strategy["season"]) ): strategy_is_valid = False # Include the header rows so that we can see which Wealth Groups are affected @@ -714,8 +781,10 @@ def get_instances_from_dataframe( errors.append(error_message) # Check the Livelihood Strategy has a product_id if one is required. 
- if livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_PRODUCT and ( - "product_id" not in livelihood_strategy or not livelihood_strategy["product_id"] + if ( + livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_PRODUCT + and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and ("product_id" not in livelihood_strategy or not livelihood_strategy["product_id"]) ): strategy_is_valid = False # Include the header rows so that we can see which Wealth Groups are affected @@ -760,7 +829,14 @@ def get_instances_from_dataframe( if label_attributes["strategy_type"]: strategy_type = label_attributes["strategy_type"] # Get the valid fields names so we can determine if the attribute is stored in LivelihoodActivity.extra - model = class_from_name(f"baseline.models.{strategy_type}") + # LivestockProduction is an artificial, composite strategy type representing the sum of + # MilkProduction, ButterProduction and MeatProduction. It isn't stored in the database, and it only + # requires income, expenditure and kcals_consumed, so we use the base LivelihoodActivity model. + model = ( + LivelihoodActivity + if strategy_type == "LivestockProduction" + else class_from_name(f"baseline.models.{strategy_type}") + ) activity_field_names = [field.name for field in model._meta.concrete_fields] # Also include values that point directly to the primary key of related objects activity_field_names += [ @@ -769,7 +845,10 @@ def get_instances_from_dataframe( if field.get_attname() not in activity_field_names ] - if not strategy_type: + # Raise an error if we find attributes without a strategy_type being set, unless we are processing + # the Livelihood Summary section, where we set the attribute from the section heading without wanting + # to save the actual data. + if not strategy_type and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY: raise ValueError( "Found attributes %s from row %s without a strategy_type set" % (label_attributes, row) ) @@ -826,9 +905,20 @@ def get_instances_from_dataframe( # We are not starting a new Livelihood Strategy, but there may be # additional attributes that need to be added to the current one. if not livelihood_strategy: + additional_attributes = [label_attributes["attribute"]] if label_attributes["attribute"] else [] + for attribute in [ + "is_start", + "product_id", + "unit_of_measure_id", + "season", + "additional_identifier", + "notes", + ]: + if label_attributes[attribute]: + additional_attributes.append(attribute) raise ValueError( - "Found additional attributes %s from row %s without an existing LivelihoodStrategy" - % (label_attributes, row) + "Found attributes from label '%s' in row %s without an existing LivelihoodStrategy: %s" + % (label_attributes["activity_label"], row, ", ".join(additional_attributes)) ) # Only update expected keys, and only if we found a value for that attribute. @@ -881,10 +971,6 @@ def get_instances_from_dataframe( # Update the LivelihoodActivity records if any(value for value in df.loc[row, "B":].astype(str).str.strip()): - # When we get the values for the LivelihoodActivity records, we just want the actual attribute - # that the values in the row are for - activity_attribute = label_attributes["attribute"] - # Some labels are ambiguous and map to different attributes depending on the strategy_type. 
if activity_attribute == "quantity_produced_or_purchased":
                         if livelihood_strategy["strategy_type"] == LivelihoodStrategyType.CROP_PRODUCTION:
@@ -1032,9 +1118,16 @@ def get_instances_from_dataframe(
                             )
                         )
 
-                # Add the attribute to the LivelihoodStrategy.attribute_rows
-                else:
+                # Add the attribute to the LivelihoodStrategy.attribute_rows, assuming we have a strategy_type.
+                # Some rows may have set attributes without setting a strategy_type, for example in the
+                # Livelihood Summary section.
+                elif strategy_type:
                     livelihood_strategy["attribute_rows"][activity_attribute] = row
+                    # Assertion to prevent linting from complaining about possible None values
+                    assert activity_field_names is not None, (
+                        "Found activity_attribute %s from row %s, but there is no Livelihood Strategy and therefore no activity_field_names defined."
+                        % (activity_attribute, row)
+                    )
                     for i, value in enumerate(df.loc[row, "B":]):
                         # Some attributes are stored in LivelihoodActivity.extra rather than individual fields.
                         if activity_attribute not in activity_field_names:
@@ -1090,34 +1183,179 @@
     )
 
 
-@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
-def livelihood_activity_instances(
+def get_annotated_instances_from_dataframe(
     context: AssetExecutionContext,
-    livelihood_activity_dataframe,
+    livelihood_activity_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
+    activity_type: str,
+    num_header_rows: int,
 ) -> Output[dict]:
     """
-    LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
+    Get the LivelihoodStrategy and LivelihoodActivity instances from the BSS, annotated with completeness information.
+
+    Completeness of the recognized detail livelihood activities is calculated as a percentage of the total income,
+    expenditure and kcals_consumed reported in the livelihood summary section at the top of the Data worksheet.
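+
+    For example, the income completeness percentage for each strategy_type is computed in the body below as:
+
+        income_completeness = round(income_recognized / income_summary * 100, 1)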
""" # Find the metadata for this BSS partition_key = context.asset_partition_key_for_output() livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:]) + # Get the detail LivelihoodStrategy and LivelihoodActivity instances output = get_instances_from_dataframe( context, livelihood_activity_dataframe, livelihood_zone_baseline, - ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY, - len(HEADER_ROWS), + activity_type, + num_header_rows, partition_key, ) + + if output.value["LivelihoodActivity"]: + # Get the summary instances + reported_summary_output = get_instances_from_dataframe( + context, + livelihood_summary_dataframe, + livelihood_zone_baseline, + ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY, + # The summary section is on the Data worksheet, so has the same number of header rows + # regardless of the activity_type + len(HEADER_ROWS), + partition_key, + ) + + # Annotate the output metadata with completeness information + # Get the summary dataframe, grouped by strategy_type + summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"]) + summary_df = ( + summary_df[["strategy_type", "income", "expenditure", "kcals_consumed"]].groupby("strategy_type").sum() + ) + + # Add the recognized Livelihood Activities, also grouped by strategy_type + recognized_activities_df = pd.DataFrame(output.value["LivelihoodActivity"]) + for column in ["income", "expenditure", "kcals_consumed"]: + if column in recognized_activities_df: + recognized_activities_df[column] = pd.to_numeric( + recognized_activities_df[column], errors="coerce" + ).fillna(0) + else: + recognized_activities_df[column] = 0 + summary_df = summary_df.join( + recognized_activities_df[["strategy_type", "income", "expenditure", "kcals_consumed"]] + .groupby("strategy_type") + .sum(), + on="strategy_type", + lsuffix="_summary", + rsuffix="_recognized", + ).fillna(0) + + # Add a totals row at the end + summary_df.loc["Total"] = summary_df.sum(numeric_only=True) + + # Add completeness percentages + summary_df = summary_df.round(0) + summary_df["income_completeness"] = summary_df.apply( + lambda row: ( + round(row["income_recognized"] / row["income_summary"] * 100, 1) + if row["income_summary"] > 0 + else pd.NA + ), + axis=1, + ) + summary_df["expenditure_completeness"] = summary_df.apply( + lambda row: ( + round(row["expenditure_recognized"] / row["expenditure_summary"] * 100, 1) + if row["expenditure_summary"] > 0 + else pd.NA + ), + axis=1, + ) + summary_df["kcals_consumed_completeness"] = summary_df.apply( + lambda row: ( + round(row["kcals_consumed_recognized"] / row["kcals_consumed_summary"] * 100, 1) + if row["kcals_consumed_summary"] > 0 + else pd.NA + ), + axis=1, + ) + # Format the numbers as integers, for better display in the markdown table + for column in ["income", "expenditure", "kcals_consumed"]: + for source in ["recognized", "summary"]: + summary_df[f"{column}_{source}"] = summary_df.apply( + lambda row: ( + int(row[f"{column}_{source}"]) + if (pd.notna(row[f"{column}_recognized"]) and row[f"{column}_recognized"] > 0) + or (pd.notna(row[f"{column}_summary"]) and row[f"{column}_summary"] > 0) + else pd.NA + ), + axis="columns", + ) + + # Transpose and reorder the columns and rows + # Sort the rows so that Strategy Types appear in the same order as in the BSS + ordered_strategy_types = ["LivestockProduction"] + [x for x in LivelihoodStrategyType] + ["Total"] + summary_df["strategy_type"] = pd.Categorical( + 
+            summary_df.reset_index(drop=False)["strategy_type"],
+            categories=ordered_strategy_types,
+            ordered=True,
+        )
+        summary_df = summary_df.reset_index(drop=True).sort_values(by="strategy_type")
+        summary_df = summary_df[
+            [
+                "strategy_type",
+                "kcals_consumed_recognized",
+                "kcals_consumed_summary",
+                "kcals_consumed_completeness",
+                "income_recognized",
+                "income_summary",
+                "income_completeness",
+                "expenditure_recognized",
+                "expenditure_summary",
+                "expenditure_completeness",
+            ]
+        ]
+        summary_df = summary_df.set_index("strategy_type").transpose()
+
+        # Add the completeness summary to the output metadata
+        output.metadata["pct_kcals_consumed_recognized"] = float(
+            summary_df.loc["kcals_consumed_completeness", "Total"]
+        )
+        output.metadata["pct_income_recognized"] = float(summary_df.loc["income_completeness", "Total"])
+        output.metadata["pct_expenditure_recognized"] = float(summary_df.loc["expenditure_completeness", "Total"])
+        output.metadata["completeness_summary"] = MetadataValue.md(
+            summary_df.replace(pd.NA, None).to_markdown(floatfmt=",.0f")
+        )
+
+    # Move the preview metadata item to the end of the dict
+    output.metadata["preview"] = output.metadata.pop("preview")
+
+    return output
+
+
+@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
+def livelihood_activity_instances(
+    context: AssetExecutionContext,
+    livelihood_activity_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
+) -> Output[dict]:
+    """
+    LivelihoodStrategy and LivelihoodActivity instances extracted from the BSS.
+    """
+    output = get_annotated_instances_from_dataframe(
+        context,
+        livelihood_activity_dataframe,
+        livelihood_summary_dataframe,
+        ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY,
+        len(HEADER_ROWS),
+    )
+
+    return output
 
 
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def livelihood_activity_valid_instances(
     context: AssetExecutionContext,
-    livelihood_activity_instances,
-    wealth_characteristic_instances,
+    livelihood_activity_instances: dict,
+    wealth_characteristic_instances: dict,
 ) -> Output[dict]:
     """
     Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture.
@@ -1147,7 +1385,7 @@ def livelihood_activity_valid_instances(
 def livelihood_activity_fixture(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
-    livelihood_activity_valid_instances,
+    livelihood_activity_valid_instances: dict,
 ) -> Output[list[dict]]:
     """
     Django fixture for the Livelihood Activities from a BSS.
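At its core, the completeness annotation in the hunks above is a join between two sets of grouped totals, one from the summary section and one from the recognized detail activities. A minimal standalone sketch of that calculation, using the column names from the diff above but invented figures:

    import pandas as pd

    # Invented example records, shaped like the LivelihoodActivity instances above
    summary = [
        {"strategy_type": "CropProduction", "income": 1000, "expenditure": 400, "kcals_consumed": 900},
        {"strategy_type": "Fishing", "income": 200, "expenditure": 50, "kcals_consumed": 100},
    ]
    recognized = [
        {"strategy_type": "CropProduction", "income": 900, "expenditure": 400, "kcals_consumed": 855},
    ]

    summary_df = pd.DataFrame(summary).groupby("strategy_type").sum()
    recognized_df = pd.DataFrame(recognized).groupby("strategy_type").sum()

    # Join the two sets of totals, then express the recognized total as a percentage of the summary total
    df = summary_df.join(recognized_df, lsuffix="_summary", rsuffix="_recognized").fillna(0)
    for column in ["income", "expenditure", "kcals_consumed"]:
        df[f"{column}_completeness"] = (df[f"{column}_recognized"] / df[f"{column}_summary"] * 100).round(1)

    # CropProduction shows 90.0% income and 95.0% kcals completeness; Fishing shows 0.0
    # because none of its activities were recognized
    print(df.filter(like="_completeness"))

Strategy types that appear in the summary but have no recognized detail activities fall out of the join as missing values, which is why both this sketch and the pipeline code fill them with 0 before computing the percentages.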
diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py
index edf3957..a73a0ae 100644
--- a/pipelines/assets/other_cash_income.py
+++ b/pipelines/assets/other_cash_income.py
@@ -54,7 +54,7 @@
     get_summary_bss_label_dataframe,
 )
 from .fixtures import get_fixture_from_instances, import_fixture, validate_instances
-from .livelihood_activity import get_instances_from_dataframe
+from .livelihood_activity import get_annotated_instances_from_dataframe
 
 # set the default Django settings module
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hea.settings.production")
@@ -128,33 +128,31 @@ def summary_other_cash_income_labels_dataframe(
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def other_cash_income_instances(
     context: AssetExecutionContext,
-    other_cash_income_dataframe,
+    other_cash_income_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
 ) -> Output[dict]:
     """
     LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
     """
-    partition_key = context.asset_partition_key_for_output()
-    livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:])
-
     if other_cash_income_dataframe.empty:
         output = {}
 
-    output = get_instances_from_dataframe(
+    output = get_annotated_instances_from_dataframe(
         context,
         other_cash_income_dataframe,
-        livelihood_zone_baseline,
+        livelihood_summary_dataframe,
         ActivityLabel.LivelihoodActivityType.OTHER_CASH_INCOME,
         len(HEADER_ROWS),
-        partition_key,
     )
+
     return output
 
 
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def other_cash_income_valid_instances(
     context: AssetExecutionContext,
-    other_cash_income_instances,
-    wealth_characteristic_instances,
+    other_cash_income_instances: dict,
+    wealth_characteristic_instances: dict,
 ) -> Output[dict]:
     """
     Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture.
@@ -184,7 +182,7 @@ def other_cash_income_valid_instances(
 def other_cash_income_fixture(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
-    other_cash_income_valid_instances,
+    other_cash_income_valid_instances: dict,
 ) -> Output[list[dict]]:
     """
     Django fixture for the Livelihood Activities from a BSS.
diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py
index 4ab7ece..0e48ef4 100644
--- a/pipelines/assets/wild_foods.py
+++ b/pipelines/assets/wild_foods.py
@@ -72,7 +72,7 @@
     get_summary_bss_label_dataframe,
 )
 from .fixtures import get_fixture_from_instances, import_fixture, validate_instances
-from .livelihood_activity import get_instances_from_dataframe
+from .livelihood_activity import get_annotated_instances_from_dataframe
 
 # set the default Django settings module
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hea.settings.production")
@@ -138,33 +138,31 @@ def summary_wild_foods_labels_dataframe(
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def wild_foods_instances(
     context: AssetExecutionContext,
-    wild_foods_dataframe,
+    wild_foods_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
 ) -> Output[dict]:
     """
     LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
""" - partition_key = context.asset_partition_key_for_output() - livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:]) - if wild_foods_dataframe.empty: output = {} - output = get_instances_from_dataframe( + output = get_annotated_instances_from_dataframe( context, wild_foods_dataframe, - livelihood_zone_baseline, + livelihood_summary_dataframe, ActivityLabel.LivelihoodActivityType.WILD_FOODS, len(HEADER_ROWS), - partition_key, ) + return output @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager") def wild_foods_valid_instances( context: AssetExecutionContext, - wild_foods_instances, - wealth_characteristic_instances, + wild_foods_instances: dict, + wealth_characteristic_instances: dict, ) -> Output[dict]: """ Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture. @@ -194,7 +192,7 @@ def wild_foods_valid_instances( def wild_foods_fixture( context: AssetExecutionContext, config: BSSMetadataConfig, - wild_foods_valid_instances, + wild_foods_valid_instances: dict, ) -> Output[list[dict]]: """ Django fixture for the Livelihood Activities from a BSS. From ef8a7de072d288268dce81e049b534e5efaee534 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 11 Oct 2025 10:38:37 -0400 Subject: [PATCH 13/15] Remove redundant imports - see HEA-572 --- pipelines/assets/other_cash_income.py | 1 - pipelines/assets/wild_foods.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py index a73a0ae..2792064 100644 --- a/pipelines/assets/other_cash_income.py +++ b/pipelines/assets/other_cash_income.py @@ -62,7 +62,6 @@ # Configure Django with our custom settings before importing any Django classes django.setup() -from baseline.models import LivelihoodZoneBaseline # NOQA: E402 from metadata.models import ActivityLabel # NOQA: E402 # Indexes of header rows in the Data3 dataframe (wealth_group_category, district, village) diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py index 0e48ef4..e663c2e 100644 --- a/pipelines/assets/wild_foods.py +++ b/pipelines/assets/wild_foods.py @@ -80,7 +80,6 @@ # Configure Django with our custom settings before importing any Django classes django.setup() -from baseline.models import LivelihoodZoneBaseline # NOQA: E402 from metadata.models import ActivityLabel # NOQA: E402 # Indexes of header rows in the Data3 dataframe (wealth_group_category, district, village) From 6c2c85a2c30eac9250ac820dc8ef0d98b444235f Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 11 Oct 2025 11:40:54 -0400 Subject: [PATCH 14/15] Fix isort - see HEA-572 --- pipelines/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index dc6a54b..f70caec 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -25,8 +25,8 @@ livelihood_activity_label_dataframe, livelihood_activity_valid_instances, livelihood_summary_dataframe, - summary_livelihood_activity_labels_dataframe, livelihood_summary_label_dataframe, + summary_livelihood_activity_labels_dataframe, summary_livelihood_summary_labels_dataframe, ) from .assets.other_cash_income import ( From 635cc91ced71104ffb83c9f7ff05ece009a05d2a Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sun, 12 Oct 2025 11:54:17 -0400 Subject: [PATCH 15/15] Remove dbtoyaml calls from 01-build-then-test - see HEA-760 --- .github/workflows/01-build-then-test.yml | 14 
-------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/01-build-then-test.yml b/.github/workflows/01-build-then-test.yml index a517324..9efe314 100644 --- a/.github/workflows/01-build-then-test.yml +++ b/.github/workflows/01-build-then-test.yml @@ -271,9 +271,6 @@ jobs: docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -400,9 +397,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -530,9 +524,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -661,11 +652,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # The prod image does not include pyrseas/dbtoyaml. Building a test image to include that - docker compose build app - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4
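
A closing note on the completeness table from the earlier livelihood_activity.py hunks: the row ordering relies on a pandas ordered categorical, so that strategy types sort in BSS worksheet order rather than alphabetically. A minimal sketch, with invented labels standing in for the LivelihoodStrategyType values:

    import pandas as pd

    # Invented stand-ins for ["LivestockProduction"] + list(LivelihoodStrategyType) + ["Total"]
    ordered_strategy_types = ["LivestockProduction", "MilkProduction", "CropProduction", "Fishing", "Total"]

    df = pd.DataFrame(
        {"strategy_type": ["Total", "CropProduction", "MilkProduction"], "income": [300, 100, 200]}
    )
    df["strategy_type"] = pd.Categorical(df["strategy_type"], categories=ordered_strategy_types, ordered=True)

    # sort_values now follows the declared category order, so "Total" always lands last
    print(df.sort_values(by="strategy_type"))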