From 19bc481d2608b64f7c9d773ff5255443d4a95d41 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Wed, 24 Sep 2025 14:02:25 -0400 Subject: [PATCH 01/15] +Ignore summary section on DRC Data worksheets - see HEA-740 --- pipelines/assets/livelihood_activity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index ec46276..242b7da 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -116,6 +116,7 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> "Revenus moins dépenses", "Revenu moins dépense", "revenu moins dépenses", # 2023 Mali BSSs + "revenu mois dépenses", # 2024 DRC BSSs ], header_rows=HEADER_ROWS, ) From 759b9176991f7c858fd1decdede9f9d44e5b91f1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Wed, 24 Sep 2025 21:48:10 -0400 Subject: [PATCH 02/15] Fix unicode output in JSON previews - see HEA-573 --- pipelines/assets/baseline.py | 4 ++-- pipelines/assets/fixtures.py | 8 ++++---- pipelines/assets/livelihood_activity.py | 6 ++++-- pipelines/assets/other_cash_income.py | 4 +++- pipelines/assets/wealth_characteristic.py | 6 ++++-- pipelines/assets/wild_foods.py | 4 +++- pipelines/resources.py | 2 +- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py index c170f15..78eb47f 100644 --- a/pipelines/assets/baseline.py +++ b/pipelines/assets/baseline.py @@ -266,7 +266,7 @@ def baseline_instances( } try: - preview = json.dumps(result, indent=4) + preview = json.dumps(result, indent=4, ensure_ascii=False) except TypeError as e: raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e @@ -359,7 +359,7 @@ def community_instances(context: AssetExecutionContext, config: BSSMetadataConfi result = {"Community": community_df.to_dict(orient="records")} try: - preview = json.dumps(result, indent=4) + preview = json.dumps(result, indent=4, ensure_ascii=False) except TypeError as e: raise ValueError("Cannot serialize Community fixture to JSON. Failing dict is\n %s" % result) from e diff --git a/pipelines/assets/fixtures.py b/pipelines/assets/fixtures.py index 13be77b..5e0574c 100644 --- a/pipelines/assets/fixtures.py +++ b/pipelines/assets/fixtures.py @@ -220,7 +220,7 @@ def validate_instances( metadata = {f"num_{key.lower()}": len(value) for key, value in instances.items()} metadata["total_instances"] = sum(len(value) for value in instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(instances, indent=4, ensure_ascii=False)}\n```") return instances, metadata @@ -287,7 +287,7 @@ def get_fixture_from_instances(instance_dict: dict[str, list[dict]]) -> tuple[li metadata[f'num_{str(model._meta).split(".")[-1]}'] += 1 metadata["total_instances"] = len(fixture) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```") return fixture, metadata @@ -300,7 +300,7 @@ def import_fixture(fixture: list[dict]) -> dict: # We need to use a .verbose_json file extension for Django to use the correct serializer. 
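+    # By default json.dumps() escapes non-ASCII characters as \uXXXX sequences (ensure_ascii=True),
+    # so ensure_ascii=False is passed below to keep accented labels such as "Synthèse" readable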
with tempfile.NamedTemporaryFile(mode="w+", suffix=".verbose_json") as f: # Write the fixture to a temporary file so that Django can access it - f.write(json.dumps(fixture)) + f.write(json.dumps(fixture, indent=4, ensure_ascii=False)) f.seek(0) call_command(verbose_load_data.Command(), f.name, verbosity=2, format="verbose_json", stdout=output_buffer) @@ -309,7 +309,7 @@ def import_fixture(fixture: list[dict]) -> dict: for instance in fixture: metadata[f'num_{instance["model"].split(".")[-1]}'] += 1 metadata["total_instances"] = len(fixture) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4)}\n```") + metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(fixture, indent=4, ensure_ascii=False)}\n```") metadata["output"] = MetadataValue.md(f"```\n{output_buffer.getvalue()}\n```") return metadata diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index 242b7da..b20d184 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -1059,7 +1059,7 @@ def get_instances_from_dataframe( ) * 100 ), - "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"), + "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"), } if not unrecognized_labels.empty: metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False)) @@ -1114,7 +1114,9 @@ def livelihood_activity_valid_instances( valid_instances, metadata = validate_instances(context, livelihood_activity_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py index b6d0c48..edf3957 100644 --- a/pipelines/assets/other_cash_income.py +++ b/pipelines/assets/other_cash_income.py @@ -171,7 +171,9 @@ def other_cash_income_valid_instances( valid_instances, metadata = validate_instances(context, other_cash_income_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/wealth_characteristic.py b/pipelines/assets/wealth_characteristic.py index 5a0d12f..9641f5f 100644 --- a/pipelines/assets/wealth_characteristic.py +++ b/pipelines/assets/wealth_characteristic.py @@ -450,7 +450,7 @@ def wealth_characteristic_instances( ) * 100 ), - "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4)}\n```"), + "preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"), } if not unrecognized_labels.empty: metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False)) @@ -473,7 +473,9 @@ def wealth_characteristic_valid_instances( valid_instances, 
metadata = validate_instances(context, wealth_characteristic_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py index c1f3a71..4ab7ece 100644 --- a/pipelines/assets/wild_foods.py +++ b/pipelines/assets/wild_foods.py @@ -181,7 +181,9 @@ def wild_foods_valid_instances( valid_instances, metadata = validate_instances(context, wild_foods_instances, partition_key) metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()} metadata["total_instances"] = sum(len(value) for value in valid_instances.values()) - metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(valid_instances, indent=4)}\n```") + metadata["preview"] = MetadataValue.md( + f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```" + ) return Output( valid_instances, metadata=metadata, diff --git a/pipelines/resources.py b/pipelines/resources.py index 36c3109..3260241 100644 --- a/pipelines/resources.py +++ b/pipelines/resources.py @@ -70,7 +70,7 @@ def dump_to_path(self, context: OutputContext, obj: Any, path: "UPath"): self.unlink(path) with path.open("w") as file: - file.write(json.dumps(obj, indent=4)) + file.write(json.dumps(obj, indent=4, ensure_ascii=False)) def load_from_path(self, context: InputContext, path: "UPath") -> Any: with path.open("r") as file: From 81b8ebdde87d1d73d7b3819bcee3c45f678245e3 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 25 Sep 2025 12:21:17 -0400 Subject: [PATCH 03/15] Add income, expenditure and kcals totals to WealthGroup in wealth_characteristic_instances - see HEA-573 --- pipelines/__init__.py | 2 + pipelines/assets/livelihood_activity.py | 18 ++++++ pipelines/assets/wealth_characteristic.py | 76 ++++++++++++++++++++--- 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index 7c49004..3a36531 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -23,6 +23,7 @@ livelihood_activity_instances, livelihood_activity_label_dataframe, livelihood_activity_valid_instances, + livelihood_summary_dataframe, summary_livelihood_activity_labels_dataframe, ) from .assets.other_cash_income import ( @@ -82,6 +83,7 @@ baseline_instances, community_instances, livelihood_activity_dataframe, + livelihood_summary_dataframe, livelihood_activity_label_dataframe, all_livelihood_activity_labels_dataframe, summary_livelihood_activity_labels_dataframe, diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index b20d184..5608288 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -122,6 +122,24 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> ) +@asset(partitions_def=bss_instances_partitions_def) +def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> Output[pd.DataFrame]: + """ + DataFrame of the Livelihood Activity Summary from a BSS + + The summary is at the end of the Data worksheet, after the main Livelihood Activities. 
+    It contains the total values for income, expenditure, kcals consumed, etc. for each Wealth Group.
+    """
+    return get_bss_dataframe(
+        config,
+        corrected_files,
+        "Data",
+        start_strings=["food summary: total (%)", "synthèse de nourriture : total (%)"],
+        end_strings=["wealth characteristics", "caractéristiques socio-économiques"],
+        header_rows=HEADER_ROWS,
+    )
+
+
 @asset(partitions_def=bss_instances_partitions_def)
 def livelihood_activity_label_dataframe(
     context: AssetExecutionContext,
diff --git a/pipelines/assets/wealth_characteristic.py b/pipelines/assets/wealth_characteristic.py
index 9641f5f..eec7314 100644
--- a/pipelines/assets/wealth_characteristic.py
+++ b/pipelines/assets/wealth_characteristic.py
@@ -172,6 +172,7 @@ def wealth_characteristic_instances(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
     wealth_characteristic_dataframe,
+    livelihood_summary_dataframe,
 ) -> Output[dict]:
     """
     WealthGroup and WealthGroupCharacteristicValue instances extracted from the BSS.
@@ -382,17 +383,29 @@ def wealth_characteristic_instances(
     wealth_group_df = wealth_group_df[wealth_group_df["wealth_group_category"].notnull()]
     # We also need to add an extra row for each Wealth Group Category with a null Community, to create the
     # Baseline Wealth Groups.
+    baseline_wealth_group_df = wealth_group_df[wealth_group_df["community"] == wealth_group_df.iloc[0]["community"]][
+        [
+            "wealth_group_category_original",
+            "wealth_group_category",
+            "livelihood_zone_baseline",
+        ]
+    ].reset_index(drop=True)
+    baseline_wealth_group_df["community"] = None
+    baseline_wealth_group_df["district"] = ""
+    baseline_wealth_group_df["name"] = ""
+    baseline_wealth_group_df["full_name"] = ""
+    baseline_wealth_group_df["natural_key"] = baseline_wealth_group_df["wealth_group_category"].apply(
+        lambda wealth_group_category: [
+            livelihood_zone_baseline.livelihood_zone_id,
+            livelihood_zone_baseline.reference_year_end_date.isoformat(),
+            wealth_group_category,
+            "",
+        ]
+    )
     wealth_group_df = pd.concat(
         [
             wealth_group_df,
-            wealth_group_df[wealth_group_df["community"] == wealth_group_df.iloc[0]["community"]][
-                [
-                    "wealth_group_category_original",
-                    "wealth_group_category",
-                    "livelihood_zone_baseline",
-                    "community",
-                ]
-            ].assign(community=None),
+            baseline_wealth_group_df,
         ]
     )
 
@@ -434,6 +447,53 @@ def wealth_characteristic_instances(
         wealth_group_df, extra_attributes_df, on=["full_name", "wealth_group_category"], how="left"
     )
 
+    # We also need total income, expenditure and kcals from the summary section on the Data worksheet
+    # First drop any rows that are neither header rows nor totals rows.
The totals rows are identified by
+    # having a label that starts with "Total" or "Synthèse"
+    summary_df = livelihood_summary_dataframe[
+        (livelihood_summary_dataframe.index.isin(HEADER_ROWS))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("total"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("synthèse"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("food summary"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("income summary"))
+        | (livelihood_summary_dataframe["A"].str.lower().str.startswith("expenditure summary"))
+    ]
+    # Check we found the expected number of rows
+    if summary_df.shape[0] != 6:
+        raise ValueError(
+            f'Expected 6 rows in summary_df, but found {summary_df.shape[0]}: {", ".join(summary_df.iloc[:, 0].tolist())}'
+        )
+    # Rename the headings in column A for the totals rows
+    summary_df.iloc[3, 0] = "percentage_kcals"
+    summary_df.iloc[4, 0] = "income"
+    summary_df.iloc[5, 0] = "expenditure"
+
+    # Now transpose the dataframe and then join it to the wealth groups so we can access
+    # the real full_name and wealth_group_category
+    summary_df = pd.merge(
+        summary_df.set_index("A").transpose(),
+        get_wealth_group_dataframe(summary_df, livelihood_zone_baseline, "Data", partition_key).set_index(
+            "bss_column"
+        ),
+        left_index=True,
+        right_index=True,
+    )
+
+    # Add the summary attributes to the Wealth Groups
+    wealth_group_df = pd.merge(
+        wealth_group_df,
+        summary_df[["full_name", "wealth_group_category", "income", "expenditure", "percentage_kcals"]],
+        on=["full_name", "wealth_group_category"],
+        how="left",
+    )
+
+    # Calculate the kcals_consumed
+    # Derive it by multiplying percentage_kcals by:
+    # 2100 (kcals per person per day) * 365 (days per year) * average_household_size
+    wealth_group_df["kcals_consumed"] = (
+        wealth_group_df["percentage_kcals"] * 2100 * 365 * wealth_group_df["average_household_size"]
+    )
+
     result = {
         "WealthGroup": wealth_group_df.to_dict(orient="records"),
         "WealthGroupCharacteristicValue": wealth_group_characteristic_values,

From 9c2057f4ed46ad9782703698befb8bf47c7a85a7 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 25 Sep 2025 12:26:01 -0400 Subject: [PATCH 04/15] Fix bug in get_wealth_group_dataframe for WB - see HEA-573 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Francophone BSSs have `à` in row 4 in the WB worksheet in the summary columns (`synthèse | de | à`). And `a` is an alias for the B/O Wealth Group Category (from Aisé). So the previous code was matching that and thinking the data was for B/O instead of part of the WB summary.

--- pipelines/assets/baseline.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py
index 78eb47f..67ebb1a 100644
--- a/pipelines/assets/baseline.py
+++ b/pipelines/assets/baseline.py
@@ -78,17 +78,15 @@ def get_wealth_group_dataframe(
     # In the Summary columns in the Data, Data2, Data3 worksheets, the Wealth
     # Group Category is in Row 4 (District) rather than Row 3 (Wealth Group Category)
     # so do a second lookup to update the blank rows.
-    # If this doesn't find any new values, then it's because in a WB worksheet
-    # there are no extra Wealth Group Categories on Row 4
-    try:
+    # Note that in a WB worksheet there are no extra Wealth Group Categories on Row 4
+    if worksheet_name != "WB":
         wealth_group_df = wealthgroupcategorylookup.do_lookup(
             wealth_group_df, "district", "wealth_group_category", update=True
         )
         # Remove the duplicate wealth_group_category_original column created by the second do_lookup(),
         # which otherwise causes problems when trying to merge dataframes, e.g. when building the wealth_group_df.
         wealth_group_df = wealth_group_df.loc[:, ~wealth_group_df.columns.duplicated()]
-    except ValueError:
-        pass
+
     # Check if there are unrecognized wealth group categories and report
     wealth_group_missing_category_df = wealth_group_df[
         wealth_group_df["wealth_group_category"].isnull()

From 13b8755206bd5976415640dc00bb2fdb22bbe01f Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:06:40 -0500 Subject: [PATCH 05/15] Remove amd64 platform requirement - see HEA-760

--- docker/app/Dockerfile | 2 +- docker/db/Dockerfile | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docker/app/Dockerfile b/docker/app/Dockerfile
index cf47a02..61a383b 100644
--- a/docker/app/Dockerfile
+++ b/docker/app/Dockerfile
@@ -1,4 +1,4 @@
-FROM --platform=linux/amd64 python:3.12-bookworm as base
+FROM python:3.12-bookworm as base
 
 # set up apt repositories for postgres installation
 RUN curl -s https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | tee /usr/share/keyrings/pgdg.gpg >/dev/null && \
diff --git a/docker/db/Dockerfile b/docker/db/Dockerfile
index dbccb48..ea0a4f0 100644
--- a/docker/db/Dockerfile
+++ b/docker/db/Dockerfile
@@ -1,3 +1,5 @@
-FROM --platform=linux/amd64 postgis/postgis:17-3.5
+# Use a third party multi-arch base image for compatibility with both ARM and AMD architectures
+# until PostGIS fixes https://github.com/postgis/docker-postgis/issues/216
+FROM ghcr.io/baosystems/postgis:17-3.5
 
 COPY create_db.sh /docker-entrypoint-initdb.d/create_db.sh

From d747ea102872c46986bba9350f9640b1d31477b3 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:07:20 -0500 Subject: [PATCH 06/15] Fix create_db.sh - see HEA-760

--- docker/db/create_db.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/db/create_db.sh b/docker/db/create_db.sh
index fd78e59..08ac769 100755
--- a/docker/db/create_db.sh
+++ b/docker/db/create_db.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-psql --set=CLIENT=$CLIENT --set=APP=$APP --set=ENV=$ENV --set=POSTGRES_PASSWORD=$POSTGRES_PASSWORD --set=DAGSTER_PASSWORD=$DAGSTER_PASSWORD --set=CREATE_TEMPLATE=${CREATE_TEMPLATE:-false} -d postgres --echo-all << EOF
+psql --set=CLIENT=$CLIENT --set=APP=$APP --set=ENV=$ENV --set=POSTGRES_PASSWORD=$POSTGRES_PASSWORD --set=CREATE_TEMPLATE=${CREATE_TEMPLATE:-false} -d postgres --echo-all << EOF
 
 \set DATABASE :CLIENT :APP :ENV
 \set OWNER :CLIENT :APP :ENV
@@ -74,7 +74,7 @@ ALTER DEFAULT PRIVILEGES IN SCHEMA :SCHEMA GRANT SELECT ON TABLES TO :OWNER;
 
 \set DAGSTER :CLIENT :APP :ENV
 
-CREATE ROLE :DAGSTER PASSWORD :'DAGSTER_PASSWORD' NOLOGIN CREATEDB NOCREATEROLE NOSUPERUSER;
+CREATE ROLE :DAGSTER PASSWORD :'POSTGRES_PASSWORD' NOLOGIN CREATEDB NOCREATEROLE NOSUPERUSER;
 COMMENT ON ROLE :DAGSTER IS 'Main Dagster pipeline user for :CLIENT :PRJ :ENV';
 GRANT :DAGSTER TO :OWNER;
 GRANT CONNECT, TEMPORARY, CREATE ON DATABASE :DATABASE TO :DAGSTER;

From ec1fe099f09af2a3ce4b411dabf5ead200f5fa42 Mon Sep 17 00:00:00 2001 From:
Roger Hunwicks Date: Sat, 4 Oct 2025 17:09:56 -0500 Subject: [PATCH 07/15] Remove Pyrseas - see HEA-370

We don't use Pyrseas at all these days - it is a legacy from before Django had full-featured SQL migrations.

--- requirements/test.txt | 2 -- 1 file changed, 2 deletions(-)

diff --git a/requirements/test.txt b/requirements/test.txt
index 692ce0a..5a10e48 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,6 +1,4 @@
 -r base.txt
 beautifulsoup4==4.12.2
 coverage==7.2.7
-# Pyrseas==0.10.0 raises "KeyError: ('public', 'spatial_ref_sys')", --schema/--exclude-schema don't fix it.
-Pyrseas==0.9.1
 safety==3.6.1

From c2d658bbe18b4c604e2b2e727c049dbe1a4505c1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:11:56 -0500 Subject: [PATCH 08/15] Remove redundant environment variables - see HEA-370

--- docker-compose.yml | 7 ------- 1 file changed, 7 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index e75dd65..bc76f3e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -110,14 +110,7 @@ services:
       MINIO_ENDPOINT_URL: http://minio:9000
       SUPPORT_EMAIL_ADDRESS: ${SUPPORT_EMAIL_ADDRESS}
       DJANGO_MIGRATE: 1
-      KILUIGI_INTERMEDIATETARGET_BACKEND_CLASS: ${KILUIGI_INTERMEDIATETARGET_BACKEND_CLASS}
-      KILUIGI_INTERMEDIATETARGET_ROOT_PATH: ${KILUIGI_INTERMEDIATETARGET_ROOT_PATH}
-      KILUIGI_FINALTARGET_BACKEND_CLASS: ${KILUIGI_FINALTARGET_BACKEND_CLASS}
-      KILUIGI_FINALTARGET_ROOT_PATH: ${KILUIGI_FINALTARGET_ROOT_PATH}
-      KILUIGI_REPORTTARGET_BACKEND_CLASS: ${KILUIGI_REPORTTARGET_BACKEND_CLASS}
-      KILUIGI_REPORTTARGET_ROOT_PATH: ${KILUIGI_REPORTTARGET_ROOT_PATH}
       GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS}
-      GOOGLE_ADMIN_EMAIL: ${GOOGLE_ADMIN_EMAIL}
     command:
       - --timeout=3600
       - --workers=12

From fe8783579292155e0f27eaf35bbd2f62656231f1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 4 Oct 2025 17:50:13 -0500 Subject: [PATCH 09/15] Enable remote debugging using VSCode - see HEA-760

This change allows setting LAUNCHER in .env and then using VSCode to attach to the Python process running inside the Docker container(s)

--- README.md | 50 +++++++++++++++++++++++++++++ docker-compose.override.yml | 13 ++++++++ docker/app/run_dagster_daemon.sh | 7 ++-- docker/app/run_dagster_webserver.sh | 7 ++-- docker/app/run_django.sh | 5 ++- env.example | 5 +++ requirements/local.txt | 1 + 7 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7d78bda..2f29e30 100644
--- a/README.md
+++ b/README.md
@@ -23,3 +23,53 @@ baseline
 
 This produces a .puml file that can be rendered using a PlantUML server, either
 within your IDE or using a service like http://www.plantuml.com/.
+
+## Debugging inside Docker Containers
+
+The `LAUNCHER` environment variable sets a wrapper program around the Python process
+(`gunicorn`, `dagster-daemon`, `dagster-webserver`). This can be used to
+enable a debugger inside Docker Containers:
+
+1. Set `LAUNCHER="python3 -m debugpy --listen 0.0.0.0:5678"` in `.env`
+2.
Create Launch Configurations in Visual Studio Code like:
+
+```json
+    {
+        "name": "Python: Attach to app (Docker Container)",
+        "type": "debugpy",
+        "request": "attach",
+        "connect": {
+            "host": "localhost",
+            "port": 5678
+        },
+        "pathMappings": [
+            {
+                "localRoot": "${workspaceFolder:hea}",
+                "remoteRoot": "/usr/src/app"
+            }
+        ],
+        "django": true,
+        "justMyCode": false
+    },
+    {
+        "name": "Python: Attach to dagster-daemon (Docker Container)",
+        "type": "debugpy",
+        "request": "attach",
+        "connect": {
+            "host": "localhost",
+            "port": 5680
+        },
+        "pathMappings": [
+            {
+                "localRoot": "${workspaceFolder:hea}",
+                "remoteRoot": "/usr/src/app"
+            }
+        ],
+        "django": true,
+        "justMyCode": false
+    }
+```
+
+3. Start the Docker containers as normal, and then use the Run and Debug
+pane in Visual Studio Code to launch the configuration that attaches to
+the desired server.
\ No newline at end of file
diff --git a/docker-compose.override.yml b/docker-compose.override.yml
index a1ffcc6..dd8e618 100644
--- a/docker-compose.override.yml
+++ b/docker-compose.override.yml
@@ -15,6 +15,7 @@ services:
     build:
       target: dev
     ports:
+      - "5678:5678"
      - "8000:8000"
      - "8888:8888"
    volumes:
@@ -30,6 +31,9 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g. "debugpy" or "ddtrace"
+      # Disable frozen modules warning
+      PYDEVD_DISABLE_FILE_VALIDATION: 1
      # Put .coverage in a writable directory
      COVERAGE_FILE: log/.coverage
    command:
@@ -41,6 +45,7 @@
    restart: no
    ports:
      - "3000:3000"
+      - "5679:5678"
    volumes:
      - ./:/usr/src/app
      # Separate volumes for writable directories inside the container
@@ -54,9 +59,14 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g. "debugpy" or "ddtrace"
+      # Disable frozen modules warning
+      PYDEVD_DISABLE_FILE_VALIDATION: 1
 
  dagster_daemon:
    restart: no
+    ports:
+      - "5680:5678"
    volumes:
      - ./:/usr/src/app
      # Separate volumes for writable directories inside the container
@@ -70,4 +80,7 @@
      - ./env.example:/usr/src/app/.env
    environment:
      DJANGO_SETTINGS_MODULE: hea.settings.local
+      LAUNCHER: ${LAUNCHER} # e.g.
"debugpy" or "ddtrace" + # Disable frozen modules warning + PYDEVD_DISABLE_FILE_VALIDATION: 1 diff --git a/docker/app/run_dagster_daemon.sh b/docker/app/run_dagster_daemon.sh index 8beac63..87c46b3 100755 --- a/docker/app/run_dagster_daemon.sh +++ b/docker/app/run_dagster_daemon.sh @@ -13,5 +13,8 @@ echo Setting up logs touch log/django.log chown -R django:django log/* -echo Starting Dagster -gosu django dagster-daemon run $* \ No newline at end of file +echo Starting Dagster Daemon +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/dagster-daemon run $* \ No newline at end of file diff --git a/docker/app/run_dagster_webserver.sh b/docker/app/run_dagster_webserver.sh index e6b55a1..5e27a35 100755 --- a/docker/app/run_dagster_webserver.sh +++ b/docker/app/run_dagster_webserver.sh @@ -13,5 +13,8 @@ echo Setting up logs touch log/django.log chown -R django:django log/* -echo Starting Dagster -gosu django dagster-webserver -h 0.0.0.0 -p 3000 -m pipelines --path-prefix /${DAGSTER_WEBSERVER_PREFIX} $* \ No newline at end of file +echo Starting Dagster Webserver +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/dagster-webserver -h 0.0.0.0 -p 3000 -m pipelines --path-prefix /${DAGSTER_WEBSERVER_PREFIX} $* \ No newline at end of file diff --git a/docker/app/run_django.sh b/docker/app/run_django.sh index 73112fa..e6f2b48 100755 --- a/docker/app/run_django.sh +++ b/docker/app/run_django.sh @@ -40,7 +40,10 @@ touch log/django_sql.log chown -R django:django log/* echo Starting Gunicorn with DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE} -gosu django gunicorn ${APP}.wsgi:application \ +if [ x"$LAUNCHER" != x"" ]; then + echo using ${LAUNCHER} +fi +gosu django ${LAUNCHER} /usr/local/bin/gunicorn ${APP}.wsgi:application \ --name ${APP}${ENV} \ --config $(dirname $(readlink -f "$0"))/gunicorn_config.py \ $* 2>&1 diff --git a/env.example b/env.example index 680e231..3c4775f 100644 --- a/env.example +++ b/env.example @@ -48,3 +48,8 @@ BSS_METADATA_WORKBOOK='gdrive://Database Design/BSS Metadata' # 15XVXFjbom1sScV BSS_METADATA_STORAGE_OPTIONS='{"token": "service_account", "access": "read_only", "creds": ${GOOGLE_APPLICATION_CREDENTIALS}, "root_file_id": "0AOJ0gJ8sjnO7Uk9PVA"}' BSS_FILES_FOLDER='gdrive://Discovery Folder/Baseline Storage Sheets (BSS)' BSS_FILES_STORAGE_OPTIONS='{"token": "service_account", "access": "read_only", "creds": ${GOOGLE_APPLICATION_CREDENTIALS}, "root_file_id": "0AOJ0gJ8sjnO7Uk9PVA"}' + +# LAUNCHER can be used to configure a wrapper program around the Python process +# For example, to add ddtrace or debugpy +# Use the VSCode debugger as a launcher +# LAUNCHER = "python3 -m debugpy --listen 0.0.0.0:5679" \ No newline at end of file diff --git a/requirements/local.txt b/requirements/local.txt index 4e95b36..8cd2465 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -1,2 +1,3 @@ -r test.txt -r lint.txt +debugpy \ No newline at end of file From 8c805dd7806672c8a78714f1ca5d6eb2201a689c Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 13:05:15 -0400 Subject: [PATCH 10/15] Allow LivelihoodSummary labels in ActivityLabel - see HEA-572 --- ...er_activitylabel_activity_type_and_more.py | 71 +++++++++++++++++++ apps/metadata/models.py | 17 +++-- pipelines/jobs/metadata.py | 10 +-- 3 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py diff --git 
a/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py b/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py new file mode 100644 index 0000000..40ca675 --- /dev/null +++ b/apps/metadata/migrations/0012_alter_activitylabel_activity_type_and_more.py @@ -0,0 +1,71 @@ +# Generated by Django 5.2.6 on 2025-10-08 22:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("metadata", "0011_alter_activitylabel_additional_identifier"), + ] + + operations = [ + migrations.AlterField( + model_name="activitylabel", + name="activity_type", + field=models.CharField( + choices=[ + ("LivelihoodActivity", "Livelihood Activity"), + ("OtherCashIncome", "Other Cash Income"), + ("WildFoods", "Wild Foods, Fishing or Hunting"), + ("LivelihoodSummary", "Livelihood Summary"), + ], + default="LivelihoodActivity", + help_text="The type of Livelihood Activity the label is for: either a general Livelihood Activity, or an Other Cash Income activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the 'Data3' worksheet, or a label from the 'Summary' section of the 'Data' worksheet.", + max_length=20, + verbose_name="Activity Type", + ), + ), + migrations.AlterField( + model_name="activitylabel", + name="status", + field=models.CharField( + blank=True, + choices=[ + ("Regular Expression", "Processed by Regular Expression"), + ("Override", "Override automatically recognized metadata"), + ("Discussion", "Under Discussion"), + ("Correct BSS", "Correct the BSS"), + ("Ignore", "Ignore this label and associated data in the row"), + ], + max_length=20, + verbose_name="Status", + ), + ), + migrations.AlterField( + model_name="activitylabel", + name="strategy_type", + field=models.CharField( + blank=True, + choices=[ + ("MilkProduction", "Milk Production"), + ("ButterProduction", "Butter Production"), + ("MeatProduction", "Meat Production"), + ("LivestockSale", "Livestock Sale"), + ("CropProduction", "Crop Production"), + ("FoodPurchase", "Food Purchase"), + ("PaymentInKind", "Payment in Kind"), + ("ReliefGiftOther", "Relief, Gift or Other Food"), + ("Hunting", "Hunting"), + ("Fishing", "Fishing"), + ("WildFoodGathering", "Wild Food Gathering"), + ("OtherCashIncome", "Other Cash Income"), + ("OtherPurchase", "Other Purchase"), + ("LivestockProduction", "Livestock Production"), + ], + help_text="The type of livelihood strategy, such as crop production, or wild food gathering.", + max_length=30, + verbose_name="Strategy Type", + ), + ), + ] diff --git a/apps/metadata/models.py b/apps/metadata/models.py index e8bb949..bf0d939 100644 --- a/apps/metadata/models.py +++ b/apps/metadata/models.py @@ -393,11 +393,15 @@ class LabelStatus(models.TextChoices): OVERRIDE = "Override", _("Override automatically recognized metadata") DISCUSSION = "Discussion", _("Under Discussion") CORRECT_BSS = "Correct BSS", _("Correct the BSS") + IGNORE = "Ignore", _("Ignore this label and associated data in the row") class LivelihoodActivityType(models.TextChoices): LIVELIHOOD_ACTIVITY = "LivelihoodActivity", _("Livelihood Activity") # Labels from the 'Data' worksheet OTHER_CASH_INCOME = "OtherCashIncome", _("Other Cash Income") # Labels from the 'Data2' worksheet - WILD_FOODS = "WildFoods", _("Wild Foods") # Labels from the 'Data3' worksheet + WILD_FOODS = "WildFoods", _("Wild Foods, Fishing or Hunting") # Labels from the 'Data3' worksheet + LIVELIHOOD_SUMMARY = "LivelihoodSummary", _( + "Livelihood Summary" + ) # Labels 
from the 'Summary' section of the 'Data' worksheet activity_label = common_models.NameField(max_length=200, verbose_name=_("Activity Label")) activity_type = models.CharField( @@ -406,9 +410,9 @@ class LivelihoodActivityType(models.TextChoices): choices=LivelihoodActivityType.choices, default=LivelihoodActivityType.LIVELIHOOD_ACTIVITY, help_text=_( - "The type of Livelihood Activity, either a general Livelihood Activity, or an Other Cash Income " - "activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the " - "'Data3' worksheet." + "The type of Livelihood Activity the label is for: either a general Livelihood Activity, or an Other Cash " + "Income activity from the 'Data2' worksheet, or a Wild Foods, Fishing or Hunting activity from the " + "'Data3' worksheet, or a label from the 'Summary' section of the 'Data' worksheet." ), ) status = models.CharField(blank=True, max_length=20, choices=LabelStatus.choices, verbose_name=_("Status")) @@ -420,7 +424,10 @@ class LivelihoodActivityType(models.TextChoices): strategy_type = models.CharField( max_length=30, blank=True, - choices=LivelihoodStrategyType.choices, + # We add an additional choice for LivestockProduction here, which is only valid when + # activity_type is LivelihoodSummary. LivestockProduction is the total of MeatProduction, + # MilkProduction and ButterProduction, and is used in the Summary section of the Data worksheet only + choices=LivelihoodStrategyType.choices + [("LivestockProduction", _("Livestock Production"))], # type: ignore verbose_name=_("Strategy Type"), help_text=_("The type of livelihood strategy, such as crop production, or wild food gathering."), ) diff --git a/pipelines/jobs/metadata.py b/pipelines/jobs/metadata.py index ee1b681..089428f 100644 --- a/pipelines/jobs/metadata.py +++ b/pipelines/jobs/metadata.py @@ -35,7 +35,7 @@ from metadata.models import ActivityLabel # NOQA: E402 -def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df: pd.DataFrame): +def load_metadata_for_model(context: OpExecutionContext, sheet_name: str, model: models.Model, df: pd.DataFrame): """ Load the metadata from a single worksheet, passed as a DataFrame, into a Django model. """ @@ -112,7 +112,7 @@ def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df existing_instances.values(), fields=record.keys(), ) - context.log.info(f"Updated {num_instances} {model_name} instances") + context.log.info(f"Updated {num_instances} {sheet_name} instances") else: if model_name == "SourceOrganization": @@ -140,7 +140,7 @@ def load_metadata_for_model(context: OpExecutionContext, model: models.Model, df update_fields=[k for k in record if k not in id_fields], unique_fields=id_fields, ) - context.log.info(f"Created or updated {len(instances)} {model_name} instances") + context.log.info(f"Created or updated {len(instances)} {sheet_name} instances") @op @@ -164,7 +164,7 @@ def load_all_metadata(context: OpExecutionContext, config: ReferenceDataConfig): # Iterate over the sheets in the ReferenceData workbook, in reverse order (because the Label sheets that # need Subject Matter Expert input are at beginning, and depend on the sheets at the end). for sheet_name in reversed(sheet_names): - if sheet_name in ["ActivityLabel", "OtherCashIncomeLabel", "WildFoodsLabel"]: + if sheet_name in ["ActivityLabel", "OtherCashIncomeLabel", "WildFoodsLabel", "SummaryLabel"]: model = ActivityLabel else: # Check whether the ReferenceData worksheet matches a Django model. 
@@ -179,7 +179,7 @@ def load_all_metadata(context: OpExecutionContext, config: ReferenceDataConfig): # If we found a model, then update the model from the contents of the Reference Data worksheet df = pd.read_excel(f, sheet_name).fillna("") try: - load_metadata_for_model(context, model, df) + load_metadata_for_model(context, sheet_name, model, df) except Exception as e: raise RuntimeError("Failed to create/update %s" % sheet_name) from e From bf70f686767d81359c285a78a8568a38df631bb2 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 13:05:41 -0400 Subject: [PATCH 11/15] Keep row order in dataframe samples - see HEA-572 --- pipelines/assets/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelines/assets/base.py b/pipelines/assets/base.py index 23760a1..faa1eb1 100644 --- a/pipelines/assets/base.py +++ b/pipelines/assets/base.py @@ -417,7 +417,7 @@ def get_bss_dataframe( df.loc[:, "B":].apply(lambda row: sum((row != 0) & (row != "")), axis="columns").sum() ), "preview": MetadataValue.md(df.head(config.preview_rows).to_markdown()), - "sample": MetadataValue.md(sample_df.sample(sample_rows).to_markdown()), + "sample": MetadataValue.md(sample_df.sample(sample_rows).sort_index().to_markdown()), }, ) @@ -477,7 +477,7 @@ def get_bss_label_dataframe( "num_summaries": int(label_df["in_summary"].sum()), # Escape the ~ in the partition_key, otherwise it is rendered as strikethrough "preview": MetadataValue.md(label_df.head(config.preview_rows).to_markdown().replace("~", "\\~")), - "sample": MetadataValue.md(sample_df.sample(sample_rows).to_markdown().replace("~", "\\~")), + "sample": MetadataValue.md(sample_df.sample(sample_rows).sort_index().to_markdown().replace("~", "\\~")), }, ) @@ -498,7 +498,7 @@ def get_all_bss_labels_dataframe( # Escape the ~ in the partition_key, otherwise it is rendered as strikethrough "preview": MetadataValue.md(df.head(config.preview_rows).to_markdown().replace("~", "\\~")), "sample": MetadataValue.md( - df[df["in_summary"]].sample(config.preview_rows).to_markdown().replace("~", "\\~") + df[df["in_summary"]].sample(config.preview_rows).sort_index().to_markdown().replace("~", "\\~") ), }, ) @@ -587,7 +587,8 @@ def translate_label(label, langs): label_metadata_df = pd.DataFrame.from_records(queryset) # Merge the label metadata into the dataframe - df = df.merge(label_metadata_df, left_on="label", right_on="label", how="left") + if not label_metadata_df.empty: + df = df.merge(label_metadata_df, left_on="label", right_on="label", how="left") # Rename the columns to match what we need in the GSheet when we run jobs.metadata.load_all_metadata df = df.rename( From b8b41b0466b276f9e29a64557081b8fa5d1185a1 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 10 Oct 2025 20:31:04 -0400 Subject: [PATCH 12/15] Add pct_income_recognized, etc to Instances metadata - see HEA-572 --- pipelines/__init__.py | 8 +- pipelines/assets/livelihood_activity.py | 316 +++++++++++++++++++++--- pipelines/assets/other_cash_income.py | 20 +- pipelines/assets/wild_foods.py | 20 +- 4 files changed, 302 insertions(+), 62 deletions(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index 3a36531..dc6a54b 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -17,6 +17,7 @@ ) from .assets.livelihood_activity import ( all_livelihood_activity_labels_dataframe, + all_livelihood_summary_labels_dataframe, imported_livelihood_activities, livelihood_activity_dataframe, livelihood_activity_fixture, @@ -25,6 +26,8 @@ 
livelihood_activity_valid_instances, livelihood_summary_dataframe, summary_livelihood_activity_labels_dataframe, + livelihood_summary_label_dataframe, + summary_livelihood_summary_labels_dataframe, ) from .assets.other_cash_income import ( all_other_cash_income_labels_dataframe, @@ -83,10 +86,13 @@ baseline_instances, community_instances, livelihood_activity_dataframe, - livelihood_summary_dataframe, livelihood_activity_label_dataframe, all_livelihood_activity_labels_dataframe, summary_livelihood_activity_labels_dataframe, + livelihood_summary_dataframe, + livelihood_summary_label_dataframe, + all_livelihood_summary_labels_dataframe, + summary_livelihood_summary_labels_dataframe, livelihood_activity_instances, livelihood_activity_valid_instances, livelihood_activity_fixture, diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index fdd2b5b..7878d2a 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -78,6 +78,7 @@ django.setup() from baseline.models import ( # NOQA: E402 + LivelihoodActivity, LivelihoodStrategy, LivelihoodZoneBaseline, MilkProduction, @@ -98,6 +99,7 @@ ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY: "Data", ActivityLabel.LivelihoodActivityType.OTHER_CASH_INCOME: "Data2", ActivityLabel.LivelihoodActivityType.WILD_FOODS: "Data3", + ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY: "Data", } @@ -122,13 +124,49 @@ def livelihood_activity_dataframe(config: BSSMetadataConfig, corrected_files) -> ) +@asset(partitions_def=bss_instances_partitions_def) +def livelihood_activity_label_dataframe( + context: AssetExecutionContext, + config: BSSMetadataConfig, + livelihood_activity_dataframe, +) -> Output[pd.DataFrame]: + """ + Dataframe of Livelihood Activity Label References for a single BSS. + """ + return get_bss_label_dataframe( + context, config, livelihood_activity_dataframe, "livelihood_activity_dataframe", len(HEADER_ROWS) + ) + + +@asset(io_manager_key="dataframe_csv_io_manager") +def all_livelihood_activity_labels_dataframe( + config: BSSMetadataConfig, livelihood_activity_label_dataframe: dict[str, pd.DataFrame] +) -> Output[pd.DataFrame]: + """ + Combined dataframe of the Livelihood Activity labels in use across all BSSs. + """ + return get_all_bss_labels_dataframe(config, livelihood_activity_label_dataframe) + + +@asset(io_manager_key="dataframe_csv_io_manager") +def summary_livelihood_activity_labels_dataframe( + config: BSSMetadataConfig, all_livelihood_activity_labels_dataframe: pd.DataFrame +) -> Output[pd.DataFrame]: + """ + Summary of the Livelihood Activity labels in use across all BSSs. + """ + return get_summary_bss_label_dataframe( + config, all_livelihood_activity_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY + ) + + @asset(partitions_def=bss_instances_partitions_def) def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> Output[pd.DataFrame]: """ DataFrame of the Livelihood Activity Summary from a BSS - The summary is at the end of the Data worksheet, after the main Livelihood Activities. - It contains the total values for income, expenditure, kcals consumed, etc. for each Wealth Group. + The summary is at the beginning of the Data worksheet, before the main Livelihood Activities. + It contains the total values for income, expenditure, kcals consumed, etc. by Strategy Type for each Wealth Group. 
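+    The summary rows have labels beginning with "total"/"synthèse", "food summary", "income summary"
+    or "expenditure summary", e.g. the "food summary: total (%)" label used as a start string below.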
""" return get_bss_dataframe( config, @@ -141,38 +179,38 @@ def livelihood_summary_dataframe(config: BSSMetadataConfig, corrected_files) -> @asset(partitions_def=bss_instances_partitions_def) -def livelihood_activity_label_dataframe( +def livelihood_summary_label_dataframe( context: AssetExecutionContext, config: BSSMetadataConfig, - livelihood_activity_dataframe, + livelihood_summary_dataframe, ) -> Output[pd.DataFrame]: """ - Dataframe of Livelihood Activity Label References + Dataframe of Livelihood Summary Label References for a single BSS """ return get_bss_label_dataframe( - context, config, livelihood_activity_dataframe, "livelihood_activity_dataframe", len(HEADER_ROWS) + context, config, livelihood_summary_dataframe, "livelihood_summary_dataframe", len(HEADER_ROWS) ) @asset(io_manager_key="dataframe_csv_io_manager") -def all_livelihood_activity_labels_dataframe( - config: BSSMetadataConfig, livelihood_activity_label_dataframe: dict[str, pd.DataFrame] +def all_livelihood_summary_labels_dataframe( + config: BSSMetadataConfig, livelihood_summary_label_dataframe: dict[str, pd.DataFrame] ) -> Output[pd.DataFrame]: """ - Combined dataframe of the Livelihood Activity labels in use across all BSSs. + Combined dataframe of the Livelihood Summary labels in use across all BSSs. """ - return get_all_bss_labels_dataframe(config, livelihood_activity_label_dataframe) + return get_all_bss_labels_dataframe(config, livelihood_summary_label_dataframe) @asset(io_manager_key="dataframe_csv_io_manager") -def summary_livelihood_activity_labels_dataframe( - config: BSSMetadataConfig, all_livelihood_activity_labels_dataframe: pd.DataFrame +def summary_livelihood_summary_labels_dataframe( + config: BSSMetadataConfig, all_livelihood_summary_labels_dataframe: pd.DataFrame ) -> Output[pd.DataFrame]: """ - Summary of the Livelihood Activity labels in use across all BSSs. + Summary of the Livelihood Summary labels in use across all BSSs. """ return get_summary_bss_label_dataframe( - config, all_livelihood_activity_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY + config, all_livelihood_summary_labels_dataframe, ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY ) @@ -314,8 +352,14 @@ def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code The country_code parameter is optional so that this function can be used to test individual labels, but it should be provided when processing a BSS because the Season lookup is country-specific. """ + # Clear caches for the functions, so that we use the lastest data from the database + get_label_attributes.cache_clear() + get_livelihood_activity_label_map.cache_clear() + # Prepare the lookups, so they cache the individual results - classifiedproductlookup = ClassifiedProductLookup() + classifiedproductlookup = ClassifiedProductLookup( + require_match=False # It is possible that there won't be any Product matches, e.g. for LivelihoodSummary labels + ) unitofmeasurelookup = UnitOfMeasureLookup( require_match=False # It is possible that there won't be any Unit of Measure matches, e.g. 
for OtherCashIncome ) @@ -397,7 +441,7 @@ def get_instances_from_dataframe( (df["A"].iloc[num_header_rows:] != "") & (all_label_attributes.iloc[num_header_rows:, 0].isna()) ] .groupby("A") - .apply(lambda x: ", ".join(x.index.astype(str))) + .apply(lambda x: ", ".join(x.index.astype(str)), include_groups=False) ) if unrecognized_labels.empty: unrecognized_labels = pd.DataFrame(columns=["label", "rows"]) @@ -455,6 +499,10 @@ def get_instances_from_dataframe( # Ignore rows that don't contain any relevant data (or which aren't recognized by get_label_attributes) continue + # When we process the values for the LivelihoodActivity records, we need to know the actual attribute + # that the values in this row are for + activity_attribute = label_attributes["attribute"] + if label_attributes["is_start"]: # We are starting a new livelihood activity, so append the previous livelihood strategy # to the list, provided that it has at least one Livelihood Activity where there is some income, @@ -497,6 +545,23 @@ def get_instances_from_dataframe( "Found Livelihood Activities from row %s, but there is no Livelihood Strategy defined." % row ) + # Copy the attribute from the previous livelihood strategy if this is a Livelihood Summary and the + # attribute hasn't been set by the label_attributes. + if ( + activity_type == ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and not activity_attribute + and previous_livelihood_strategy + and previous_livelihood_activities_for_strategy + ): + for attribute in ["income", "expenditure", "percentage_kcals"]: + if attribute in previous_livelihood_activities_for_strategy[0]: + activity_attribute = attribute + break + if not activity_attribute: + raise ValueError( + f"Could not determine attribute for Livelihood Summary strategy from row {row}" + ) + # Copy the product_id for MilkProduction and ButterProduction from the previous livelihood strategy # if necessary. if ( @@ -692,8 +757,10 @@ def get_instances_from_dataframe( # Check the Livelihood Strategy has a Season if one is required. # (e.g. for MilkProduction and ButterProduction). - if livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_SEASON and ( - "season" not in livelihood_strategy or not livelihood_strategy["season"] + if ( + livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_SEASON + and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and ("season" not in livelihood_strategy or not livelihood_strategy["season"]) ): strategy_is_valid = False # Include the header rows so that we can see which Wealth Groups are affected @@ -714,8 +781,10 @@ def get_instances_from_dataframe( errors.append(error_message) # Check the Livelihood Strategy has a product_id if one is required. 
- if livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_PRODUCT and ( - "product_id" not in livelihood_strategy or not livelihood_strategy["product_id"] + if ( + livelihood_strategy["strategy_type"] in LivelihoodStrategy.REQUIRES_PRODUCT + and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY + and ("product_id" not in livelihood_strategy or not livelihood_strategy["product_id"]) ): strategy_is_valid = False # Include the header rows so that we can see which Wealth Groups are affected @@ -760,7 +829,14 @@ def get_instances_from_dataframe( if label_attributes["strategy_type"]: strategy_type = label_attributes["strategy_type"] # Get the valid fields names so we can determine if the attribute is stored in LivelihoodActivity.extra - model = class_from_name(f"baseline.models.{strategy_type}") + # LivestockProduction is an artificial, composite strategy type representing the sum of + # MilkProduction, ButterProduction and MeatProduction. It isn't stored in the database, and it only + # requires income, expenditure and kcals_consumed, so we use the base LivelihoodActivity model. + model = ( + LivelihoodActivity + if strategy_type == "LivestockProduction" + else class_from_name(f"baseline.models.{strategy_type}") + ) activity_field_names = [field.name for field in model._meta.concrete_fields] # Also include values that point directly to the primary key of related objects activity_field_names += [ @@ -769,7 +845,10 @@ def get_instances_from_dataframe( if field.get_attname() not in activity_field_names ] - if not strategy_type: + # Raise an error if we find attributes without a strategy_type being set, unless we are processing + # the Livelihood Summary section, where we set the attribute from the section heading without wanting + # to save the actual data. + if not strategy_type and activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY: raise ValueError( "Found attributes %s from row %s without a strategy_type set" % (label_attributes, row) ) @@ -826,9 +905,20 @@ def get_instances_from_dataframe( # We are not starting a new Livelihood Strategy, but there may be # additional attributes that need to be added to the current one. if not livelihood_strategy: + additional_attributes = [label_attributes["attribute"]] if label_attributes["attribute"] else [] + for attribute in [ + "is_start", + "product_id", + "unit_of_measure_id", + "season", + "additional_identifier", + "notes", + ]: + if label_attributes[attribute]: + additional_attributes.append(attribute) raise ValueError( - "Found additional attributes %s from row %s without an existing LivelihoodStrategy" - % (label_attributes, row) + "Found attributes from label '%s' in row %s without an existing LivelihoodStrategy: %s" + % (label_attributes["activity_label"], row, ", ".join(additional_attributes)) ) # Only update expected keys, and only if we found a value for that attribute. @@ -881,10 +971,6 @@ def get_instances_from_dataframe( # Update the LivelihoodActivity records if any(value for value in df.loc[row, "B":].astype(str).str.strip()): - # When we get the values for the LivelihoodActivity records, we just want the actual attribute - # that the values in the row are for - activity_attribute = label_attributes["attribute"] - # Some labels are ambiguous and map to different attributes depending on the strategy_type. 
if activity_attribute == "quantity_produced_or_purchased":
                         if livelihood_strategy["strategy_type"] == LivelihoodStrategyType.CROP_PRODUCTION:
@@ -1032,9 +1118,16 @@ def get_instances_from_dataframe(
                             )
                         )
 
-                # Add the attribute to the LivelihoodStrategy.attribute_rows
-                else:
+                # Add the attribute to the LivelihoodStrategy.attribute_rows, assuming we have a strategy_type.
+                # Some rows may have set attributes without setting a strategy_type, for example in the
+                # Livelihood Summary section.
+                elif strategy_type:
                     livelihood_strategy["attribute_rows"][activity_attribute] = row
+                    # Assertion to prevent linting from complaining about possible None values
+                    assert activity_field_names is not None, (
+                        "Found activity_attribute %s from row %s, but there is no Livelihood Strategy and therefore no activity_field_names defined."
+                        % (activity_attribute, row)
+                    )
                     for i, value in enumerate(df.loc[row, "B":]):
                         # Some attributes are stored in LivelihoodActivity.extra rather than individual fields.
                         if activity_attribute not in activity_field_names:
@@ -1090,34 +1183,179 @@
     )
 
 
-@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
-def livelihood_activity_instances(
+def get_annotated_instances_from_dataframe(
     context: AssetExecutionContext,
-    livelihood_activity_dataframe,
+    livelihood_activity_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
+    activity_type: str,
+    num_header_rows: int,
 ) -> Output[dict]:
     """
-    LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
+    Get the LivelihoodStrategy and LivelihoodActivity instances from the BSS, annotated with completeness information.
+
+    Completeness of the recognized detail livelihood activities is calculated as a percentage of the total income,
+    expenditure and kcals_consumed reported in the livelihood summary section at the top of the Data worksheet.
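+
+    For example, the income completeness percentage for each strategy_type is computed in the body below as:
+
+        income_completeness = round(income_recognized / income_summary * 100, 1)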
""" # Find the metadata for this BSS partition_key = context.asset_partition_key_for_output() livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:]) + # Get the detail LivelihoodStrategy and LivelihoodActivity instances output = get_instances_from_dataframe( context, livelihood_activity_dataframe, livelihood_zone_baseline, - ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY, - len(HEADER_ROWS), + activity_type, + num_header_rows, partition_key, ) + + if output.value["LivelihoodActivity"]: + # Get the summary instances + reported_summary_output = get_instances_from_dataframe( + context, + livelihood_summary_dataframe, + livelihood_zone_baseline, + ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY, + # The summary section is on the Data worksheet, so has the same number of header rows + # regardless of the activity_type + len(HEADER_ROWS), + partition_key, + ) + + # Annotate the output metadata with completeness information + # Get the summary dataframe, grouped by strategy_type + summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"]) + summary_df = ( + summary_df[["strategy_type", "income", "expenditure", "kcals_consumed"]].groupby("strategy_type").sum() + ) + + # Add the recognized Livelihood Activities, also grouped by strategy_type + recognized_activities_df = pd.DataFrame(output.value["LivelihoodActivity"]) + for column in ["income", "expenditure", "kcals_consumed"]: + if column in recognized_activities_df: + recognized_activities_df[column] = pd.to_numeric( + recognized_activities_df[column], errors="coerce" + ).fillna(0) + else: + recognized_activities_df[column] = 0 + summary_df = summary_df.join( + recognized_activities_df[["strategy_type", "income", "expenditure", "kcals_consumed"]] + .groupby("strategy_type") + .sum(), + on="strategy_type", + lsuffix="_summary", + rsuffix="_recognized", + ).fillna(0) + + # Add a totals row at the end + summary_df.loc["Total"] = summary_df.sum(numeric_only=True) + + # Add completeness percentages + summary_df = summary_df.round(0) + summary_df["income_completeness"] = summary_df.apply( + lambda row: ( + round(row["income_recognized"] / row["income_summary"] * 100, 1) + if row["income_summary"] > 0 + else pd.NA + ), + axis=1, + ) + summary_df["expenditure_completeness"] = summary_df.apply( + lambda row: ( + round(row["expenditure_recognized"] / row["expenditure_summary"] * 100, 1) + if row["expenditure_summary"] > 0 + else pd.NA + ), + axis=1, + ) + summary_df["kcals_consumed_completeness"] = summary_df.apply( + lambda row: ( + round(row["kcals_consumed_recognized"] / row["kcals_consumed_summary"] * 100, 1) + if row["kcals_consumed_summary"] > 0 + else pd.NA + ), + axis=1, + ) + # Format the numbers as integers, for better display in the markdown table + for column in ["income", "expenditure", "kcals_consumed"]: + for source in ["recognized", "summary"]: + summary_df[f"{column}_{source}"] = summary_df.apply( + lambda row: ( + int(row[f"{column}_{source}"]) + if (pd.notna(row[f"{column}_recognized"]) and row[f"{column}_recognized"] > 0) + or (pd.notna(row[f"{column}_summary"]) and row[f"{column}_summary"] > 0) + else pd.NA + ), + axis="columns", + ) + + # Transpose and reorder the columns and rows + # Sort the rows so that Strategy Types appear in the same order as in the BSS + ordered_strategy_types = ["LivestockProduction"] + [x for x in LivelihoodStrategyType] + ["Total"] + summary_df["strategy_type"] = pd.Categorical( + 
+            summary_df.reset_index(drop=False)["strategy_type"],
+            categories=ordered_strategy_types,
+            ordered=True,
+        )
+        summary_df = summary_df.reset_index(drop=True).sort_values(by="strategy_type")
+        summary_df = summary_df[
+            [
+                "strategy_type",
+                "kcals_consumed_recognized",
+                "kcals_consumed_summary",
+                "kcals_consumed_completeness",
+                "income_recognized",
+                "income_summary",
+                "income_completeness",
+                "expenditure_recognized",
+                "expenditure_summary",
+                "expenditure_completeness",
+            ]
+        ]
+        summary_df = summary_df.set_index("strategy_type").transpose()
+
+        # Add the completeness summary to the output metadata
+        output.metadata["pct_kcals_consumed_recognized"] = float(
+            summary_df.loc["kcals_consumed_completeness", "Total"]
+        )
+        output.metadata["pct_income_recognized"] = float(summary_df.loc["income_completeness", "Total"])
+        output.metadata["pct_expenditure_recognized"] = float(summary_df.loc["expenditure_completeness", "Total"])
+        output.metadata["completeness_summary"] = MetadataValue.md(
+            summary_df.replace(pd.NA, None).to_markdown(floatfmt=",.0f")
+        )
+
+    # Move the preview metadata item to the end of the dict
+    output.metadata["preview"] = output.metadata.pop("preview")
+
+    return output
+
+
+@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
+def livelihood_activity_instances(
+    context: AssetExecutionContext,
+    livelihood_activity_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
+) -> Output[dict]:
+    """
+    LivelihoodStrategy and LivelihoodActivity instances extracted from the BSS.
+    """
+    output = get_annotated_instances_from_dataframe(
+        context,
+        livelihood_activity_dataframe,
+        livelihood_summary_dataframe,
+        ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY,
+        len(HEADER_ROWS),
+    )
+
+    return output
 
 
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def livelihood_activity_valid_instances(
     context: AssetExecutionContext,
-    livelihood_activity_instances,
-    wealth_characteristic_instances,
+    livelihood_activity_instances: dict,
+    wealth_characteristic_instances: dict,
 ) -> Output[dict]:
     """
     Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture.
@@ -1147,7 +1385,7 @@ def livelihood_activity_valid_instances(
 def livelihood_activity_fixture(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
-    livelihood_activity_valid_instances,
+    livelihood_activity_valid_instances: dict,
 ) -> Output[list[dict]]:
     """
     Django fixture for the Livelihood Activities from a BSS.
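At its core, the completeness annotation in the hunks above is a join between two sets of grouped totals, one from the summary section and one from the recognized detail activities. A minimal standalone sketch of that calculation, using the column names from the diff above but invented figures:

    import pandas as pd

    # Invented example records, shaped like the LivelihoodActivity instances above
    summary = [
        {"strategy_type": "CropProduction", "income": 1000, "expenditure": 400, "kcals_consumed": 900},
        {"strategy_type": "Fishing", "income": 200, "expenditure": 50, "kcals_consumed": 100},
    ]
    recognized = [
        {"strategy_type": "CropProduction", "income": 900, "expenditure": 400, "kcals_consumed": 855},
    ]

    summary_df = pd.DataFrame(summary).groupby("strategy_type").sum()
    recognized_df = pd.DataFrame(recognized).groupby("strategy_type").sum()

    # Join the two sets of totals, then express the recognized total as a percentage of the summary total
    df = summary_df.join(recognized_df, lsuffix="_summary", rsuffix="_recognized").fillna(0)
    for column in ["income", "expenditure", "kcals_consumed"]:
        df[f"{column}_completeness"] = (df[f"{column}_recognized"] / df[f"{column}_summary"] * 100).round(1)

    # CropProduction shows 90.0% income and 95.0% kcals completeness; Fishing shows 0.0
    # because none of its activities were recognized
    print(df.filter(like="_completeness"))

Strategy types that appear in the summary but have no recognized detail activities fall out of the join as missing values, which is why both this sketch and the pipeline code fill them with 0 before computing the percentages.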
diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py
index edf3957..a73a0ae 100644
--- a/pipelines/assets/other_cash_income.py
+++ b/pipelines/assets/other_cash_income.py
@@ -54,7 +54,7 @@
     get_summary_bss_label_dataframe,
 )
 from .fixtures import get_fixture_from_instances, import_fixture, validate_instances
-from .livelihood_activity import get_instances_from_dataframe
+from .livelihood_activity import get_annotated_instances_from_dataframe
 
 # set the default Django settings module
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hea.settings.production")
@@ -128,33 +128,31 @@ def summary_other_cash_income_labels_dataframe(
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def other_cash_income_instances(
     context: AssetExecutionContext,
-    other_cash_income_dataframe,
+    other_cash_income_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
 ) -> Output[dict]:
     """
     LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
     """
-    partition_key = context.asset_partition_key_for_output()
-    livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:])
-
     if other_cash_income_dataframe.empty:
         output = {}
 
-    output = get_instances_from_dataframe(
+    output = get_annotated_instances_from_dataframe(
         context,
         other_cash_income_dataframe,
-        livelihood_zone_baseline,
+        livelihood_summary_dataframe,
         ActivityLabel.LivelihoodActivityType.OTHER_CASH_INCOME,
         len(HEADER_ROWS),
-        partition_key,
     )
+
     return output
 
 
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def other_cash_income_valid_instances(
     context: AssetExecutionContext,
-    other_cash_income_instances,
-    wealth_characteristic_instances,
+    other_cash_income_instances: dict,
+    wealth_characteristic_instances: dict,
 ) -> Output[dict]:
     """
     Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture.
@@ -184,7 +182,7 @@ def other_cash_income_valid_instances(
 def other_cash_income_fixture(
     context: AssetExecutionContext,
     config: BSSMetadataConfig,
-    other_cash_income_valid_instances,
+    other_cash_income_valid_instances: dict,
 ) -> Output[list[dict]]:
     """
     Django fixture for the Livelihood Activities from a BSS.
diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py
index 4ab7ece..0e48ef4 100644
--- a/pipelines/assets/wild_foods.py
+++ b/pipelines/assets/wild_foods.py
@@ -72,7 +72,7 @@
     get_summary_bss_label_dataframe,
 )
 from .fixtures import get_fixture_from_instances, import_fixture, validate_instances
-from .livelihood_activity import get_instances_from_dataframe
+from .livelihood_activity import get_annotated_instances_from_dataframe
 
 # set the default Django settings module
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "hea.settings.production")
@@ -138,33 +138,31 @@ def summary_wild_foods_labels_dataframe(
 @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
 def wild_foods_instances(
     context: AssetExecutionContext,
-    wild_foods_dataframe,
+    wild_foods_dataframe: pd.DataFrame,
+    livelihood_summary_dataframe: pd.DataFrame,
 ) -> Output[dict]:
     """
     LivelhoodStrategy and LivelihoodActivity instances extracted from the BSS.
""" - partition_key = context.asset_partition_key_for_output() - livelihood_zone_baseline = LivelihoodZoneBaseline.objects.get_by_natural_key(*partition_key.split("~")[1:]) - if wild_foods_dataframe.empty: output = {} - output = get_instances_from_dataframe( + output = get_annotated_instances_from_dataframe( context, wild_foods_dataframe, - livelihood_zone_baseline, + livelihood_summary_dataframe, ActivityLabel.LivelihoodActivityType.WILD_FOODS, len(HEADER_ROWS), - partition_key, ) + return output @asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager") def wild_foods_valid_instances( context: AssetExecutionContext, - wild_foods_instances, - wealth_characteristic_instances, + wild_foods_instances: dict, + wealth_characteristic_instances: dict, ) -> Output[dict]: """ Valid LivelhoodStrategy and LivelihoodActivity instances from a BSS, ready to be loaded via a Django fixture. @@ -194,7 +192,7 @@ def wild_foods_valid_instances( def wild_foods_fixture( context: AssetExecutionContext, config: BSSMetadataConfig, - wild_foods_valid_instances, + wild_foods_valid_instances: dict, ) -> Output[list[dict]]: """ Django fixture for the Livelihood Activities from a BSS. From ef8a7de072d288268dce81e049b534e5efaee534 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 11 Oct 2025 10:38:37 -0400 Subject: [PATCH 13/15] Remove redundant imports - see HEA-572 --- pipelines/assets/other_cash_income.py | 1 - pipelines/assets/wild_foods.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pipelines/assets/other_cash_income.py b/pipelines/assets/other_cash_income.py index a73a0ae..2792064 100644 --- a/pipelines/assets/other_cash_income.py +++ b/pipelines/assets/other_cash_income.py @@ -62,7 +62,6 @@ # Configure Django with our custom settings before importing any Django classes django.setup() -from baseline.models import LivelihoodZoneBaseline # NOQA: E402 from metadata.models import ActivityLabel # NOQA: E402 # Indexes of header rows in the Data3 dataframe (wealth_group_category, district, village) diff --git a/pipelines/assets/wild_foods.py b/pipelines/assets/wild_foods.py index 0e48ef4..e663c2e 100644 --- a/pipelines/assets/wild_foods.py +++ b/pipelines/assets/wild_foods.py @@ -80,7 +80,6 @@ # Configure Django with our custom settings before importing any Django classes django.setup() -from baseline.models import LivelihoodZoneBaseline # NOQA: E402 from metadata.models import ActivityLabel # NOQA: E402 # Indexes of header rows in the Data3 dataframe (wealth_group_category, district, village) From 6c2c85a2c30eac9250ac820dc8ef0d98b444235f Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sat, 11 Oct 2025 11:40:54 -0400 Subject: [PATCH 14/15] Fix isort - see HEA-572 --- pipelines/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/__init__.py b/pipelines/__init__.py index dc6a54b..f70caec 100644 --- a/pipelines/__init__.py +++ b/pipelines/__init__.py @@ -25,8 +25,8 @@ livelihood_activity_label_dataframe, livelihood_activity_valid_instances, livelihood_summary_dataframe, - summary_livelihood_activity_labels_dataframe, livelihood_summary_label_dataframe, + summary_livelihood_activity_labels_dataframe, summary_livelihood_summary_labels_dataframe, ) from .assets.other_cash_income import ( From 635cc91ced71104ffb83c9f7ff05ece009a05d2a Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Sun, 12 Oct 2025 11:54:17 -0400 Subject: [PATCH 15/15] Remove dbtoyaml calls from 01-build-then-test - see HEA-760 --- .github/workflows/01-build-then-test.yml | 14 
-------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/01-build-then-test.yml b/.github/workflows/01-build-then-test.yml index a517324..9efe314 100644 --- a/.github/workflows/01-build-then-test.yml +++ b/.github/workflows/01-build-then-test.yml @@ -271,9 +271,6 @@ jobs: docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -400,9 +397,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -530,9 +524,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4 @@ -661,11 +652,6 @@ jobs: # Copy the artifacts out of the Docker container to project directory docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/log ./ || true docker cp ci-${APP}-${CI_PIPELINE_ID}-${{ github.job }}:/usr/src/app/coverage.txt ./ || true - # The prod image does not include pyrseas/dbtoyaml. Building a test image to include that - docker compose build app - # Save the database schema as an artifact - docker compose run --no-deps --rm --entrypoint dbtoyaml app --no-owner --no-privileges test_${PGDATABASE} > schema.yml - diff pyrseas/schema.yaml schema.yml > schema.diff || true - name: "Upload test artifacts" if: success() || failure() uses: actions/upload-artifact@v4
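
A closing note on the completeness table from the earlier livelihood_activity.py hunks: the row ordering relies on a pandas ordered categorical, so that strategy types sort in BSS worksheet order rather than alphabetically. A minimal sketch, with invented labels standing in for the LivelihoodStrategyType values:

    import pandas as pd

    # Invented stand-ins for ["LivestockProduction"] + list(LivelihoodStrategyType) + ["Total"]
    ordered_strategy_types = ["LivestockProduction", "MilkProduction", "CropProduction", "Fishing", "Total"]

    df = pd.DataFrame(
        {"strategy_type": ["Total", "CropProduction", "MilkProduction"], "income": [300, 100, 200]}
    )
    df["strategy_type"] = pd.Categorical(df["strategy_type"], categories=ordered_strategy_types, ordered=True)

    # sort_values now follows the declared category order, so "Total" always lands last
    print(df.sort_values(by="strategy_type"))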