From 6fd3a68e05a516976cc8526ff1305bc289f86fbe Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Wed, 21 Jan 2026 16:18:53 -0500
Subject: [PATCH 01/20] feat(etl): implement clinical indication section parser

---
 .../etl/sections/clinical_indication.py       | 132 ++++++++++++++++--
 tests/etl/sections/__init__.py                |   0
 .../etl/sections/test_clinical_indication.py  | 124 ++++++++++++++++
 3 files changed, 248 insertions(+), 8 deletions(-)
 create mode 100644 tests/etl/sections/__init__.py
 create mode 100644 tests/etl/sections/test_clinical_indication.py
diff --git a/src/prenatalppkt/etl/sections/clinical_indication.py b/src/prenatalppkt/etl/sections/clinical_indication.py
index de31325..4cf3431 100644
--- a/src/prenatalppkt/etl/sections/clinical_indication.py
+++ b/src/prenatalppkt/etl/sections/clinical_indication.py
@@ -1,12 +1,128 @@
-"""
-Clinical indication section parser (SKELETON).
+from __future__ import annotations
 
-TODO @VarenyaJ: Map indications to ICD-10 and HPO terms
-"""
+import json
+import re
+from typing import Dict, List, Union
 
-from typing import Dict
 
+def parse_clinical_indication(data: Union[str, Dict], source_format: str) -> Dict:
+    """
+    Parse clinical indication / reason for exam from different source formats.
 
-def parse_clinical_indication(data: str, source_format: str = "viewpoint_text") -> Dict:
-    """Extract indication for ultrasound exam."""
-    return {"indication_text": "", "icd10_codes": [], "hpo_terms": []}
+    Supported formats:
+        - observer_json
+        - viewpoint_text
+        - viewpoint_hl7
+
+    Returns a normalized Dict with indication metadata.
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        result = _parse_observer_indication(data)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        result = _parse_viewpoint_text_indication(data)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        result = _parse_viewpoint_hl7_indication(data)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    # Standardized return schema
+    result.setdefault("icd10_codes", [])
+    result.setdefault("hpo_terms", [])
+    result["source_format"] = source_format
+    return result
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_indication(json_data: Dict) -> Dict:
+    """
+    Extract indication from Observer JSON.
+    Known locations (first non-empty wins):
+        - exam.indication
+        - exam.finalize.indication
+    Null/non-dict "exam" or "finalize" and non-string values yield "".
+    """
+    exam = json_data.get("exam")
+    if not isinstance(exam, dict):
+        exam = {}
+    finalize = exam.get("finalize")
+    nested = finalize.get("indication") if isinstance(finalize, dict) else None
+    text = exam.get("indication") or nested
+    text = text.strip() if isinstance(text, str) else ""
+    return {"indication_text": text, "raw_data": json_data}
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_indication(text: str) -> Dict:
+    """
+    Extract indication section from ViewPoint text reports.
+
+    Expected pattern (LF or CRLF line endings):
+        Indication
+        ==========
+        [free text]
+    """
+    indication_text = ""
+    # Lookahead stop: an empty section must not swallow the next heading.
+    pattern = re.compile(
+        r"Indication\s*\n=+[ \t]*\r?\n(?P<body>.*?)(?=^[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE | re.MULTILINE,
+    )
+
+    match = pattern.search(text)
+    if match:
+        indication_text = match.group("body").strip()
+
+    return {"indication_text": indication_text, "raw_data": {"text": text}}
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_indication(hl7: str) -> Dict:
+    """
+    Extract indication from HL7 ORU^R01 messages.
+
+    Matching OBX values are joined with single spaces. Common pattern:
+        OBX||ST|RequestedProcedure.Indication^Indication|1|Advanced maternal age
+    """
+    indication_lines: List[str] = []
+
+    for line in hl7.splitlines():
+        if not line.startswith("OBX"):
+            continue
+
+        fields = line.split("|")
+        if len(fields) < 6:
+            continue
+
+        if "RequestedProcedure.Indication" not in fields[3]:
+            continue
+
+        # Keep the first caret-delimited component of the value; strip it
+        # before the emptiness check so blank values add no stray spaces.
+        value = fields[5].split("^")[0].strip()
+        if value:
+            indication_lines.append(value)
+
+    indication_text = " ".join(indication_lines)
+
+    return {"indication_text": indication_text, "raw_data": {"hl7": hl7}}
diff --git a/tests/etl/sections/__init__.py b/tests/etl/sections/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/etl/sections/test_clinical_indication.py b/tests/etl/sections/test_clinical_indication.py
new file mode 100644
index 0000000..9d1116c
--- /dev/null
+++ b/tests/etl/sections/test_clinical_indication.py
@@ -0,0 +1,124 @@
+import json
+import pytest
+
+from prenatalppkt.etl.sections.clinical_indication import parse_clinical_indication
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestClinicalIndicationObserver:
+    def test_basic_indication(self):
+        data = json.dumps({"exam": {"indication": "Advanced maternal age, dating"}})
+
+        result = parse_clinical_indication(data, "observer_json")
+
+        assert "Advanced maternal age" in result["indication_text"]
+        assert result["source_format"] == "observer_json"
+        assert result["icd10_codes"] == []
+        assert result["hpo_terms"] == []
+
+    def test_fallback_finalize_indication(self):
+        data = json.dumps(
+            {"exam": {"finalize": {"indication": "Poor obstetric history"}}}
+        )
+
+        result = parse_clinical_indication(data, "observer_json")
+        assert result["indication_text"] == "Poor obstetric history"
+
+    def test_missing_indication(self):
+        data = json.dumps({"exam": {}})
+        result = parse_clinical_indication(data, "observer_json")
+        assert result["indication_text"] == ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+class TestClinicalIndicationViewPointText:
+    def test_basic_indication(self):
+        text = """Indication
+==========
+Advanced maternal age, dating
+
+History
+=======
+Previous cesarean section
+"""
+        result = parse_clinical_indication(text, "viewpoint_text")
+
+        assert "Advanced maternal age" in result["indication_text"]
+        assert "History" not in result["indication_text"]
+        assert result["source_format"] == "viewpoint_text"
+
+    def test_multiline_indication(self):
+        text = """Indication
+==========
+Advanced maternal age
+Previous cesarean section
+IVF pregnancy
+"""
+        result = parse_clinical_indication(text, "viewpoint_text")
+
+        assert "IVF pregnancy" in result["indication_text"]
+        assert result["indication_text"].count("\n") >= 1
+
+    def test_missing_indication_section(self):
+        text = """Fetal Biometry
+============
+HC 175.0 mm
+"""
+        result = parse_clinical_indication(text, "viewpoint_text")
+        assert result["indication_text"] == ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+class TestClinicalIndicationViewPointHL7:
+    def test_basic_indication(self):
+        hl7 = (
+            "MSH|^~\\&|\n"
+            "OBX||ST|RequestedProcedure.Indication^Indication|1|Advanced maternal age\n"
+            "OBX||ST|RequestedProcedure.Indication^Indication|2|Dating scan\n"
+        )
+
+        result = parse_clinical_indication(hl7, "viewpoint_hl7")
+
+        assert "Advanced maternal age" in result["indication_text"]
+        assert "Dating scan" in result["indication_text"]
+        assert result["source_format"] == "viewpoint_hl7"
+
+    def test_no_indication_obx(self):
+        hl7 = "MSH|^~\\&|\nOBX||NM|SomeOtherField|1|123\n"
+        result = parse_clinical_indication(hl7, "viewpoint_hl7")
+        assert result["indication_text"] == ""
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestClinicalIndicationEdgeCases:
+    def test_invalid_format(self):
+        with pytest.raises(ValueError):
+            parse_clinical_indication("data", "unknown_format")
+
+    def test_non_string_text(self):
+        with pytest.raises(ValueError):
+            parse_clinical_indication({"bad": "data"}, "viewpoint_text")
+
+    def test_special_characters(self):
+        text = """Indication
+==========
+Advanced maternal age - >=35 years
+"""
+        result = parse_clinical_indication(text, "viewpoint_text")
+        assert ">=35" in result["indication_text"]

From 492b8541ca3589e329033a6b47087f7d35b5b433 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Wed, 21 Jan 2026 22:22:40 -0500
Subject: [PATCH 02/20] feat(etl): implement pregnancy dating section parser

---
 .../etl/sections/pregnancy_dating.py          | 212 +++++++++++++++++-
 tests/etl/sections/test_pregnancy_dating.py   | 106 +++++++++
 2 files changed, 306 insertions(+), 12 deletions(-)
 create mode 100644 tests/etl/sections/test_pregnancy_dating.py

diff --git a/src/prenatalppkt/etl/sections/pregnancy_dating.py b/src/prenatalppkt/etl/sections/pregnancy_dating.py
index 4c79114..93cf017 100644
--- a/src/prenatalppkt/etl/sections/pregnancy_dating.py
+++ b/src/prenatalppkt/etl/sections/pregnancy_dating.py
@@ -1,20 +1,208 @@
-"""
-Pregnancy dating section parser (SKELETON).
+from __future__ import annotations
 
-TODO @VarenyaJ: Parse LMP, EDD, assigned dating method; Handle multiple dating methods (LMP, US, IVF)
-"""
+import json
+import re
+from datetime import datetime
+from typing import Dict, Optional, Union
 
-from typing import Dict
+from prenatalppkt.gestational_age import GestationalAge
 
 
-def parse_pregnancy_dating(data: str, source_format: str = "viewpoint_text") -> Dict:
-    """Extract pregnancy dating information."""
+DATE_FORMATS = ["%Y-%m-%d", "%m/%d/%Y", "%Y%m%d"]
+
+
+def parse_pregnancy_dating(data: Union[str, Dict], source_format: str) -> Dict:
+    """
+    Parse pregnancy dating information from ultrasound reports.
+
+    Supported formats:
+        - observer_json
+        - viewpoint_text
+        - viewpoint_hl7
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        result = _parse_observer_pregnancy(data)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        result = _parse_viewpoint_text_pregnancy(data)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        result = _parse_viewpoint_hl7_pregnancy(data)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    result["source_format"] = source_format
+    return result
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_pregnancy(json_data: Dict) -> Dict:
+    """Read LMP/EDD/dating method from Observer JSON (null "exam" tolerated)."""
+    exam = json_data.get("exam")
+    if not isinstance(exam, dict):
+        exam = {}  # missing, null or malformed "exam": report no dating
+    lmp = exam.get("lmp")
+    edd = exam.get("edd") or exam.get("estimated_due_date")
+    dating_method = exam.get("dating_method")
+    ga_by_lmp = _calculate_ga_from_lmp(lmp) if lmp else None
+    return {
+        "lmp": lmp,
+        "edd": edd,
+        "assigned_edd": edd,
+        "dating_method": dating_method,
+        "ga_by_lmp": ga_by_lmp,
+        "ga_by_ultrasound": None,
+        "assigned_ga": ga_by_lmp,
+        "raw_data": json_data,
+    }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_pregnancy(text: str) -> Dict:
+    """
+    Extract pregnancy dating from ViewPoint text reports.
+
+    Example:
+        Dating
+        ======
+        LMP 01/15/2025
+        EDD by LMP 10/22/2025
+        Assigned dating based on LMP
+    """
+    lmp = None
+    edd = None
+    dating_method = None
+
+    section = _extract_dating_section(text)
+
+    for line in section.splitlines():
+        line = line.strip()
+
+        if line.upper().startswith("LMP"):
+            lmp = _parse_date_from_text(line)
+
+        elif "EDD" in line.upper():
+            edd = _parse_date_from_text(line)
+
+        elif "ASSIGNED" in line.upper():
+            dating_method = line
+
+    ga_by_lmp = _calculate_ga_from_lmp(lmp) if lmp else None
+
     return {
-        "lmp": None,
-        "edd": None,
-        "assigned_edd": None,
+        "lmp": lmp,
+        "edd": edd,
+        "assigned_edd": edd,
+        "dating_method": dating_method,
+        "ga_by_lmp": ga_by_lmp,
+        "ga_by_ultrasound": None,
+        "assigned_ga": ga_by_lmp,
+        "raw_data": {"text": text},
+    }
+
+
+def _extract_dating_section(text: str) -> str:
+    """Return the body under the "Dating" heading ("" when absent)."""
+    # Lookahead stop: an empty section must not swallow the next heading.
+    pattern = r"Dating\s*\n=+[ \t]*\r?\n(?P<body>.*?)(?=^[A-Z][^\n]*\n=+|\Z)"
+    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
+    match = re.search(pattern, text, flags)
+    return match.group("body") if match else ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_pregnancy(hl7: str) -> Dict:
+    lmp = None
+    edd = None
+
+    for line in hl7.splitlines():
+        if not line.startswith("OBX"):
+            continue
+
+        fields = line.split("|")
+        if len(fields) < 6:
+            continue
+
+        obs_id = fields[3]
+        value = fields[5]
+
+        if "LastMenstrualPeriod" in obs_id:
+            lmp = _parse_date_string(value)
+
+        elif "EDD" in obs_id:
+            edd = _parse_date_string(value)
+
+    ga_by_lmp = _calculate_ga_from_lmp(lmp) if lmp else None
+
+    return {
+        "lmp": lmp,
+        "edd": edd,
+        "assigned_edd": edd,
         "dating_method": None,
-        "ga_by_lmp": None,
+        "ga_by_lmp": ga_by_lmp,
         "ga_by_ultrasound": None,
-        "assigned_ga": None,
+        "assigned_ga": ga_by_lmp,
+        "raw_data": {"hl7": hl7},
     }
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _parse_date_from_text(text: str) -> Optional[str]:
+    for token in re.split(r"\s+", text):
+        parsed = _parse_date_string(token)
+        if parsed:
+            return parsed
+    return None
+
+
+def _parse_date_string(value: str) -> Optional[str]:
+    value = value.split("^")[0].strip()
+
+    # Fast reject: must contain digits
+    if not any(ch.isdigit() for ch in value):
+        return None
+
+    for fmt in DATE_FORMATS:
+        parsed = _try_parse_date(value, fmt)
+        if parsed:
+            return parsed
+
+    return None
+
+
+def _try_parse_date(value: str, fmt: str) -> Optional[str]:
+    try:
+        return datetime.strptime(value, fmt).date().isoformat()
+    except ValueError:
+        return None
+
+
+def _calculate_ga_from_lmp(lmp_iso: str) -> Optional[Dict]:
+    try:  # best-effort: any failure below yields None rather than raising
+        ga = GestationalAge.from_lmp(lmp_iso)
+        return {"weeks": ga.weeks, "days": ga.days}
+    except Exception:  # NOTE(review): also masks AttributeError/TypeError; confirm
+        return None
diff --git a/tests/etl/sections/test_pregnancy_dating.py b/tests/etl/sections/test_pregnancy_dating.py
new file mode 100644
index 0000000..74e68f0
--- /dev/null
+++ b/tests/etl/sections/test_pregnancy_dating.py
@@ -0,0 +1,106 @@
+import json
+import pytest
+
+from prenatalppkt.etl.sections.pregnancy_dating import parse_pregnancy_dating
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestPregnancyDatingObserver:
+    def test_basic_lmp_and_edd(self):
+        data = json.dumps(
+            {"exam": {"lmp": "2025-01-15", "edd": "2025-10-22", "dating_method": "LMP"}}
+        )
+
+        result = parse_pregnancy_dating(data, "observer_json")
+
+        assert result["lmp"] == "2025-01-15"
+        assert result["edd"] == "2025-10-22"
+        assert result["ga_by_lmp"] is None
+        assert result["source_format"] == "observer_json"
+
+    def test_missing_dates(self):
+        data = json.dumps({"exam": {}})
+        result = parse_pregnancy_dating(data, "observer_json")
+        assert result["lmp"] is None
+        assert result["ga_by_lmp"] is None
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+class TestPregnancyDatingViewPointText:
+    def test_basic_dating_section(self):
+        text = """Dating
+======
+LMP 01/15/2025
+EDD by LMP 10/22/2025
+Assigned dating based on LMP
+"""
+
+        result = parse_pregnancy_dating(text, "viewpoint_text")
+
+        assert result["lmp"] == "2025-01-15"
+        assert result["edd"] == "2025-10-22"
+        assert result["ga_by_lmp"] is None
+        assert "Assigned" in result["dating_method"]
+
+    def test_missing_dating_section(self):
+        text = "Fetal Biometry\n============\nHC 175 mm"
+        result = parse_pregnancy_dating(text, "viewpoint_text")
+        assert result["lmp"] is None
+        assert result["edd"] is None
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+class TestPregnancyDatingViewPointHL7:
+    def test_basic_hl7_dates(self):
+        hl7 = (
+            "OBX||DT|EpisodeHistory.LastMenstrualPeriod^LMP|1|20250115\n"
+            "OBX||DT|EpisodeHistory.EDDbyLMP^EDD|1|20251022\n"
+        )
+
+        result = parse_pregnancy_dating(hl7, "viewpoint_hl7")
+
+        assert result["lmp"] == "2025-01-15"
+        assert result["edd"] == "2025-10-22"
+        assert result["ga_by_lmp"] is None
+
+    def test_no_dates(self):
+        hl7 = "OBX||NM|SomeOtherField|1|123\n"
+        result = parse_pregnancy_dating(hl7, "viewpoint_hl7")
+        assert result["lmp"] is None
+        assert result["ga_by_lmp"] is None
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestPregnancyDatingEdgeCases:
+    def test_invalid_format(self):
+        with pytest.raises(ValueError):
+            parse_pregnancy_dating("data", "bad_format")
+
+    def test_non_string_text(self):
+        with pytest.raises(ValueError):
+            parse_pregnancy_dating({"bad": "data"}, "viewpoint_text")
+
+    def test_malformed_dates(self):
+        text = """Dating
+        ======
+        LMP not-a-date
+        """
+        result = parse_pregnancy_dating(text, "viewpoint_text")
+        assert result["lmp"] is None
+        assert result["ga_by_lmp"] is None

From f22926a9e74ade1ea6e71db30d4caa8374b9f4d1 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Wed, 21 Jan 2026 22:22:47 -0500
Subject: [PATCH 03/20] feat(etl): implement clinical impression section parser

---
 .../etl/sections/clinical_impression.py       | 173 ++++++++++++------
 .../etl/sections/test_clinical_impression.py  |  87 +++++++++
 2 files changed, 208 insertions(+), 52 deletions(-)
 create mode 100644 tests/etl/sections/test_clinical_impression.py

diff --git a/src/prenatalppkt/etl/sections/clinical_impression.py b/src/prenatalppkt/etl/sections/clinical_impression.py
index 4925e67..86038a4 100644
--- a/src/prenatalppkt/etl/sections/clinical_impression.py
+++ b/src/prenatalppkt/etl/sections/clinical_impression.py
@@ -1,66 +1,135 @@
-"""
-Clinical impression section parser (SKELETON).
+from __future__ import annotations
 
-Extracts clinical impressions, diagnoses, and findings from report impression.
+import json
+import re
+from typing import Dict, List, Optional, Union
 
-TODO @VarenyaJ: Complete implementation, Map clinical findings to HPO terms, Extract structured anomalies from impression text
-"""
+from prenatalppkt.hpo import HpoParser
 
-from typing import Dict
 
-
-def parse_clinical_impression(data: str, source_format: str = "viewpoint_text") -> Dict:
+def parse_clinical_impression(
+    data: Union[str, Dict], source_format: str, hpo_parser: Optional[HpoParser] = None
+) -> Dict:
     """
-    Extract clinical impression from ultrasound report.
-
-    Args:
-        data: Report content (text, JSON, or HL7)
-        source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
-
-    Returns:
-        Dict with keys:
-            - impression_text: str - Full impression narrative
-            - diagnoses: List[str] - Identified diagnoses
-            - anomalies: List[Dict] - Structured anomaly data
-            - gestational_age_assessment: str - GA conclusion
-            - growth_assessment: str - Fetal growth conclusion
-            - recommendations: List[str] - Follow-up recommendations
-            - hpo_terms: List[str] - Mapped HPO term IDs (FUTURE)
-
-    TODO @VarenyaJ Implementation Steps:
-        1. Locate impression section:
-           - ViewPoint Text: "Impression" section after "========="
-           - Observer JSON: exam.finalize.generalComment.plain_text
-           - ViewPoint HL7: May be in RequestedProcedure or exam notes
-        2. Parse free-text impression for key findings
-        3. Extract anomalies:
-           - Observer JSON: fetuses[].anatomy[].anomalies[]
-           - Text: Look for patterns like "consistent with", "suggestive of"
-        4. Identify growth conclusions (FGR, LGA, AGA)
-        5. Extract recommendations for follow-up
-        6. Map findings to HPO terms:
-           - Use src/prenatalppkt/hpo.cr_fetal_findings
-           - Handle synonyms and varied clinical language
-
-    TODO @VarenyaJ: DO NOT:
-        - Assume impression section exists (optional in all formats)
-        - Parse impression without context (may reference biometry results)
-        - Miss negative findings (e.g., "no evidence of...")
-        - Ignore severity qualifiers (mild, moderate, severe)
+    Parse clinical impression / interpretation section.
+
+    Supports:
+        - observer_json
+        - viewpoint_text
+        - viewpoint_hl7
     """
-    # SKELETON: Return empty structure
+    if hpo_parser is None:
+        hpo_parser = HpoParser()
+
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        impression_text = _parse_observer_impression(data)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        impression_text = _parse_viewpoint_text_impression(data)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        impression_text = _parse_viewpoint_hl7_impression(data)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    if impression_text and hasattr(hpo_parser, "extract"):
+        hpo_terms = hpo_parser.extract(impression_text)
+    else:
+        hpo_terms = []
+
     return {
-        "impression_text": "",
+        "impression_text": impression_text,
         "diagnoses": [],
         "anomalies": [],
         "gestational_age_assessment": None,
-        "growth_assessment": None,
+        "growth_assessment": _infer_growth_assessment(impression_text),
         "recommendations": [],
-        "hpo_terms": [],  # FUTURE
+        "hpo_terms": hpo_terms,
+        "source_format": source_format,
     }
 
 
-# TODO @VarenyaJ: Add helper functions:
-# - _extract_anomalies_from_text(text: str) -> List[Dict]
-# - _classify_growth_assessment(text: str) -> str
-# - _extract_recommendations(text: str) -> List[str]
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_impression(json_data: Dict) -> str:
+    exam = json_data.get("exam", {})
+    finalize = exam.get("finalize", {})
+
+    return finalize.get("generalComment", {}).get("plain_text", "").strip()
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_impression(text: str) -> str:
+    """
+    Impression
+    ==========
+    Free text narrative
+    """
+    pattern = re.compile(
+        r"Impression\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE,
+    )
+
+    match = pattern.search(text)
+    return match.group("body").strip() if match else ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_impression(hl7: str) -> str:
+    lines: List[str] = []
+
+    for line in hl7.splitlines():
+        if not line.startswith("OBX"):
+            continue
+
+        fields = line.split("|")
+        if len(fields) < 6:
+            continue
+
+        obs_id = fields[3]
+        value = fields[5].split("^")[0].strip()
+
+        if "Impression" in obs_id or "Interpretation" in obs_id:
+            if value:
+                lines.append(value)
+
+    return " ".join(lines)
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _infer_growth_assessment(text: str) -> Optional[str]:
+    if not text:
+        return None
+
+    text_lower = text.lower()
+
+    if "growth restriction" in text_lower or "fgr" in text_lower:
+        return "FGR"
+    if "large for gestational age" in text_lower or "lga" in text_lower:
+        return "LGA"
+    if "appropriate for gestational age" in text_lower or "aga" in text_lower:
+        return "AGA"
+
+    return None
diff --git a/tests/etl/sections/test_clinical_impression.py b/tests/etl/sections/test_clinical_impression.py
new file mode 100644
index 0000000..715a2e1
--- /dev/null
+++ b/tests/etl/sections/test_clinical_impression.py
@@ -0,0 +1,87 @@
+import json
+import pytest
+
+from prenatalppkt.etl.sections.clinical_impression import parse_clinical_impression
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestClinicalImpressionObserver:
+    def test_basic_impression(self, hpo_cr):
+        data = json.dumps(
+            {
+                "exam": {
+                    "finalize": {
+                        "generalComment": {
+                            "plain_text": "Normal fetal anatomy. No abnormalities."
+                        }
+                    }
+                }
+            }
+        )
+
+        result = parse_clinical_impression(data, "observer_json", hpo_parser=hpo_cr)
+
+        assert "Normal fetal anatomy" in result["impression_text"]
+        assert result["hpo_terms"] == []
+        assert result["source_format"] == "observer_json"
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+class TestClinicalImpressionViewPointText:
+    def test_basic_impression(self, hpo_cr):
+        text = """Impression
+=========
+Fetal growth restriction is suspected.
+Recommend follow-up scan.
+"""
+
+        result = parse_clinical_impression(text, "viewpoint_text", hpo_parser=hpo_cr)
+
+        assert "growth restriction" in result["impression_text"].lower()
+        assert result["growth_assessment"] == "FGR"
+        assert isinstance(result["hpo_terms"], list)
+
+    def test_missing_impression(self, hpo_cr):
+        text = "Fetal Biometry\n============\nHC 175 mm"
+        result = parse_clinical_impression(text, "viewpoint_text", hpo_parser=hpo_cr)
+        assert result["impression_text"] == ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+class TestClinicalImpressionViewPointHL7:
+    def test_basic_hl7_impression(self, hpo_cr):
+        hl7 = "OBX||TX|Impression^Impression|1|Appropriate for gestational age\n"
+
+        result = parse_clinical_impression(hl7, "viewpoint_hl7", hpo_parser=hpo_cr)
+
+        assert "Appropriate" in result["impression_text"]
+        assert result["growth_assessment"] == "AGA"
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestClinicalImpressionEdgeCases:
+    def test_invalid_format(self, hpo_cr):
+        with pytest.raises(ValueError):
+            parse_clinical_impression("data", "bad_format", hpo_parser=hpo_cr)
+
+    def test_non_string_text(self, hpo_cr):
+        with pytest.raises(ValueError):
+            parse_clinical_impression(
+                {"bad": "data"}, "viewpoint_text", hpo_parser=hpo_cr
+            )

From bcd527fcdd826adcbd841307e2301db03c6d3a39 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Fri, 23 Jan 2026 10:19:20 -0500
Subject: [PATCH 04/20] WIP

---
 .../etl/sections/clinical_impression.py       | 231 +++++++++++-------
 1 file changed, 144 insertions(+), 87 deletions(-)

diff --git a/src/prenatalppkt/etl/sections/clinical_impression.py b/src/prenatalppkt/etl/sections/clinical_impression.py
index 86038a4..0003973 100644
--- a/src/prenatalppkt/etl/sections/clinical_impression.py
+++ b/src/prenatalppkt/etl/sections/clinical_impression.py
@@ -1,59 +1,80 @@
+"""
+Clinical impression / interpretation section parser.
+
+Extracts clinical narrative text and optionally extracts HPO terms
+from free text using the HPO Concept Recognizer.
+"""
+
 from __future__ import annotations
 
 import json
 import re
 from typing import Dict, List, Optional, Union
 
-from prenatalppkt.hpo import HpoParser
-
 
 def parse_clinical_impression(
-    data: Union[str, Dict], source_format: str, hpo_parser: Optional[HpoParser] = None
+    data: Union[str, Dict], source_format: str, hpo_cr=None, hpo_parser=None
 ) -> Dict:
-    """
-    Parse clinical impression / interpretation section.
-
-    Supports:
-        - observer_json
-        - viewpoint_text
-        - viewpoint_hl7
-    """
-    if hpo_parser is None:
-        hpo_parser = HpoParser()
-
-    if source_format == "observer_json":
-        if isinstance(data, str):
-            data = json.loads(data)
-        impression_text = _parse_observer_impression(data)
-
-    elif source_format == "viewpoint_text":
-        if not isinstance(data, str):
-            raise ValueError("viewpoint_text data must be a string")
-        impression_text = _parse_viewpoint_text_impression(data)
-
-    elif source_format == "viewpoint_hl7":
-        if not isinstance(data, str):
-            raise ValueError("viewpoint_hl7 data must be a string")
-        impression_text = _parse_viewpoint_hl7_impression(data)
-
-    else:
-        raise ValueError(f"Unsupported source_format: {source_format}")
-
-    if impression_text and hasattr(hpo_parser, "extract"):
-        hpo_terms = hpo_parser.extract(impression_text)
-    else:
-        hpo_terms = []
-
-    return {
-        "impression_text": impression_text,
-        "diagnoses": [],
-        "anomalies": [],
-        "gestational_age_assessment": None,
-        "growth_assessment": _infer_growth_assessment(impression_text),
-        "recommendations": [],
-        "hpo_terms": hpo_terms,
-        "source_format": source_format,
-    }
+    """
+    Parse clinical impression / interpretation section.
+
+    Supports the observer_json, viewpoint_text and viewpoint_hl7 formats.
+
+    Args:
+        data: Raw input data (JSON string, dict, or text)
+        source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+        hpo_cr: Optional recognizer exposing parse(text), used for HPO terms
+        hpo_parser: Legacy keyword for hpo_cr; used only when hpo_cr is None
+
+    Returns:
+        Dict with keys:
+            - impression_text: str - Full impression narrative
+            - diagnoses: List[str] - Identified diagnoses (future)
+            - anomalies: List[Dict] - Structured anomaly data (future)
+            - gestational_age_assessment: Optional[str] - currently always None
+            - growth_assessment: Optional[str] - FGR, LGA, AGA, or None
+            - recommendations: List[str] - Follow-up recommendations (future)
+            - hpo_terms: result of recognizer.parse(impression_text), else []
+            - source_format: str
+
+    Raises:
+        ValueError: Unknown source_format, or non-str data for a viewpoint format
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        impression_text = _parse_observer_impression(data)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        impression_text = _parse_viewpoint_text_impression(data)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        impression_text = _parse_viewpoint_hl7_impression(data)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    # Older callers pass the recognizer as hpo_parser; treat it as an alias.
+    recognizer = hpo_cr if hpo_cr is not None else hpo_parser
+    hpo_terms = []
+    # HpoExactConceptRecognizer uses parse() method, not extract()
+    if impression_text and hasattr(recognizer, "parse"):
+        hpo_terms = recognizer.parse(impression_text)
+
+    return {
+        "impression_text": impression_text,
+        "diagnoses": [],
+        "anomalies": [],
+        "gestational_age_assessment": None,
+        "growth_assessment": _infer_growth_assessment(impression_text),
+        "recommendations": [],
+        "hpo_terms": hpo_terms,
+        "source_format": source_format,
+    }
 
 
 # ---------------------------------------------------------------------
@@ -62,10 +83,28 @@ def parse_clinical_impression(
 
 
 def _parse_observer_impression(json_data: Dict) -> str:
-    exam = json_data.get("exam", {})
-    finalize = exam.get("finalize", {})
+    """
+    Extract impression from Observer JSON.
+
+    The finalize block can be at:
+    - Root level: json_data["finalize"]["generalComment"]["plain_text"]
+    - Under exam: json_data["exam"]["finalize"]["generalComment"]["plain_text"]
+
+    The root level is checked first, then exam. Missing keys and JSON nulls
+    along either path yield "" instead of raising.
+    """
+    # Check root level first (this is where Apple_Sally has it).
+    # "or" fallbacks also cover keys that are present but null.
+    comment = (json_data.get("finalize") or {}).get("generalComment") or {}
+    impression = (comment.get("plain_text") or "").strip()
 
-    return finalize.get("generalComment", {}).get("plain_text", "").strip()
+    # Fall back to exam.finalize if not found at root
+    if not impression:
+        finalize = (json_data.get("exam") or {}).get("finalize") or {}
+        comment = finalize.get("generalComment") or {}
+        impression = (comment.get("plain_text") or "").strip()
+
+    return impression
 
 
 # ---------------------------------------------------------------------
@@ -74,18 +113,21 @@ def _parse_observer_impression(json_data: Dict) -> str:
 
 
 def _parse_viewpoint_text_impression(text: str) -> str:
-    """
-    Impression
-    ==========
-    Free text narrative
-    """
-    pattern = re.compile(
-        r"Impression\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
-        re.DOTALL | re.IGNORECASE,
-    )
+    """
+    Extract impression from ViewPoint text reports.
+
+    Looks for an "Impression" heading underlined with "=" (LF or CRLF line
+    endings) and returns the stripped text up to the next underlined line
+    starting with a letter, or end of input; "" when no heading is found.
+    """
+    # [ \t]*\r? lets the underline end in trailing blanks or a CR.
+    pattern = re.compile(
+        r"Impression\s*\n=+[ \t]*\r?\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE,
+    )
 
-    match = pattern.search(text)
-    return match.group("body").strip() if match else ""
+    match = pattern.search(text)
+    return match.group("body").strip() if match else ""
 
 
 # ---------------------------------------------------------------------
@@ -94,24 +136,30 @@ def _parse_viewpoint_text_impression(text: str) -> str:
 
 
 def _parse_viewpoint_hl7_impression(hl7: str) -> str:
-    lines: List[str] = []
+    """
+    Collect impression text from the OBX segments of an HL7 message.
+
+    Keeps the first "^" component of field 5 for OBX lines whose field 3
+    mentions "Impression" or "Interpretation"; joins them with spaces.
+    """
+    collected: List[str] = []
 
-    for line in hl7.splitlines():
-        if not line.startswith("OBX"):
-            continue
+    for segment in hl7.splitlines():
+        if not segment.startswith("OBX"):
+            continue
 
-        fields = line.split("|")
-        if len(fields) < 6:
-            continue
+        columns = segment.split("|")
+        if len(columns) < 6:
+            continue
 
-        obs_id = fields[3]
-        value = fields[5].split("^")[0].strip()
+        identifier = columns[3]
+        text = columns[5].partition("^")[0].strip()
 
-        if "Impression" in obs_id or "Interpretation" in obs_id:
-            if value:
-                lines.append(value)
+        wanted = "Impression" in identifier or "Interpretation" in identifier
+        if wanted and text:
+            collected.append(text)
 
-    return " ".join(lines)
+    return " ".join(collected)
 
 
 # ---------------------------------------------------------------------
@@ -120,16 +168,25 @@ def _parse_viewpoint_hl7_impression(hl7: str) -> str:
 
 
 def _infer_growth_assessment(text: str) -> Optional[str]:
-    if not text:
-        return None
-
-    text_lower = text.lower()
-
-    if "growth restriction" in text_lower or "fgr" in text_lower:
-        return "FGR"
-    if "large for gestational age" in text_lower or "lga" in text_lower:
-        return "LGA"
-    if "appropriate for gestational age" in text_lower or "aga" in text_lower:
-        return "AGA"
-
-    return None
+    """
+    Infer fetal growth assessment from impression text.
+
+    Abbreviations must appear as whole words, so "again" is not read as
+    AGA. Categories are tried in the order FGR, LGA, AGA; first hit wins.
+
+    Returns:
+        "FGR", "LGA" or "AGA"; None when the text is empty or nothing matches.
+    """
+    if not text:
+        return None
+
+    text_lower = text.lower()
+    phrases = {
+        "FGR": "growth restriction",
+        "LGA": "large for gestational age",
+        "AGA": "appropriate for gestational age",
+    }
+    for label, phrase in phrases.items():
+        if phrase in text_lower or re.search(rf"\b{label.lower()}\b", text_lower):
+            return label
+    return None
\ No newline at end of file

From 07a2cb9a5ba95c24524d2e72174c7b8a038116a7 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Fri, 23 Jan 2026 10:20:08 -0500
Subject: [PATCH 05/20] add current draft of notebook

---
 prenatalppkt.ipynb | 1525 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1525 insertions(+)
 create mode 100644 prenatalppkt.ipynb

diff --git a/prenatalppkt.ipynb b/prenatalppkt.ipynb
new file mode 100644
index 0000000..2426c29
--- /dev/null
+++ b/prenatalppkt.ipynb
@@ -0,0 +1,1525 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8b89cf52",
+   "metadata": {},
+   "source": [
+    "# Demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "d8f2cfce",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for head_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for biparietal_diameter\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for femur_length\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for abdominal_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for occipitofrontal_diameter\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Loaded mappings for: ['head_circumference', 'biparietal_diameter', 'femur_length', 'abdominal_circumference', 'occipitofrontal_diameter']\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Starting Observer JSON extraction\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing fetus 1\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Found 6 measurements\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:AC has percentile=55.6% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for AC: value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=AC, value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0034207 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:BPD has percentile=51.2% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for BPD: value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=BPD, value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:HC has percentile=42.5% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for HC: value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=HC, value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Femur has percentile=46.8% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Femur: value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Femur, value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0002823 - Abnormal femur morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0002823 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Nuchal Fold\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Nuchal Fold has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Nuchal Fold: value=10.0mm, percentile=0%, ga=<GestationalAge: 0 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Nuchal Fold, value=10.0mm, percentile=0.0%, ga=<GestationalAge: 0 weeks, 0 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Nuchal Fold' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Cerebellum\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Cerebellum has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Cerebellum: value=30.0mm, percentile=0%, ga=<GestationalAge: 27 weeks, 2 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Cerebellum, value=30.0mm, percentile=0.0%, ga=<GestationalAge: 27 weeks, 2 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Cerebellum' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Successfully parsed 4 measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Validating 4 TermBins for required measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
+      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================================================================\n",
+      "PRENATALPPKT ETL PIPELINE\n",
+      "Observer JSON → TermBins → Phenopacket v2.0\n",
+      "================================================================================\n",
+      "\n",
+      " STEP 1: Loading Observer JSON...\n",
+      "Loaded: tests/data/Apple_Sally_pretty.json\n",
+      "Fetuses: 1\n",
+      "Measurements: 6\n",
+      "Sample: AC = 22.62 cm\n",
+      "\n",
+      "  STEP 2: Extracting biometry measurements to TermBins...\n",
+      " Extracted 4 TermBins\n",
+      "\n",
+      "  [1] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "      Normal: True\n",
+      "\n",
+      "  [2] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "      Normal: True\n",
+      "\n",
+      "  [3] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "      Normal: True\n",
+      "\n",
+      "  [4] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "      HPO: HP:0002823 - Abnormal femur morphology\n",
+      "      Normal: True\n",
+      "\n",
+      " STEP 3: Converting TermBins to PhenotypicFeatures...\n",
+      " Generated 4 PhenotypicFeatures\n",
+      "\n",
+      "  [1] HP:0034207\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [2] HP:0000240\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [3] HP:0000240\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [4] HP:0002823\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "\n",
+      " STEP 4: Building Phenopacket v2.0...\n",
+      "✓ Phenopacket created successfully\n",
+      "\n",
+      "================================================================================\n",
+      " PHENOPACKET v2.0 OUTPUT (JSON)\n",
+      "================================================================================\n",
+      "{\n",
+      "  \"id\": \"apple-sally-fetus-1\",\n",
+      "  \"subject\": {\n",
+      "    \"id\": \"fetus-1\",\n",
+      "    \"time_at_last_encounter\": {\n",
+      "      \"gestational_age\": {\n",
+      "        \"weeks\": 26,\n",
+      "        \"days\": 6\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"phenotypic_features\": [\n",
+      "    {\n",
+      "      \"description\": \"AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0034207\",\n",
+      "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0002823\",\n",
+      "        \"label\": \"Abnormal femur morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 27\n",
+      "        }\n",
+      "      }\n",
+      "    }\n",
+      "  ],\n",
+      "  \"meta_data\": {\n",
+      "    \"created\": \"2026-01-23T14:56:52.244568Z\",\n",
+      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
+      "    \"resources\": [\n",
+      "      {\n",
+      "        \"id\": \"hp\",\n",
+      "        \"name\": \"Human Phenotype Ontology\",\n",
+      "        \"url\": \"http://purl.obolibrary.org/obo/hp.owl\",\n",
+      "        \"version\": \"2025-11-24\",\n",
+      "        \"namespace_prefix\": \"HP\",\n",
+      "        \"iri_prefix\": \"http://purl.obolibrary.org/obo/HP_\"\n",
+      "      }\n",
+      "    ],\n",
+      "    \"phenopacket_schema_version\": \"2.0\"\n",
+      "  }\n",
+      "}\n",
+      "\n",
+      "================================================================================\n",
+      " VALIDATION SUMMARY\n",
+      "================================================================================\n",
+      "\n",
+      " Phenopacket Structure:\n",
+      "   ID: apple-sally-fetus-1\n",
+      "   Subject ID: fetus-1\n",
+      "   Subject GA: 26w6d\n",
+      "   Sex: UNKNOWN_SEX\n",
+      "   Phenotypic Features: 4\n",
+      "   Schema Version: 2.0\n",
+      "   HPO Resource: 2025-11-24\n",
+      "\n",
+      " Phenotypic Features Detail:\n",
+      "\n",
+      "  [1] HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "       Normal (excluded)\n",
+      "      Onset: 26w6d\n",
+      "      Detail: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [2] HP:0000240 - Abnormality of skull size\n",
+      "       Normal (excluded)\n",
+      "      Onset: 26w6d\n",
+      "      Detail: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [3] HP:0000240 - Abnormality of skull size\n",
+      "       Normal (excluded)\n",
+      "      Onset: 26w6d\n",
+      "      Detail: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [4] HP:0002823 - Abnormal femur morphology\n",
+      "       Normal (excluded)\n",
+      "      Onset: 27w0d\n",
+      "      Detail: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "\n",
+      " Summary Statistics:\n",
+      "  Total features: 4\n",
+      "  Normal (excluded): 4\n",
+      "  Abnormal (observed): 0\n",
+      "\n",
+      "================================================================================\n",
+      " SUCCESS: Valid Phenopacket v2.0 generated\n",
+      "================================================================================\n",
+      "\n",
+      " Phenopacket saved to: output/apple_sally_phenopacket_v2.json\n",
+      "\n",
+      " Validation: Round-trip test...\n",
+      " Validation passed\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initial Demo\n",
+    "\"\"\"\n",
+    "PRENATALPPKT ETL PIPELINE\n",
+    "Observer JSON → TermBins → Phenopacket v2.0\n",
+    "\n",
+    "Uses the official GA4GH phenopackets library per:\n",
+    "https://phenopacket-schema.readthedocs.io/en/latest/python.html\n",
+    "\"\"\"\n",
+    "\n",
+    "import json\n",
+    "import re\n",
+    "from datetime import datetime, timezone\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from google.protobuf.json_format import MessageToJson\n",
+    "from google.protobuf.timestamp_pb2 import Timestamp\n",
+    "import phenopackets.schema.v2 as pps2\n",
+    "\n",
+    "from prenatalppkt.etl.extractors import observer\n",
+    "from prenatalppkt.gestational_age import GestationalAge\n",
+    "\n",
+    "print(\"=\" * 80)\n",
+    "print(\"PRENATALPPKT ETL PIPELINE\")\n",
+    "print(\"Observer JSON → TermBins → Phenopacket v2.0\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 1: Load Apple Sally Observer JSON\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n STEP 1: Loading Observer JSON...\")\n",
+    "\n",
+    "data_path = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
+    "with open(data_path) as f:\n",
+    "    observer_data = json.load(f)\n",
+    "\n",
+    "print(f\"Loaded: {data_path}\")\n",
+    "print(f\"Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
+    "\n",
+    "first_fetus = observer_data[\"fetuses\"][0]\n",
+    "measurements = first_fetus.get(\"measurements\", [])\n",
+    "print(f\"Measurements: {len(measurements)}\")\n",
+    "print(\n",
+    "    f\"Sample: {measurements[0]['label']} = \"\n",
+    "    f\"{measurements[0]['value']} {measurements[0]['unit_of_measure']}\"\n",
+    ")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 2: Extract TermBins using Observer extractor\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n  STEP 2: Extracting biometry measurements to TermBins...\")\n",
+    "\n",
+    "term_bins = observer.extract(observer_data)\n",
+    "print(f\" Extracted {len(term_bins)} TermBins\")\n",
+    "\n",
+    "for i, tb in enumerate(term_bins, 1):\n",
+    "    print(f\"\\n  [{i}] {tb.description}\")\n",
+    "    print(f\"      HPO: {tb.hpo_id} - {tb.hpo_label}\")\n",
+    "    print(f\"      Normal: {tb.normal}\")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 3: Convert TermBins → Phenotypic Features (using phenopackets library)\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n STEP 3: Converting TermBins to PhenotypicFeatures...\")\n",
+    "\n",
+    "\n",
+    "def parse_ga_from_description(description: str) -> tuple[int, int]:\n",
+    "    \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
+    "    match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
+    "    if match:\n",
+    "        return int(match.group(1)), int(match.group(2))\n",
+    "    # Fallback\n",
+    "    first_m = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
+    "    ga = GestationalAge.from_weeks(first_m.get(\"calculated_ega\", 26.9))\n",
+    "    return ga.weeks, ga.days\n",
+    "\n",
+    "\n",
+    "phenotypic_features = []\n",
+    "\n",
+    "for tb in term_bins:\n",
+    "    weeks, days = parse_ga_from_description(tb.description)\n",
+    "\n",
+    "    # Create GestationalAge message\n",
+    "    gestational_age = pps2.GestationalAge(weeks=weeks, days=days)\n",
+    "\n",
+    "    # Create TimeElement with gestational_age\n",
+    "    onset = pps2.TimeElement(gestational_age=gestational_age)\n",
+    "\n",
+    "    # Create OntologyClass for the HPO term\n",
+    "    hpo_type = pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label)\n",
+    "\n",
+    "    # Create PhenotypicFeature\n",
+    "    pf = pps2.PhenotypicFeature(\n",
+    "        type=hpo_type,\n",
+    "        excluded=tb.normal,  # If normal=True, abnormality is excluded\n",
+    "        onset=onset,\n",
+    "        description=tb.description,\n",
+    "    )\n",
+    "\n",
+    "    phenotypic_features.append(pf)\n",
+    "\n",
+    "print(f\" Generated {len(phenotypic_features)} PhenotypicFeatures\")\n",
+    "\n",
+    "for i, pf in enumerate(phenotypic_features, 1):\n",
+    "    status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
+    "    print(f\"\\n  [{i}] {pf.type.id}\")\n",
+    "    print(f\"      Status: {status}\")\n",
+    "    print(f\"      Description: {pf.description}\")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 4: Build Complete Phenopacket v2.0\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n STEP 4: Building Phenopacket v2.0...\")\n",
+    "\n",
+    "# Get subject GA from first measurement\n",
+    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
+    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
+    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
+    "\n",
+    "# Create Individual (subject) with GestationalAge\n",
+    "subject_time = pps2.TimeElement(\n",
+    "    gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
+    ")\n",
+    "\n",
+    "subject = pps2.Individual(\n",
+    "    id=\"fetus-1\",\n",
+    "    sex=pps2.Sex.UNKNOWN_SEX,\n",
+    "    time_at_last_encounter=subject_time,\n",
+    ")\n",
+    "\n",
+    "# Create timestamp for metadata\n",
+    "now = datetime.now(timezone.utc)\n",
+    "created_timestamp = Timestamp()\n",
+    "created_timestamp.FromDatetime(now)\n",
+    "\n",
+    "# Create HPO Resource\n",
+    "hpo_resource = pps2.Resource(\n",
+    "    id=\"hp\",\n",
+    "    name=\"Human Phenotype Ontology\",\n",
+    "    url=\"http://purl.obolibrary.org/obo/hp.owl\",\n",
+    "    version=\"2025-11-24\",\n",
+    "    namespace_prefix=\"HP\",\n",
+    "    iri_prefix=\"http://purl.obolibrary.org/obo/HP_\",\n",
+    ")\n",
+    "\n",
+    "# Create MetaData\n",
+    "metadata = pps2.MetaData(\n",
+    "    created=created_timestamp,\n",
+    "    created_by=\"prenatalppkt-etl-pipeline\",\n",
+    "    phenopacket_schema_version=\"2.0\",\n",
+    ")\n",
+    "metadata.resources.append(hpo_resource)\n",
+    "\n",
+    "# Create the Phenopacket\n",
+    "phenopacket = pps2.Phenopacket(\n",
+    "    id=\"apple-sally-fetus-1\",\n",
+    "    subject=subject,\n",
+    "    meta_data=metadata,\n",
+    ")\n",
+    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
+    "\n",
+    "print(\"✓ Phenopacket created successfully\")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 5: Display Results as JSON\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\" PHENOPACKET v2.0 OUTPUT (JSON)\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# Convert protobuf message to JSON using official method\n",
+    "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
+    "print(phenopacket_json)\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# STEP 6: Validation Summary\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\" VALIDATION SUMMARY\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "print(\"\\n Phenopacket Structure:\")\n",
+    "print(f\"   ID: {phenopacket.id}\")\n",
+    "print(f\"   Subject ID: {phenopacket.subject.id}\")\n",
+    "print(f\"   Subject GA: {subject_ga.weeks}w{subject_ga.days}d\")\n",
+    "print(f\"   Sex: {pps2.Sex.Name(phenopacket.subject.sex)}\")\n",
+    "print(f\"   Phenotypic Features: {len(phenopacket.phenotypic_features)}\")\n",
+    "print(f\"   Schema Version: {phenopacket.meta_data.phenopacket_schema_version}\")\n",
+    "print(f\"   HPO Resource: {phenopacket.meta_data.resources[0].version}\")\n",
+    "\n",
+    "print(\"\\n Phenotypic Features Detail:\")\n",
+    "for i, pf in enumerate(phenopacket.phenotypic_features, 1):\n",
+    "    status = \" Normal (excluded)\" if pf.excluded else \"Abnormal (observed)\"\n",
+    "    ga = pf.onset.gestational_age\n",
+    "    print(f\"\\n  [{i}] {pf.type.id} - {pf.type.label}\")\n",
+    "    print(f\"      {status}\")\n",
+    "    print(f\"      Onset: {ga.weeks}w{ga.days}d\")\n",
+    "    print(f\"      Detail: {pf.description}\")\n",
+    "\n",
+    "# Count normal vs abnormal\n",
+    "normal_count = sum(1 for pf in phenopacket.phenotypic_features if pf.excluded)\n",
+    "abnormal_count = len(phenopacket.phenotypic_features) - normal_count\n",
+    "\n",
+    "print(\"\\n Summary Statistics:\")\n",
+    "print(f\"  Total features: {len(phenopacket.phenotypic_features)}\")\n",
+    "print(f\"  Normal (excluded): {normal_count}\")\n",
+    "print(f\"  Abnormal (observed): {abnormal_count}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\" SUCCESS: Valid Phenopacket v2.0 generated\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# Save to file\n",
+    "output_path = Path(\"output/apple_sally_phenopacket_v2.json\")\n",
+    "output_path.parent.mkdir(exist_ok=True)\n",
+    "with open(output_path, \"w\") as f:\n",
+    "    f.write(phenopacket_json)\n",
+    "print(f\"\\n Phenopacket saved to: {output_path}\")\n",
+    "\n",
+    "# Validate by round-tripping\n",
+    "print(\"\\n Validation: Round-trip test...\")\n",
+    "from google.protobuf.json_format import Parse\n",
+    "\n",
+    "parsed_back = Parse(phenopacket_json, pps2.Phenopacket())\n",
+    "assert parsed_back.id == phenopacket.id\n",
+    "assert len(parsed_back.phenotypic_features) == len(phenopacket.phenotypic_features)\n",
+    "print(\" Validation passed\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3685f9e5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for head_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for biparietal_diameter\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for femur_length\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for abdominal_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for occipitofrontal_diameter\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Loaded mappings for: ['head_circumference', 'biparietal_diameter', 'femur_length', 'abdominal_circumference', 'occipitofrontal_diameter']\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Starting Observer JSON extraction\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing fetus 1\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Found 6 measurements\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:AC has percentile=55.6% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for AC: value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=AC, value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0034207 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:BPD has percentile=51.2% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for BPD: value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=BPD, value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:HC has percentile=42.5% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for HC: value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=HC, value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Femur has percentile=46.8% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Femur: value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Femur, value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0002823 - Abnormal femur morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0002823 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Nuchal Fold\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Nuchal Fold has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Nuchal Fold: value=10.0mm, percentile=0%, ga=<GestationalAge: 0 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Nuchal Fold, value=10.0mm, percentile=0.0%, ga=<GestationalAge: 0 weeks, 0 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Nuchal Fold' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Cerebellum\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Cerebellum has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Cerebellum: value=30.0mm, percentile=0%, ga=<GestationalAge: 27 weeks, 2 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Cerebellum, value=30.0mm, percentile=0.0%, ga=<GestationalAge: 27 weeks, 2 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Cerebellum' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Successfully parsed 4 measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Validating 4 TermBins for required measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
+      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " STEP 1: Loading Observer JSON...\n",
+      "Loaded: tests/data/Apple_Sally_pretty.json\n",
+      "Fetuses: 1\n",
+      "Measurements: 6\n",
+      "Sample: AC =  22.62 cm\n",
+      "\n",
+      "  STEP 2: Extracting biometry measurements to TermBins...\n",
+      " Extracted 4 TermBins\n",
+      "\n",
+      "  [1] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "      Normal: True\n",
+      "\n",
+      "  [2] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "      Normal: True\n",
+      "\n",
+      "  [3] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "      Normal: True\n",
+      "\n",
+      "  [4] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "      HPO: HP:0002823 - Abnormal femur morphology\n",
+      "      Normal: True\n",
+      "\n",
+      " STEP 3: Converting TermBins to PhenotypicFeatures...\n",
+      " Generated 4 PhenotypicFeatures\n",
+      "\n",
+      "  [1] HP:0034207\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [2] HP:0000240\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [3] HP:0000240\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "\n",
+      "  [4] HP:0002823\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Description: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "\n",
+      " STEP 4: Building Phenopacket v2.0...\n",
+      "{\n",
+      "  \"id\": \"apple-sally-fetus-1\",\n",
+      "  \"subject\": {\n",
+      "    \"id\": \"fetus-1\",\n",
+      "    \"time_at_last_encounter\": {\n",
+      "      \"gestational_age\": {\n",
+      "        \"weeks\": 26,\n",
+      "        \"days\": 6\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"phenotypic_features\": [\n",
+      "    {\n",
+      "      \"description\": \"AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0034207\",\n",
+      "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0002823\",\n",
+      "        \"label\": \"Abnormal femur morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 27\n",
+      "        }\n",
+      "      }\n",
+      "    }\n",
+      "  ],\n",
+      "  \"meta_data\": {\n",
+      "    \"created\": \"2026-01-23T14:56:52.295444Z\",\n",
+      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
+      "    \"resources\": [\n",
+      "      {\n",
+      "        \"id\": \"hp\",\n",
+      "        \"name\": \"Human Phenotype Ontology\",\n",
+      "        \"url\": \"http://purl.obolibrary.org/obo/hp.owl\",\n",
+      "        \"version\": \"2025-11-24\",\n",
+      "        \"namespace_prefix\": \"HP\",\n",
+      "        \"iri_prefix\": \"http://purl.obolibrary.org/obo/HP_\"\n",
+      "      }\n",
+      "    ],\n",
+      "    \"phenopacket_schema_version\": \"2.0\"\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
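+    "# Shorter test: condensed Observer JSON -> Phenopacket v2.0 run (prints the JSON; no file save or round-trip check)\n",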
+    "\n",
+    "import json\n",
+    "import re\n",
+    "from datetime import datetime, timezone\n",
+    "from pathlib import Path\n",
+    "from google.protobuf.json_format import MessageToJson\n",
+    "from google.protobuf.timestamp_pb2 import Timestamp\n",
+    "import phenopackets.schema.v2 as pps2\n",
+    "from prenatalppkt.etl.extractors import observer\n",
+    "from prenatalppkt.gestational_age import GestationalAge\n",
+    "\n",
+    "print(\"\\n STEP 1: Loading Observer JSON...\")\n",
+    "data_path = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
+    "with open(data_path) as f:\n",
+    "    observer_data = json.load(f)\n",
+    "print(f\"Loaded: {data_path}\")\n",
+    "print(f\"Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
+    "\n",
+    "first_fetus = observer_data[\"fetuses\"][0]\n",
+    "measurements = first_fetus.get(\"measurements\", [])\n",
+    "print(f\"Measurements: {len(measurements)}\")\n",
+    "print(f\"Sample: {measurements[0]['label']} = \", f\"{measurements[0]['value']} {measurements[0]['unit_of_measure']}\")\n",
+    "\n",
+    "print(\"\\n  STEP 2: Extracting biometry measurements to TermBins...\")\n",
+    "term_bins = observer.extract(observer_data)\n",
+    "print(f\" Extracted {len(term_bins)} TermBins\")\n",
+    "for i, tb in enumerate(term_bins, 1):\n",
+    "    print(f\"\\n  [{i}] {tb.description}\")\n",
+    "    print(f\"      HPO: {tb.hpo_id} - {tb.hpo_label}\")\n",
+    "    print(f\"      Normal: {tb.normal}\")\n",
+    "\n",
+    "print(\"\\n STEP 3: Converting TermBins to PhenotypicFeatures...\")\n",
+    "def parse_ga_from_description(description: str) -> tuple[int, int]:\n",
+    "    \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
+    "    match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
+    "    if match:\n",
+    "        return int(match.group(1)), int(match.group(2))\n",
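+    "    # Fallback: description has no 'at <W>w<D>d' token, so take GA from the first measurement's calculated_ega (default 26.9)\n",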
+    "    first_m = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
+    "    ga = GestationalAge.from_weeks(first_m.get(\"calculated_ega\", 26.9))\n",
+    "    return ga.weeks, ga.days\n",
+    "phenotypic_features = []\n",
+    "for tb in term_bins:\n",
+    "    weeks, days = parse_ga_from_description(tb.description)\n",
+    "    # Create GestationalAge message\n",
+    "    gestational_age = pps2.GestationalAge(weeks=weeks, days=days)\n",
+    "    # Create TimeElement with gestational_age\n",
+    "    onset = pps2.TimeElement(gestational_age=gestational_age)\n",
+    "    # Create OntologyClass for the HPO term\n",
+    "    hpo_type = pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label)\n",
+    "    # Create PhenotypicFeature\n",
+    "    pf = pps2.PhenotypicFeature( type=hpo_type, excluded=tb.normal, onset=onset, description=tb.description)\n",
+    "    phenotypic_features.append(pf)\n",
+    "print(f\" Generated {len(phenotypic_features)} PhenotypicFeatures\")\n",
+    "for i, pf in enumerate(phenotypic_features, 1):\n",
+    "    status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
+    "    print(f\"\\n  [{i}] {pf.type.id}\")\n",
+    "    print(f\"      Status: {status}\")\n",
+    "    print(f\"      Description: {pf.description}\")\n",
+    "\n",
+    "\n",
+    "print(\"\\n STEP 4: Building Phenopacket v2.0...\")\n",
+    "# Get subject GA from first measurement\n",
+    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
+    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
+    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
+    "# Create Individual (subject) with GestationalAge\n",
+    "subject_time = pps2.TimeElement(gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days))\n",
+    "\n",
+    "subject = pps2.Individual(id=\"fetus-1\", sex=pps2.Sex.UNKNOWN_SEX, time_at_last_encounter=subject_time)\n",
+    "\n",
+    "# Create timestamp for metadata\n",
+    "now = datetime.now(timezone.utc)\n",
+    "created_timestamp = Timestamp()\n",
+    "created_timestamp.FromDatetime(now)\n",
+    "\n",
+    "# Create HPO Resource\n",
+    "hpo_resource = pps2.Resource(id=\"hp\", name=\"Human Phenotype Ontology\", url=\"http://purl.obolibrary.org/obo/hp.owl\", version=\"2025-11-24\", namespace_prefix=\"HP\", iri_prefix=\"http://purl.obolibrary.org/obo/HP_\")\n",
+    "\n",
+    "# Create MetaData\n",
+    "metadata = pps2.MetaData(created=created_timestamp, created_by=\"prenatalppkt-etl-pipeline\", phenopacket_schema_version=\"2.0\")\n",
+    "metadata.resources.append(hpo_resource)\n",
+    "\n",
+    "# Create the Phenopacket\n",
+    "phenopacket = pps2.Phenopacket(id=\"apple-sally-fetus-1\", subject=subject, meta_data=metadata)\n",
+    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
+    "\n",
+    "# Serialize with protobuf's MessageToJson, keeping the proto (snake_case) field names\n",
+    "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
+    "print(phenopacket_json)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e24f7ff",
+   "metadata": {},
+   "source": [
+    "# New: expanded ETL pipeline (Observer JSON -> biometry + clinical sections -> Phenopacket v2.0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "0f79d3fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
+      "DEBUG:hpotk.util:Opening /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like a local file: /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like decompressed data\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================================================================\n",
+      "PRENATALPPKT EXPANDED ETL PIPELINE\n",
+      "Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\n",
+      "================================================================================\n",
+      "\n",
+      "[STEP 1] Loading HPO Concept Recognizer...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:hpotk.ontology.load.obographs._load:Extracting ontology terms\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000051', 'lbl': 'has part', 'meta': {'xrefs': [{'val': 'BFO:0000051'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'has_part'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000066', 'lbl': 'occurs in', 'meta': {'xrefs': [{'val': 'BFO:0000066'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'occurs_in'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002503', 'lbl': 'towards', 'meta': {'xrefs': [{'val': 'RO:0002503'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'towards'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002573', 'lbl': 'has modifier', 'meta': {'comments': ['placeholder relation to indicate normality/abnormality.'], 'xrefs': [{'val': 'RO:0002180'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'qualifier'}]}}\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Creating the edge list\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Building ontology graph\n",
+      "DEBUG:hpotk.graph._factory:Creating ontology graph from 23612 edges\n",
+      "DEBUG:hpotk.graph._factory:Found root HP:0000001\n",
+      "DEBUG:hpotk.graph._factory:Extracted 19262 nodes\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Assembling the ontology\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Done\n",
+      "DEBUG:prenatalppkt.hpo.hpo_parser:Instantiating HPO concept recognizer.\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for head_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for biparietal_diameter\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for femur_length\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for abdominal_circumference\n",
+      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for occipitofrontal_diameter\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Loaded mappings for: ['head_circumference', 'biparietal_diameter', 'femur_length', 'abdominal_circumference', 'occipitofrontal_diameter']\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Starting Observer JSON extraction\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing fetus 1\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Found 6 measurements\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:AC has percentile=55.6% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for AC: value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=AC, value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0034207 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:BPD has percentile=51.2% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for BPD: value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=BPD, value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:HC has percentile=42.5% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for HC: value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=HC, value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Femur has percentile=46.8% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Femur: value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Femur, value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>, method=None\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0002823 - Abnormal femur morphology\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0002823 - normal=True\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Nuchal Fold\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Nuchal Fold has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Nuchal Fold: value=10.0mm, percentile=0%, ga=<GestationalAge: 0 weeks, 0 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Nuchal Fold, value=10.0mm, percentile=0.0%, ga=<GestationalAge: 0 weeks, 0 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Nuchal Fold' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Cerebellum\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Cerebellum has percentile=0% (valid)\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Cerebellum: value=30.0mm, percentile=0%, ga=<GestationalAge: 27 weeks, 2 days>\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Cerebellum, value=30.0mm, percentile=0.0%, ga=<GestationalAge: 27 weeks, 2 days>, method=None\n",
+      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Cerebellum' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
+      "DEBUG:prenatalppkt.etl.extractors.observer:Successfully parsed 4 measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Validating 4 TermBins for required measurements\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: AC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
+      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n",
+      "DEBUG:hpotk.store._github:Pulling tag from https://api.github.com/repos/obophenotype/human-phenotype-ontology/tags\n",
+      "DEBUG:hpotk.store._github:Fetched 30 tags\n",
+      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
+      "DEBUG:hpotk.util:Opening /home/varenya/.hpo-toolkit/HP/hp.v2026-01-08.json\n",
+      "DEBUG:hpotk.util:Looks like a local file: /home/varenya/.hpo-toolkit/HP/hp.v2026-01-08.json\n",
+      "DEBUG:hpotk.util:Looks like decompressed data\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  ? HPO version: 2025-10-22\n",
+      "  ? Concept recognizer: HpoExactConceptRecognizer\n",
+      "\n",
+      "[STEP 2] Loading Observer JSON...\n",
+      "  ? Loaded: tests/data/Apple_Sally_pretty.json\n",
+      "  ? Fetuses: 1\n",
+      "  ? Measurements: 6\n",
+      "  ? Sample: AC = 22.62 cm\n",
+      "\n",
+      "[STEP 3] Extracting biometry measurements to TermBins...\n",
+      "  ? Extracted 4 TermBins:\n",
+      "    [1] HP:0034207 (Abnormal fetal gastrointestinal system morphology) - ? Normal\n",
+      "        AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
+      "    [2] HP:0000240 (Abnormality of skull size) - ? Normal\n",
+      "        BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
+      "    [3] HP:0000240 (Abnormality of skull size) - ? Normal\n",
+      "        HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
+      "    [4] HP:0002823 (Abnormal femur morphology) - ? Normal\n",
+      "        Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "\n",
+      "[STEP 4] Parsing clinical sections...\n",
+      "\n",
+      "  --- Clinical Indication ---\n",
+      "  Indication: (not found)\n",
+      "\n",
+      "  --- Pregnancy Dating ---\n",
+      "  LMP: 0001-01-01\n",
+      "  EDD: None\n",
+      "  Dating Method: None\n",
+      "  GA by Ultrasound: None\n",
+      "\n",
+      "  --- Clinical Impression ---\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:hpotk.ontology.load.obographs._load:Extracting ontology terms\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000051', 'lbl': 'has part', 'meta': {'xrefs': [{'val': 'BFO:0000051'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'has_part'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000066', 'lbl': 'occurs in', 'meta': {'xrefs': [{'val': 'BFO:0000066'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'occurs_in'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002503', 'lbl': 'towards', 'meta': {'xrefs': [{'val': 'RO:0002503'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'towards'}]}}\n",
+      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002573', 'lbl': 'has modifier', 'meta': {'comments': ['placeholder relation to indicate normality/abnormality.'], 'xrefs': [{'val': 'RO:0002180'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'qualifier'}]}}\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Creating the edge list\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Building ontology graph\n",
+      "DEBUG:hpotk.graph._factory:Creating ontology graph from 23765 edges\n",
+      "DEBUG:hpotk.graph._factory:Found root HP:0000001\n",
+      "DEBUG:hpotk.graph._factory:Extracted 19408 nodes\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Assembling the ontology\n",
+      "DEBUG:hpotk.ontology.load.obographs._load:Done\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  Impression: (not found)\n",
+      "  Growth Assessment: None\n",
+      "\n",
+      "  --- HPO Concept Recognition from Clinical Text ---\n",
+      "  (no impression text to parse)\n",
+      "  (no HPO terms matched)\n",
+      "\n",
+      "[STEP 5] Previewing anatomy findings...\n",
+      "  Normal (0): ...\n",
+      "  Abnormal (0): (none)\n",
+      "  Not visualized (0): ...\n",
+      "  (Note: Anatomy section parser not yet implemented in ETL)\n",
+      "\n",
+      "[STEP 6] Converting to PhenotypicFeatures...\n",
+      "\n",
+      "  --- From Biometry ---\n",
+      "  ? Added 4 features from biometry\n",
+      "\n",
+      "  --- From Clinical Text ---\n",
+      "  ? Added 0 features from clinical text\n",
+      "\n",
+      "  Total PhenotypicFeatures: 4\n",
+      "\n",
+      "[STEP 7] Building Phenopacket v2.0...\n",
+      "  ? Phenopacket assembled successfully\n",
+      "    ID: apple-sally-fetus-1\n",
+      "    Subject: fetus-1 at 26w6d\n",
+      "    Features: 4\n",
+      "\n",
+      "================================================================================\n",
+      "PHENOPACKET v2.0 OUTPUT (JSON)\n",
+      "================================================================================\n",
+      "{\n",
+      "  \"id\": \"apple-sally-fetus-1\",\n",
+      "  \"subject\": {\n",
+      "    \"id\": \"fetus-1\",\n",
+      "    \"time_at_last_encounter\": {\n",
+      "      \"gestational_age\": {\n",
+      "        \"weeks\": 26,\n",
+      "        \"days\": 6\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"phenotypic_features\": [\n",
+      "    {\n",
+      "      \"description\": \"[Biometry] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0034207\",\n",
+      "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Biometry] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Biometry] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000240\",\n",
+      "        \"label\": \"Abnormality of skull size\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Biometry] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0002823\",\n",
+      "        \"label\": \"Abnormal femur morphology\"\n",
+      "      },\n",
+      "      \"excluded\": true,\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 27\n",
+      "        }\n",
+      "      }\n",
+      "    }\n",
+      "  ],\n",
+      "  \"meta_data\": {\n",
+      "    \"created\": \"2026-01-23T14:56:57.292752Z\",\n",
+      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
+      "    \"resources\": [\n",
+      "      {\n",
+      "        \"id\": \"hp\",\n",
+      "        \"name\": \"Human Phenotype Ontology\",\n",
+      "        \"url\": \"http://purl.obolibrary.org/obo/hp.owl\",\n",
+      "        \"version\": \"2025-10-22\",\n",
+      "        \"namespace_prefix\": \"HP\",\n",
+      "        \"iri_prefix\": \"http://purl.obolibrary.org/obo/HP_\"\n",
+      "      }\n",
+      "    ],\n",
+      "    \"phenopacket_schema_version\": \"2.0\"\n",
+      "  }\n",
+      "}\n",
+      "\n",
+      "================================================================================\n",
+      "VALIDATION & SUMMARY\n",
+      "================================================================================\n",
+      "\n",
+      "[Validation] Round-trip test...\n",
+      "  ? Round-trip validation passed\n",
+      "\n",
+      "[Summary] Phenotypic Features:\n",
+      "  Total: 4\n",
+      "    From Biometry: 4\n",
+      "    From Clinical Text: 0\n",
+      "  Normal (excluded): 4\n",
+      "  Abnormal (observed): 0\n",
+      "\n",
+      "[Detail] All Phenotypic Features:\n",
+      "------------------------------------------------------------\n",
+      "\n",
+      "  [1] HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "      Source: Biometry\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [2] HP:0000240 - Abnormality of skull size\n",
+      "      Source: Biometry\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [3] HP:0000240 - Abnormality of skull size\n",
+      "      Source: Biometry\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [4] HP:0002823 - Abnormal femur morphology\n",
+      "      Source: Biometry\n",
+      "      Status: EXCLUDED (normal)\n",
+      "      Onset: 27w0d\n",
+      "\n",
+      "================================================================================\n",
+      "SUCCESS: Phenopacket saved to output/apple_sally_phenopacket_expanded.json\n",
+      "================================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "PRENATALPPKT EXPANDED ETL PIPELINE\n",
+    "Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\n",
+    "\n",
+    "Demonstrates the complete ETL pipeline:\n",
+    "1. Biometry extraction -> List[TermBin] -> quantitative HPO terms\n",
+    "2. Clinical indication -> reason for exam\n",
+    "3. Pregnancy dating -> LMP, EDD, gestational age context\n",
+    "4. Clinical impression -> qualitative HPO terms from free text\n",
+    "5. Phenopacket assembly -> GA4GH Phenopacket v2.0 JSON\n",
+    "\n",
+    "Uses the official GA4GH phenopackets library per:\n",
+    "https://phenopacket-schema.readthedocs.io/en/latest/python.html\n",
+    "\"\"\"\n",
+    "\n",
+    "import gzip\n",
+    "import json\n",
+    "import re\n",
+    "from datetime import datetime, timezone\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from google.protobuf.json_format import MessageToJson, Parse\n",
+    "from google.protobuf.timestamp_pb2 import Timestamp\n",
+    "import phenopackets.schema.v2 as pps2\n",
+    "\n",
+    "# ETL Extractors (biometry -> TermBins)\n",
+    "from prenatalppkt.etl.extractors import observer\n",
+    "\n",
+    "# ETL Section Parsers (clinical metadata -> Dicts)\n",
+    "from prenatalppkt.etl.sections import (\n",
+    "   parse_clinical_indication,\n",
+    "   parse_pregnancy_dating,\n",
+    "   parse_clinical_impression,\n",
+    ")\n",
+    "\n",
+    "# HPO Concept Recognition\n",
+    "from prenatalppkt.hpo import HpoParser\n",
+    "\n",
+    "# Gestational Age utilities\n",
+    "from prenatalppkt.gestational_age import GestationalAge\n",
+    "\n",
+    "print(\"=\" * 80)\n",
+    "print(\"PRENATALPPKT EXPANDED ETL PIPELINE\")\n",
+    "print(\"Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 1: Load HPO Concept Recognizer\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 1] Loading HPO Concept Recognizer...\")\n",
+    "\n",
+    "HP_JSON_GZ = Path(\"tests/data/hp.json.gz\")\n",
+    "TMP_HP_JSON = Path(\"/tmp/hp.json\")\n",
+    "\n",
+    "# Decompress hp.json.gz to temp location\n",
+    "with gzip.open(HP_JSON_GZ, \"rt\", encoding=\"utf-8\") as f_in:\n",
+    "   with open(TMP_HP_JSON, \"w\", encoding=\"utf-8\") as f_out:\n",
+    "       f_out.write(f_in.read())\n",
+    "\n",
+    "hpo_parser = HpoParser(hpo_json_file=str(TMP_HP_JSON))\n",
+    "hpo_cr = hpo_parser.get_hpo_concept_recognizer()\n",
+    "\n",
+    "print(f\"  ? HPO version: {hpo_parser.get_version()}\")\n",
+    "print(f\"  ? Concept recognizer: {type(hpo_cr).__name__}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 2: Load Observer JSON Data\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 2] Loading Observer JSON...\")\n",
+    "\n",
+    "DATA_PATH = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
+    "\n",
+    "with open(DATA_PATH) as f:\n",
+    "   observer_data = json.load(f)\n",
+    "\n",
+    "# Keep raw JSON string for section parsers\n",
+    "with open(DATA_PATH) as f:\n",
+    "   observer_json_str = f.read()\n",
+    "\n",
+    "print(f\"  ? Loaded: {DATA_PATH}\")\n",
+    "print(f\"  ? Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
+    "\n",
+    "first_fetus = observer_data[\"fetuses\"][0]\n",
+    "measurements = first_fetus.get(\"measurements\", [])\n",
+    "print(f\"  ? Measurements: {len(measurements)}\")\n",
+    "print(f\"  ? Sample: {measurements[0]['label']} = {measurements[0]['value']} {measurements[0]['unit_of_measure']}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 3: Extract Biometry -> TermBins\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 3] Extracting biometry measurements to TermBins...\")\n",
+    "\n",
+    "term_bins = observer.extract(observer_data)\n",
+    "\n",
+    "print(f\"  ? Extracted {len(term_bins)} TermBins:\")\n",
+    "for i, tb in enumerate(term_bins, 1):\n",
+    "   status = \"? Normal\" if tb.normal else \"? Abnormal\"\n",
+    "   print(f\"    [{i}] {tb.hpo_id} ({tb.hpo_label}) - {status}\")\n",
+    "   print(f\"        {tb.description}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 4: Parse Clinical Sections\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 4] Parsing clinical sections...\")\n",
+    "\n",
+    "SOURCE_FORMAT = \"observer_json\"\n",
+    "\n",
+    "# 4a: Clinical Indication\n",
+    "print(\"\\n  --- Clinical Indication ---\")\n",
+    "indication = parse_clinical_indication(observer_json_str, SOURCE_FORMAT)\n",
+    "indication_text = indication.get(\"indication_text\", \"\")\n",
+    "if indication_text:\n",
+    "   print(f\"  Indication: {indication_text[:100]}{'...' if len(indication_text) > 100 else ''}\")\n",
+    "else:\n",
+    "   print(\"  Indication: (not found)\")\n",
+    "\n",
+    "# 4b: Pregnancy Dating\n",
+    "print(\"\\n  --- Pregnancy Dating ---\")\n",
+    "dating = parse_pregnancy_dating(observer_json_str, SOURCE_FORMAT)\n",
+    "print(f\"  LMP: {dating.get('lmp', '(not found)')}\")\n",
+    "print(f\"  EDD: {dating.get('edd', '(not found)')}\")\n",
+    "print(f\"  Dating Method: {dating.get('dating_method', '(not found)')}\")\n",
+    "print(f\"  GA by Ultrasound: {dating.get('ga_by_ultrasound', '(not found)')}\")\n",
+    "\n",
+    "# 4c: Clinical Impression\n",
+    "print(\"\\n  --- Clinical Impression ---\")\n",
+    "impression = parse_clinical_impression(observer_json_str, SOURCE_FORMAT)\n",
+    "impression_text = impression.get(\"impression_text\", \"\")\n",
+    "\n",
+    "if impression_text:\n",
+    "   # Clean up for display\n",
+    "   preview = impression_text[:200].replace('\\r', ' ').replace('\\n', ' ')\n",
+    "   print(f\"  Impression ({len(impression_text)} chars): \\\"{preview}...\\\"\")\n",
+    "else:\n",
+    "   print(\"  Impression: (not found)\")\n",
+    "\n",
+    "print(f\"  Growth Assessment: {impression.get('growth_assessment', '(not detected)')}\")\n",
+    "\n",
+    "# 4d: Extract HPO terms from clinical narrative\n",
+    "print(\"\\n  --- HPO Concept Recognition from Clinical Text ---\")\n",
+    "if impression_text:\n",
+    "   hpo_terms_from_text = hpo_cr.parse(impression_text)\n",
+    "   print(f\"  Found {len(hpo_terms_from_text)} HPO terms in clinical narrative:\")\n",
+    "   for term in hpo_terms_from_text:\n",
+    "       print(f\"    ? {term.hpo_id}: {term.hpo_label}\")\n",
+    "else:\n",
+    "   hpo_terms_from_text = []\n",
+    "   print(\"  (no impression text to parse)\")\n",
+    "\n",
+    "if not hpo_terms_from_text:\n",
+    "   print(\"  (no HPO terms matched)\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 5: Preview Anatomy Findings (Structured Data)\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 5] Previewing anatomy findings...\")\n",
+    "\n",
+    "fetus_data = observer_data[\"fetuses\"][0].get(\"fetus\", {})\n",
+    "anatomy_list = fetus_data.get(\"anatomy\", [])\n",
+    "\n",
+    "normal_structures = []\n",
+    "abnormal_structures = []\n",
+    "unseen_structures = []\n",
+    "anomalies_found = []\n",
+    "\n",
+    "for item in anatomy_list:\n",
+    "   main = item.get(\"main\", {})\n",
+    "   label = main.get(\"label\", \"Unknown\")\n",
+    "   state = main.get(\"anat_state\", \"\")\n",
+    "   \n",
+    "   if state == \"Normal\":\n",
+    "       normal_structures.append(label)\n",
+    "   elif state == \"Abnormal\":\n",
+    "       abnormal_structures.append(label)\n",
+    "       # Check for specific anomalies\n",
+    "       anomalies = item.get(\"anomalies\", [])\n",
+    "       if anomalies:\n",
+    "           for anom in anomalies:\n",
+    "               desc = anom.get(\"description\", \"?\")\n",
+    "               anomalies_found.append(f\"{label}: {desc}\")\n",
+    "   elif state == \"Unseen\":\n",
+    "       unseen_structures.append(label)\n",
+    "\n",
+    "print(f\"  Normal ({len(normal_structures)}): {', '.join(normal_structures[:5])}...\")\n",
+    "print(f\"  Abnormal ({len(abnormal_structures)}): {', '.join(abnormal_structures) if abnormal_structures else '(none)'}\")\n",
+    "print(f\"  Not visualized ({len(unseen_structures)}): {', '.join(unseen_structures[:3])}...\")\n",
+    "\n",
+    "if anomalies_found:\n",
+    "   print(f\"  ? Anomalies detected:\")\n",
+    "   for anom in anomalies_found:\n",
+    "       print(f\"    - {anom}\")\n",
+    "\n",
+    "print(\"  (Note: Anatomy section parser not yet implemented in ETL)\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 6: Convert to PhenotypicFeatures\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 6] Converting to PhenotypicFeatures...\")\n",
+    "\n",
+    "\n",
+    "def parse_ga_from_description(description: str, fallback_weeks: float = 26.9) -> tuple[int, int]:\n",
+    "   \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
+    "   match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
+    "   if match:\n",
+    "       return int(match.group(1)), int(match.group(2))\n",
+    "   ga = GestationalAge.from_weeks(fallback_weeks)\n",
+    "   return ga.weeks, ga.days\n",
+    "\n",
+    "\n",
+    "# Get subject GA for features without specific timing\n",
+    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
+    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
+    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
+    "\n",
+    "phenotypic_features = []\n",
+    "\n",
+    "# 6a: Convert biometry TermBins -> PhenotypicFeatures\n",
+    "print(\"\\n  --- From Biometry ---\")\n",
+    "for tb in term_bins:\n",
+    "   weeks, days = parse_ga_from_description(tb.description, subject_ga_weeks)\n",
+    "   \n",
+    "   pf = pps2.PhenotypicFeature(\n",
+    "       type=pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label),\n",
+    "       excluded=tb.normal,  # normal=True means abnormality is EXCLUDED\n",
+    "       onset=pps2.TimeElement(\n",
+    "           gestational_age=pps2.GestationalAge(weeks=weeks, days=days)\n",
+    "       ),\n",
+    "       description=f\"[Biometry] {tb.description}\",\n",
+    "   )\n",
+    "   phenotypic_features.append(pf)\n",
+    "\n",
+    "print(f\"  ? Added {len(term_bins)} features from biometry\")\n",
+    "\n",
+    "# 6b: Convert clinical text HPO terms -> PhenotypicFeatures\n",
+    "print(\"\\n  --- From Clinical Text ---\")\n",
+    "text_feature_count = 0\n",
+    "for term in hpo_terms_from_text:\n",
+    "   # Findings mentioned in clinical impression are OBSERVED (not excluded)\n",
+    "   pf = pps2.PhenotypicFeature(\n",
+    "       type=pps2.OntologyClass(id=term.hpo_id, label=term.hpo_label),\n",
+    "       excluded=False,  # These are observed findings\n",
+    "       onset=pps2.TimeElement(\n",
+    "           gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
+    "       ),\n",
+    "       description=f\"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+    "   )\n",
+    "   phenotypic_features.append(pf)\n",
+    "   text_feature_count += 1\n",
+    "\n",
+    "print(f\"  ? Added {text_feature_count} features from clinical text\")\n",
+    "print(f\"\\n  Total PhenotypicFeatures: {len(phenotypic_features)}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 7: Build Complete Phenopacket v2.0\n",
+    "# =============================================================================\n",
+    "print(\"\\n[STEP 7] Building Phenopacket v2.0...\")\n",
+    "\n",
+    "# Subject (fetus)\n",
+    "subject = pps2.Individual(\n",
+    "   id=\"fetus-1\",\n",
+    "   sex=pps2.Sex.UNKNOWN_SEX,\n",
+    "   time_at_last_encounter=pps2.TimeElement(\n",
+    "       gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
+    "   ),\n",
+    ")\n",
+    "\n",
+    "# Metadata\n",
+    "now = datetime.now(timezone.utc)\n",
+    "created_timestamp = Timestamp()\n",
+    "created_timestamp.FromDatetime(now)\n",
+    "\n",
+    "hpo_resource = pps2.Resource(\n",
+    "   id=\"hp\",\n",
+    "   name=\"Human Phenotype Ontology\",\n",
+    "   url=\"http://purl.obolibrary.org/obo/hp.owl\",\n",
+    "   version=hpo_parser.get_version() or \"2025-01-01\",\n",
+    "   namespace_prefix=\"HP\",\n",
+    "   iri_prefix=\"http://purl.obolibrary.org/obo/HP_\",\n",
+    ")\n",
+    "\n",
+    "metadata = pps2.MetaData(\n",
+    "   created=created_timestamp,\n",
+    "   created_by=\"prenatalppkt-etl-pipeline\",\n",
+    "   phenopacket_schema_version=\"2.0\",\n",
+    ")\n",
+    "metadata.resources.append(hpo_resource)\n",
+    "\n",
+    "# Assemble the Phenopacket\n",
+    "phenopacket = pps2.Phenopacket(\n",
+    "   id=\"apple-sally-fetus-1\",\n",
+    "   subject=subject,\n",
+    "   meta_data=metadata,\n",
+    ")\n",
+    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
+    "\n",
+    "print(\"  ? Phenopacket assembled successfully\")\n",
+    "print(f\"    ID: {phenopacket.id}\")\n",
+    "print(f\"    Subject: {phenopacket.subject.id} at {subject_ga.weeks}w{subject_ga.days}d\")\n",
+    "print(f\"    Features: {len(phenopacket.phenotypic_features)}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 8: Output JSON\n",
+    "# =============================================================================\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\"PHENOPACKET v2.0 OUTPUT (JSON)\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
+    "print(phenopacket_json)\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 9: Validation & Summary\n",
+    "# =============================================================================\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\"VALIDATION & SUMMARY\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# Round-trip validation\n",
+    "print(\"\\n[Validation] Round-trip test...\")\n",
+    "parsed_back = Parse(phenopacket_json, pps2.Phenopacket())\n",
+    "assert parsed_back.id == phenopacket.id\n",
+    "assert len(parsed_back.phenotypic_features) == len(phenopacket.phenotypic_features)\n",
+    "print(\"  ? Round-trip validation passed\")\n",
+    "\n",
+    "# Feature breakdown\n",
+    "biometry_features = [pf for pf in phenopacket.phenotypic_features if \"[Biometry]\" in pf.description]\n",
+    "clinical_features = [pf for pf in phenopacket.phenotypic_features if \"[Clinical\" in pf.description]\n",
+    "excluded_count = sum(1 for pf in phenopacket.phenotypic_features if pf.excluded)\n",
+    "observed_count = len(phenopacket.phenotypic_features) - excluded_count\n",
+    "\n",
+    "print(\"\\n[Summary] Phenotypic Features:\")\n",
+    "print(f\"  Total: {len(phenopacket.phenotypic_features)}\")\n",
+    "print(f\"    From Biometry: {len(biometry_features)}\")\n",
+    "print(f\"    From Clinical Text: {len(clinical_features)}\")\n",
+    "print(f\"  Normal (excluded): {excluded_count}\")\n",
+    "print(f\"  Abnormal (observed): {observed_count}\")\n",
+    "\n",
+    "# Detailed feature list\n",
+    "print(\"\\n[Detail] All Phenotypic Features:\")\n",
+    "print(\"-\" * 60)\n",
+    "for i, pf in enumerate(phenopacket.phenotypic_features, 1):\n",
+    "   status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
+    "   ga = pf.onset.gestational_age\n",
+    "   source = \"Biometry\" if \"[Biometry]\" in pf.description else \"Clinical Text\"\n",
+    "   print(f\"\\n  [{i}] {pf.type.id} - {pf.type.label}\")\n",
+    "   print(f\"      Source: {source}\")\n",
+    "   print(f\"      Status: {status}\")\n",
+    "   print(f\"      Onset: {ga.weeks}w{ga.days}d\")\n",
+    "\n",
+    "# Save to file\n",
+    "output_path = Path(\"output/apple_sally_phenopacket_expanded.json\")\n",
+    "output_path.parent.mkdir(exist_ok=True)\n",
+    "with open(output_path, \"w\") as f:\n",
+    "   f.write(phenopacket_json)\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(f\"SUCCESS: Phenopacket saved to {output_path}\")\n",
+    "print(\"=\" * 80)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "prenatalppkt",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From fb5bbcb71ae41cd4ecc0fec972c55b0f0b862a56 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 26 Jan 2026 15:46:58 -0500
Subject: [PATCH 06/20] ruffing

---
 .../etl/sections/clinical_impression.py       | 262 +++++++++---------
 1 file changed, 131 insertions(+), 131 deletions(-)

diff --git a/src/prenatalppkt/etl/sections/clinical_impression.py b/src/prenatalppkt/etl/sections/clinical_impression.py
index 0003973..6f83ee5 100644
--- a/src/prenatalppkt/etl/sections/clinical_impression.py
+++ b/src/prenatalppkt/etl/sections/clinical_impression.py
@@ -13,68 +13,68 @@
 
 
 def parse_clinical_impression(
-   data: Union[str, Dict], source_format: str, hpo_cr=None
+    data: Union[str, Dict], source_format: str, hpo_cr=None
 ) -> Dict:
-   """
-   Parse clinical impression / interpretation section.
-
-   Supports:
-       - observer_json
-       - viewpoint_text
-       - viewpoint_hl7
-
-   Args:
-       data: Raw input data (JSON string, dict, or text)
-       source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
-       hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
-               If provided, will extract HPO terms from impression text.
-
-   Returns:
-       Dict with keys:
-           - impression_text: str - Full impression narrative
-           - diagnoses: List[str] - Identified diagnoses (future)
-           - anomalies: List[Dict] - Structured anomaly data (future)
-           - gestational_age_assessment: Optional[str] - GA conclusion
-           - growth_assessment: Optional[str] - FGR, LGA, AGA, or None
-           - recommendations: List[str] - Follow-up recommendations (future)
-           - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
-           - source_format: str
-   """
-   if source_format == "observer_json":
-       if isinstance(data, str):
-           data = json.loads(data)
-       impression_text = _parse_observer_impression(data)
-
-   elif source_format == "viewpoint_text":
-       if not isinstance(data, str):
-           raise ValueError("viewpoint_text data must be a string")
-       impression_text = _parse_viewpoint_text_impression(data)
-
-   elif source_format == "viewpoint_hl7":
-       if not isinstance(data, str):
-           raise ValueError("viewpoint_hl7 data must be a string")
-       impression_text = _parse_viewpoint_hl7_impression(data)
-
-   else:
-       raise ValueError(f"Unsupported source_format: {source_format}")
-
-   # Extract HPO terms if concept recognizer is provided
-   hpo_terms = []
-   if impression_text and hpo_cr is not None:
-       # HpoExactConceptRecognizer uses parse() method, not extract()
-       if hasattr(hpo_cr, "parse"):
-           hpo_terms = hpo_cr.parse(impression_text)
-
-   return {
-       "impression_text": impression_text,
-       "diagnoses": [],
-       "anomalies": [],
-       "gestational_age_assessment": None,
-       "growth_assessment": _infer_growth_assessment(impression_text),
-       "recommendations": [],
-       "hpo_terms": hpo_terms,
-       "source_format": source_format,
-   }
+    """
+    Parse clinical impression / interpretation section.
+
+    Supports:
+        - observer_json
+        - viewpoint_text
+        - viewpoint_hl7
+
+    Args:
+        data: Raw input data (JSON string, dict, or text)
+        source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+        hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
+                If provided, will extract HPO terms from impression text.
+
+    Returns:
+        Dict with keys:
+            - impression_text: str - Full impression narrative
+            - diagnoses: List[str] - Identified diagnoses (future)
+            - anomalies: List[Dict] - Structured anomaly data (future)
+            - gestational_age_assessment: Optional[str] - GA conclusion
+            - growth_assessment: Optional[str] - FGR, LGA, AGA, or None
+            - recommendations: List[str] - Follow-up recommendations (future)
+            - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
+            - source_format: str
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        impression_text = _parse_observer_impression(data)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        impression_text = _parse_viewpoint_text_impression(data)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        impression_text = _parse_viewpoint_hl7_impression(data)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    # Extract HPO terms if concept recognizer is provided
+    hpo_terms = []
+    if impression_text and hpo_cr is not None:
+        # HpoExactConceptRecognizer uses parse() method, not extract()
+        if hasattr(hpo_cr, "parse"):
+            hpo_terms = hpo_cr.parse(impression_text)
+
+    return {
+        "impression_text": impression_text,
+        "diagnoses": [],
+        "anomalies": [],
+        "gestational_age_assessment": None,
+        "growth_assessment": _infer_growth_assessment(impression_text),
+        "recommendations": [],
+        "hpo_terms": hpo_terms,
+        "source_format": source_format,
+    }
 
 
 # ---------------------------------------------------------------------
@@ -83,28 +83,28 @@ def parse_clinical_impression(
 
 
 def _parse_observer_impression(json_data: Dict) -> str:
-   """
-   Extract impression from Observer JSON.
+    """
+    Extract impression from Observer JSON.
 
-   The finalize block can be at:
-   - Root level: json_data["finalize"]["generalComment"]["plain_text"]
-   - Under exam: json_data["exam"]["finalize"]["generalComment"]["plain_text"]
+    The finalize block can be at:
+    - Root level: json_data["finalize"]["generalComment"]["plain_text"]
+    - Under exam: json_data["exam"]["finalize"]["generalComment"]["plain_text"]
 
-   We check the root level first (most common), then fall back to exam.
-   """
-   impression = ""
+    We check the root level first (most common), then fall back to exam.
+    """
+    impression = ""
 
-   # Check root level first (this is where Apple_Sally has it)
-   finalize = json_data.get("finalize", {})
-   impression = finalize.get("generalComment", {}).get("plain_text", "").strip()
+    # Check root level first (this is where Apple_Sally has it)
+    finalize = json_data.get("finalize", {})
+    impression = finalize.get("generalComment", {}).get("plain_text", "").strip()
 
-   # Fall back to exam.finalize if not found at root
-   if not impression:
-       exam = json_data.get("exam", {})
-       finalize = exam.get("finalize", {})
-       impression = finalize.get("generalComment", {}).get("plain_text", "").strip()
+    # Fall back to exam.finalize if not found at root
+    if not impression:
+        exam = json_data.get("exam", {})
+        finalize = exam.get("finalize", {})
+        impression = finalize.get("generalComment", {}).get("plain_text", "").strip()
 
-   return impression
+    return impression
 
 
 # ---------------------------------------------------------------------
@@ -113,21 +113,21 @@ def _parse_observer_impression(json_data: Dict) -> str:
 
 
 def _parse_viewpoint_text_impression(text: str) -> str:
-   """
-   Extract impression from ViewPoint text reports.
+    """
+    Extract impression from ViewPoint text reports.
 
-   Expected pattern:
-       Impression
-       ==========
-       [free text narrative]
-   """
-   pattern = re.compile(
-       r"Impression\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
-       re.DOTALL | re.IGNORECASE,
-   )
+    Expected pattern:
+        Impression
+        ==========
+        [free text narrative]
+    """
+    pattern = re.compile(
+        r"Impression\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE,
+    )
 
-   match = pattern.search(text)
-   return match.group("body").strip() if match else ""
+    match = pattern.search(text)
+    return match.group("body").strip() if match else ""
 
 
 # ---------------------------------------------------------------------
@@ -136,30 +136,30 @@ def _parse_viewpoint_text_impression(text: str) -> str:
 
 
 def _parse_viewpoint_hl7_impression(hl7: str) -> str:
-   """
-   Extract impression from HL7 ORU^R01 messages.
+    """
+    Extract impression from HL7 ORU^R01 messages.
 
-   Looks for OBX segments containing "Impression" or "Interpretation"
-   in the observation identifier field.
-   """
-   lines: List[str] = []
+    Looks for OBX segments containing "Impression" or "Interpretation"
+    in the observation identifier field.
+    """
+    lines: List[str] = []
 
-   for line in hl7.splitlines():
-       if not line.startswith("OBX"):
-           continue
+    for line in hl7.splitlines():
+        if not line.startswith("OBX"):
+            continue
 
-       fields = line.split("|")
-       if len(fields) < 6:
-           continue
+        fields = line.split("|")
+        if len(fields) < 6:
+            continue
 
-       obs_id = fields[3]
-       value = fields[5].split("^")[0].strip()
+        obs_id = fields[3]
+        value = fields[5].split("^")[0].strip()
 
-       if "Impression" in obs_id or "Interpretation" in obs_id:
-           if value:
-               lines.append(value)
+        if "Impression" in obs_id or "Interpretation" in obs_id:
+            if value:
+                lines.append(value)
 
-   return " ".join(lines)
+    return " ".join(lines)
 
 
 # ---------------------------------------------------------------------
@@ -168,25 +168,25 @@ def _parse_viewpoint_hl7_impression(hl7: str) -> str:
 
 
 def _infer_growth_assessment(text: str) -> Optional[str]:
-   """
-   Infer fetal growth assessment from impression text.
-
-   Returns:
-       "FGR" - Fetal Growth Restriction
-       "LGA" - Large for Gestational Age
-       "AGA" - Appropriate for Gestational Age
-       None - No assessment detected
-   """
-   if not text:
-       return None
-
-   text_lower = text.lower()
-
-   if "growth restriction" in text_lower or "fgr" in text_lower:
-       return "FGR"
-   if "large for gestational age" in text_lower or "lga" in text_lower:
-       return "LGA"
-   if "appropriate for gestational age" in text_lower or "aga" in text_lower:
-       return "AGA"
-
-   return None
\ No newline at end of file
+    """
+    Infer fetal growth assessment from impression text.
+
+    Returns:
+        "FGR" - Fetal Growth Restriction
+        "LGA" - Large for Gestational Age
+        "AGA" - Appropriate for Gestational Age
+        None - No assessment detected
+    """
+    if not text:
+        return None
+
+    text_lower = text.lower()
+
+    if "growth restriction" in text_lower or "fgr" in text_lower:
+        return "FGR"
+    if "large for gestational age" in text_lower or "lga" in text_lower:
+        return "LGA"
+    if "appropriate for gestational age" in text_lower or "aga" in text_lower:
+        return "AGA"
+
+    return None

From 8703a58eebcabb28caed45c8119e9151014be981 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 26 Jan 2026 15:47:11 -0500
Subject: [PATCH 07/20] fix(tests): correct hpo_cr parameter name in clinical
 impression tests. Tests were using hpo_parser but function signature uses
 hpo_cr

---
 tests/etl/sections/test_clinical_impression.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tests/etl/sections/test_clinical_impression.py b/tests/etl/sections/test_clinical_impression.py
index 715a2e1..dc2c057 100644
--- a/tests/etl/sections/test_clinical_impression.py
+++ b/tests/etl/sections/test_clinical_impression.py
@@ -23,7 +23,7 @@ def test_basic_impression(self, hpo_cr):
             }
         )
 
-        result = parse_clinical_impression(data, "observer_json", hpo_parser=hpo_cr)
+        result = parse_clinical_impression(data, "observer_json", hpo_cr=hpo_cr)
 
         assert "Normal fetal anatomy" in result["impression_text"]
         assert result["hpo_terms"] == []
@@ -43,7 +43,7 @@ def test_basic_impression(self, hpo_cr):
 Recommend follow-up scan.
 """
 
-        result = parse_clinical_impression(text, "viewpoint_text", hpo_parser=hpo_cr)
+        result = parse_clinical_impression(text, "viewpoint_text", hpo_cr=hpo_cr)
 
         assert "growth restriction" in result["impression_text"].lower()
         assert result["growth_assessment"] == "FGR"
@@ -51,7 +51,7 @@ def test_basic_impression(self, hpo_cr):
 
     def test_missing_impression(self, hpo_cr):
         text = "Fetal Biometry\n============\nHC 175 mm"
-        result = parse_clinical_impression(text, "viewpoint_text", hpo_parser=hpo_cr)
+        result = parse_clinical_impression(text, "viewpoint_text", hpo_cr=hpo_cr)
         assert result["impression_text"] == ""
 
 
@@ -64,7 +64,7 @@ class TestClinicalImpressionViewPointHL7:
     def test_basic_hl7_impression(self, hpo_cr):
         hl7 = "OBX||TX|Impression^Impression|1|Appropriate for gestational age\n"
 
-        result = parse_clinical_impression(hl7, "viewpoint_hl7", hpo_parser=hpo_cr)
+        result = parse_clinical_impression(hl7, "viewpoint_hl7", hpo_cr=hpo_cr)
 
         assert "Appropriate" in result["impression_text"]
         assert result["growth_assessment"] == "AGA"
@@ -78,10 +78,8 @@ def test_basic_hl7_impression(self, hpo_cr):
 class TestClinicalImpressionEdgeCases:
     def test_invalid_format(self, hpo_cr):
         with pytest.raises(ValueError):
-            parse_clinical_impression("data", "bad_format", hpo_parser=hpo_cr)
+            parse_clinical_impression("data", "bad_format", hpo_cr=hpo_cr)
 
     def test_non_string_text(self, hpo_cr):
         with pytest.raises(ValueError):
-            parse_clinical_impression(
-                {"bad": "data"}, "viewpoint_text", hpo_parser=hpo_cr
-            )
+            parse_clinical_impression({"bad": "data"}, "viewpoint_text", hpo_cr=hpo_cr)

From 0c33026832ab0c61d040f5861da8722a074b6a19 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 26 Jan 2026 15:47:37 -0500
Subject: [PATCH 08/20] docs(notebook): expand ETL demo with clinical text HPO
 extraction

---
 prenatalppkt.ipynb | 171 +++++++++++++++++++++++++++------------------
 1 file changed, 102 insertions(+), 69 deletions(-)

diff --git a/prenatalppkt.ipynb b/prenatalppkt.ipynb
index 2426c29..badee2e 100644
--- a/prenatalppkt.ipynb
+++ b/prenatalppkt.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
    "id": "d8f2cfce",
    "metadata": {},
    "outputs": [
@@ -67,8 +67,8 @@
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'HC', 'Femur', 'AC', 'BPD'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'HC', 'Femur', 'AC', 'BPD'}\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
       "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
      ]
@@ -201,7 +201,7 @@
       "    }\n",
       "  ],\n",
       "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-23T14:56:52.244568Z\",\n",
+      "    \"created\": \"2026-01-26T15:21:08.287048Z\",\n",
       "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
       "    \"resources\": [\n",
       "      {\n",
@@ -499,7 +499,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "id": "3685f9e5",
    "metadata": {},
    "outputs": [
@@ -556,8 +556,8 @@
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'HC', 'Femur', 'AC', 'BPD'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'HC', 'Femur', 'AC', 'BPD'}\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
       "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
      ]
@@ -681,7 +681,7 @@
       "    }\n",
       "  ],\n",
       "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-23T14:56:52.295444Z\",\n",
+      "    \"created\": \"2026-01-26T15:21:08.337338Z\",\n",
       "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
       "    \"resources\": [\n",
       "      {\n",
@@ -803,20 +803,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "id": "0f79d3fe",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
-      "DEBUG:hpotk.util:Opening /tmp/hp.json\n",
-      "DEBUG:hpotk.util:Looks like a local file: /tmp/hp.json\n",
-      "DEBUG:hpotk.util:Looks like decompressed data\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -833,6 +823,10 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
+      "DEBUG:hpotk.util:Opening /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like a local file: /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like decompressed data\n",
       "DEBUG:hpotk.ontology.load.obographs._load:Extracting ontology terms\n",
       "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
       "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
@@ -901,16 +895,10 @@
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'Femur', 'AC', 'BPD', 'HC'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'Femur', 'AC', 'BPD', 'HC'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'HC', 'Femur', 'AC', 'BPD'}\n",
+      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'HC', 'Femur', 'AC', 'BPD'}\n",
       "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
-      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n",
-      "DEBUG:hpotk.store._github:Pulling tag from https://api.github.com/repos/obophenotype/human-phenotype-ontology/tags\n",
-      "DEBUG:hpotk.store._github:Fetched 30 tags\n",
-      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
-      "DEBUG:hpotk.util:Opening /home/varenya/.hpo-toolkit/HP/hp.v2026-01-08.json\n",
-      "DEBUG:hpotk.util:Looks like a local file: /home/varenya/.hpo-toolkit/HP/hp.v2026-01-08.json\n",
-      "DEBUG:hpotk.util:Looks like decompressed data\n"
+      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
      ]
     },
     {
@@ -948,43 +936,16 @@
       "  Dating Method: None\n",
       "  GA by Ultrasound: None\n",
       "\n",
-      "  --- Clinical Impression ---\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "DEBUG:hpotk.ontology.load.obographs._load:Extracting ontology terms\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
-      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000051', 'lbl': 'has part', 'meta': {'xrefs': [{'val': 'BFO:0000051'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'has_part'}]}}\n",
-      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/BFO_0000066', 'lbl': 'occurs in', 'meta': {'xrefs': [{'val': 'BFO:0000066'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'occurs_in'}]}}\n",
-      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002503', 'lbl': 'towards', 'meta': {'xrefs': [{'val': 'RO:0002503'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'towards'}]}}\n",
-      "DEBUG:hpotk.ontology.io.obographs:Missing node type in {'id': 'http://purl.obolibrary.org/obo/RO_0002573', 'lbl': 'has modifier', 'meta': {'comments': ['placeholder relation to indicate normality/abnormality.'], 'xrefs': [{'val': 'RO:0002180'}], 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#shorthand', 'val': 'qualifier'}]}}\n",
-      "DEBUG:hpotk.ontology.load.obographs._load:Creating the edge list\n",
-      "DEBUG:hpotk.ontology.load.obographs._load:Building ontology graph\n",
-      "DEBUG:hpotk.graph._factory:Creating ontology graph from 23765 edges\n",
-      "DEBUG:hpotk.graph._factory:Found root HP:0000001\n",
-      "DEBUG:hpotk.graph._factory:Extracted 19408 nodes\n",
-      "DEBUG:hpotk.ontology.load.obographs._load:Assembling the ontology\n",
-      "DEBUG:hpotk.ontology.load.obographs._load:Done\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  Impression: (not found)\n",
+      "  --- Clinical Impression ---\n",
+      "  Impression (1294 chars): \"The patient was referred for a fetal anatomical survey.    Sonographic measurements were consistent with the expected gestational age. The amniotic fluid volume was normal. A detailed fetal anatomic s...\"\n",
       "  Growth Assessment: None\n",
       "\n",
       "  --- HPO Concept Recognition from Clinical Text ---\n",
-      "  (no impression text to parse)\n",
-      "  (no HPO terms matched)\n",
+      "  Found 4 HPO terms in clinical narrative:\n",
+      "    ? HP:0001274: Agenesis of corpus callosum\n",
+      "    ? HP:0000256: Macrocephaly\n",
+      "    ? HP:0001305: Dandy-Walker malformation\n",
+      "    ? HP:0002119: Ventriculomegaly\n",
       "\n",
       "[STEP 5] Previewing anatomy findings...\n",
       "  Normal (0): ...\n",
@@ -998,15 +959,15 @@
       "  ? Added 4 features from biometry\n",
       "\n",
       "  --- From Clinical Text ---\n",
-      "  ? Added 0 features from clinical text\n",
+      "  ? Added 4 features from clinical text\n",
       "\n",
-      "  Total PhenotypicFeatures: 4\n",
+      "  Total PhenotypicFeatures: 8\n",
       "\n",
       "[STEP 7] Building Phenopacket v2.0...\n",
       "  ? Phenopacket assembled successfully\n",
       "    ID: apple-sally-fetus-1\n",
       "    Subject: fetus-1 at 26w6d\n",
-      "    Features: 4\n",
+      "    Features: 8\n",
       "\n",
       "================================================================================\n",
       "PHENOPACKET v2.0 OUTPUT (JSON)\n",
@@ -1077,10 +1038,62 @@
       "          \"weeks\": 27\n",
       "        }\n",
       "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0001274\",\n",
+      "        \"label\": \"Agenesis of corpus callosum\"\n",
+      "      },\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0000256\",\n",
+      "        \"label\": \"Macrocephaly\"\n",
+      "      },\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0001305\",\n",
+      "        \"label\": \"Dandy-Walker malformation\"\n",
+      "      },\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0002119\",\n",
+      "        \"label\": \"Ventriculomegaly\"\n",
+      "      },\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
       "    }\n",
       "  ],\n",
       "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-23T14:56:57.292752Z\",\n",
+      "    \"created\": \"2026-01-26T15:21:11.051438Z\",\n",
       "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
       "    \"resources\": [\n",
       "      {\n",
@@ -1104,11 +1117,11 @@
       "  ? Round-trip validation passed\n",
       "\n",
       "[Summary] Phenotypic Features:\n",
-      "  Total: 4\n",
+      "  Total: 8\n",
       "    From Biometry: 4\n",
-      "    From Clinical Text: 0\n",
+      "    From Clinical Text: 4\n",
       "  Normal (excluded): 4\n",
-      "  Abnormal (observed): 0\n",
+      "  Abnormal (observed): 4\n",
       "\n",
       "[Detail] All Phenotypic Features:\n",
       "------------------------------------------------------------\n",
@@ -1133,6 +1146,26 @@
       "      Status: EXCLUDED (normal)\n",
       "      Onset: 27w0d\n",
       "\n",
+      "  [5] HP:0001274 - Agenesis of corpus callosum\n",
+      "      Source: Clinical Text\n",
+      "      Status: OBSERVED (abnormal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [6] HP:0000256 - Macrocephaly\n",
+      "      Source: Clinical Text\n",
+      "      Status: OBSERVED (abnormal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [7] HP:0001305 - Dandy-Walker malformation\n",
+      "      Source: Clinical Text\n",
+      "      Status: OBSERVED (abnormal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
+      "  [8] HP:0002119 - Ventriculomegaly\n",
+      "      Source: Clinical Text\n",
+      "      Status: OBSERVED (abnormal)\n",
+      "      Onset: 26w6d\n",
+      "\n",
       "================================================================================\n",
       "SUCCESS: Phenopacket saved to output/apple_sally_phenopacket_expanded.json\n",
       "================================================================================\n"

From 3527e2d7099bd0612f4b546cf7aabeb3b4b94592 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 02:41:48 -0500
Subject: [PATCH 09/20] feat(etl): add fetal anatomy section parser with HPO
 extraction

---
 .../etl/sections/fetal_anatomy.py             | 265 +++++++++++++++++-
 tests/etl/sections/test_fetal_anatomy.py      | 219 +++++++++++++++
 2 files changed, 469 insertions(+), 15 deletions(-)
 create mode 100644 tests/etl/sections/test_fetal_anatomy.py

diff --git a/src/prenatalppkt/etl/sections/fetal_anatomy.py b/src/prenatalppkt/etl/sections/fetal_anatomy.py
index 694eab5..5613aef 100644
--- a/src/prenatalppkt/etl/sections/fetal_anatomy.py
+++ b/src/prenatalppkt/etl/sections/fetal_anatomy.py
@@ -1,21 +1,256 @@
 """
-Fetal anatomy section parser (SKELETON).
+Fetal anatomy section parser.
 
-TODO @VarenyaJ: Parse anatomy checklist (normal/abnormal/not visualized)
-TODO @VarenyaJ: Map anatomical findings to HPO terms
-TODO @VarenyaJ: Handle detailed anatomy subsections
+Extracts structured anatomy findings and free-text anatomy narrative,
+with optional HPO term extraction from anomaly descriptions.
 """
 
-from typing import Dict
+from __future__ import annotations
 
+import json
+import re
+from typing import Dict, List, Union
 
-def parse_fetal_anatomy(data: str, source_format: str = "viewpoint_text") -> Dict:
-    """Extract fetal anatomy assessment."""
-    return {
-        "structures_examined": [],
-        "normal_structures": [],
-        "abnormal_structures": [],
-        "not_visualized": [],
-        "anomalies": [],
-        "hpo_terms": [],
-    }
+
+def parse_fetal_anatomy(
+   data: Union[str, Dict], source_format: str, hpo_cr=None
+) -> Dict:
+   """
+   Parse fetal anatomy section.
+
+   Supports:
+       - observer_json
+       - viewpoint_text (skeleton)
+       - viewpoint_hl7 (skeleton)
+
+   Args:
+       data: Raw input data (JSON string, dict, or text)
+       source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+       hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
+               If provided, will extract HPO terms from anomaly descriptions.
+
+   Returns:
+       Dict with keys:
+           - anatomy_text: str - Free text anatomy narrative
+           - normal_structures: List[str] - Structures marked Normal
+           - abnormal_structures: List[str] - Structures marked Abnormal
+           - not_visualized: List[str] - Structures marked Unseen
+           - anomalies: List[Dict] - Specific anomaly findings
+           - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
+           - source_format: str
+   """
+   if source_format == "observer_json":
+       if isinstance(data, str):
+           data = json.loads(data)
+       return _parse_observer_anatomy(data, hpo_cr)
+
+   elif source_format == "viewpoint_text":
+       if not isinstance(data, str):
+           raise ValueError("viewpoint_text data must be a string")
+       return _parse_viewpoint_text_anatomy(data, hpo_cr)
+
+   elif source_format == "viewpoint_hl7":
+       if not isinstance(data, str):
+           raise ValueError("viewpoint_hl7 data must be a string")
+       return _parse_viewpoint_hl7_anatomy(data, hpo_cr)
+
+   else:
+       raise ValueError(f"Unsupported source_format: {source_format}")
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _classify_structure(
+   label: str,
+   state: str,
+   normal: List[str],
+   abnormal: List[str],
+   unseen: List[str],
+) -> None:
+   """Classify a structure into the appropriate list based on state."""
+   if not label:
+       return
+   if state == "Normal" and label not in normal:
+       normal.append(label)
+   elif state == "Abnormal" and label not in abnormal:
+       abnormal.append(label)
+   elif state == "Unseen" and label not in unseen:
+       unseen.append(label)
+
+
+def _process_anatomy_item(
+   item: Dict,
+   normal: List[str],
+   abnormal: List[str],
+   unseen: List[str],
+   anomalies: List[Dict],
+) -> None:
+   """Process a single anatomy item, extracting structures and anomalies."""
+   main = item.get("main", {})
+   label = main.get("label", "")
+   state = main.get("anat_state", "")
+
+   # Classify main structure
+   _classify_structure(label, state, normal, abnormal, unseen)
+
+   # Process detail sub-structures
+   for detail in item.get("detail", []):
+       detail_label = detail.get("label", "")
+       detail_state = detail.get("anat_det_state", "")
+       _classify_structure(detail_label, detail_state, normal, abnormal, unseen)
+
+   # Process anomalies
+   for anom in item.get("anomalies", []):
+       description = anom.get("description", "")
+       if description:
+           anomalies.append({
+               "structure": label,
+               "description": description,
+               "variant_type": anom.get("abnormal_or_normal_variant", "Abnormal"),
+           })
+
+
+def _extract_hpo_terms(anatomy_text: str, anomalies: List[Dict], hpo_cr) -> List:
+   """Extract HPO terms from anatomy text and anomaly descriptions."""
+   if hpo_cr is None or not hasattr(hpo_cr, "parse"):
+       return []
+
+   all_anomaly_text = " ".join(
+       a["description"] for a in anomalies if a.get("description")
+   )
+   combined_text = f"{anatomy_text} {all_anomaly_text}".strip()
+
+   if not combined_text:
+       return []
+
+   return hpo_cr.parse(combined_text)
+
+
+def _parse_observer_anatomy(json_data: Dict, hpo_cr=None) -> Dict:
+   """
+   Extract anatomy findings from Observer JSON.
+
+   Paths:
+   - fetuses[i].fetus.anatomy_text - free text narrative
+   - fetuses[i].fetus.anatomy[] - structured findings
+     - main.label - structure name (e.g., "Head", "Face")
+     - main.anat_state - "Normal", "Abnormal", or "Unseen"
+     - detail[].label - sub-structure name
+     - detail[].anat_det_state - sub-structure state
+     - anomalies[].description - specific finding text
+     - anomalies[].abnormal_or_normal_variant - classification
+   """
+   fetuses = json_data.get("fetuses", [])
+   if not fetuses:
+       return _empty_result("observer_json")
+
+   fetus_block = fetuses[0].get("fetus", {})
+   anatomy_text = fetus_block.get("anatomy_text", "")
+
+   normal_structures: List[str] = []
+   abnormal_structures: List[str] = []
+   not_visualized: List[str] = []
+   anomalies: List[Dict] = []
+
+   for item in fetus_block.get("anatomy", []):
+       _process_anatomy_item(
+           item, normal_structures, abnormal_structures, not_visualized, anomalies
+       )
+
+   hpo_terms = _extract_hpo_terms(anatomy_text, anomalies, hpo_cr)
+
+   return {
+       "anatomy_text": anatomy_text,
+       "normal_structures": normal_structures,
+       "abnormal_structures": abnormal_structures,
+       "not_visualized": not_visualized,
+       "anomalies": anomalies,
+       "hpo_terms": hpo_terms,
+       "source_format": "observer_json",
+   }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_anatomy(text: str, hpo_cr=None) -> Dict:
+   """
+   Extract anatomy from ViewPoint text reports.
+
+   Expected pattern:
+       Fetal Anatomy
+       =============
+       The following structures appear normal:
+       Cranium. Brain. Face. ...
+
+       The following structures appear abnormal:
+       GI tract: dilated bowel loops.
+
+       The following structures could not be adequately visualized:
+       LVOT view. RVOT view. ...
+
+   TODO @VarenyaJ: Implement full parsing
+   """
+   # Skeleton: Extract the Fetal Anatomy section
+   pattern = re.compile(
+       r"Fetal Anatomy\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+       re.DOTALL | re.IGNORECASE,
+   )
+   match = pattern.search(text)
+   anatomy_text = match.group("body").strip() if match else ""
+
+   # TODO @VarenyaJ: Parse "appear normal", "appear abnormal", "could not be visualized" lists
+
+   hpo_terms = []
+   if anatomy_text and hpo_cr is not None and hasattr(hpo_cr, "parse"):
+       hpo_terms = hpo_cr.parse(anatomy_text)
+
+   return {
+       "anatomy_text": anatomy_text,
+       "normal_structures": [],
+       "abnormal_structures": [],
+       "not_visualized": [],
+       "anomalies": [],
+       "hpo_terms": hpo_terms,
+       "source_format": "viewpoint_text",
+   }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_anatomy(hl7: str, hpo_cr=None) -> Dict:
+   """
+   Extract anatomy from HL7 ORU^R01 messages.
+
+   Note: Anatomy is typically not encoded in discrete HL7 fields.
+   This is a skeleton for potential future implementation.
+
+   TODO @VarenyaJ: Implement if HL7 anatomy encoding is discovered
+   """
+   return _empty_result("viewpoint_hl7")
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _empty_result(source_format: str) -> Dict:
+   """Return empty result structure."""
+   return {
+       "anatomy_text": "",
+       "normal_structures": [],
+       "abnormal_structures": [],
+       "not_visualized": [],
+       "anomalies": [],
+       "hpo_terms": [],
+       "source_format": source_format,
+   }
\ No newline at end of file
diff --git a/tests/etl/sections/test_fetal_anatomy.py b/tests/etl/sections/test_fetal_anatomy.py
new file mode 100644
index 0000000..d2f42ff
--- /dev/null
+++ b/tests/etl/sections/test_fetal_anatomy.py
@@ -0,0 +1,219 @@
+"""Tests for fetal anatomy section parser."""
+
+import json
+import pytest
+
+from prenatalppkt.etl.sections.fetal_anatomy import parse_fetal_anatomy
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestFetalAnatomyObserver:
+    def test_basic_anatomy_structures(self):
+        """Normal/Abnormal/Unseen anat_state values fill the matching lists."""
+        data = {
+            "fetuses": [
+                {
+                    "fetus": {
+                        "anatomy_text": "The fetal anatomy was assessed.",
+                        "anatomy": [
+                            {
+                                "main": {"label": "Head", "anat_state": "Normal"},
+                                "detail": [],
+                                "anomalies": [],
+                            },
+                            {
+                                "main": {"label": "Heart", "anat_state": "Abnormal"},
+                                "detail": [],
+                                "anomalies": [],
+                            },
+                            {
+                                "main": {"label": "Spine", "anat_state": "Unseen"},
+                                "detail": [],
+                                "anomalies": [],
+                            },
+                        ],
+                    }
+                }
+            ]
+        }
+
+        result = parse_fetal_anatomy(data, "observer_json")
+
+        assert "Head" in result["normal_structures"]
+        assert "Heart" in result["abnormal_structures"]
+        assert "Spine" in result["not_visualized"]
+        assert result["anatomy_text"] == "The fetal anatomy was assessed."
+        assert result["source_format"] == "observer_json"
+
+    def test_anatomy_with_anomalies(self):
+        """Abnormal details and anomaly entries are reported per structure."""
+        data = {
+            "fetuses": [
+                {
+                    "fetus": {
+                        "anatomy_text": "",
+                        "anatomy": [
+                            {
+                                "main": {"label": "Head", "anat_state": "Abnormal"},
+                                "detail": [
+                                    {
+                                        "label": "Cerebellum",
+                                        "anat_det_state": "Abnormal",
+                                    }
+                                ],
+                                "anomalies": [
+                                    {
+                                        "description": "Dandy Walker",
+                                        "abnormal_or_normal_variant": "Abnormal",
+                                    }
+                                ],
+                            }
+                        ],
+                    }
+                }
+            ]
+        }
+
+        result = parse_fetal_anatomy(data, "observer_json")
+
+        assert "Head" in result["abnormal_structures"]
+        assert "Cerebellum" in result["abnormal_structures"]
+        assert len(result["anomalies"]) == 1
+        assert result["anomalies"][0]["structure"] == "Head"
+        assert result["anomalies"][0]["description"] == "Dandy Walker"
+        assert result["anomalies"][0]["variant_type"] == "Abnormal"
+
+    def test_anatomy_with_hpo_extraction(self, hpo_cr):
+        """HPO terms are extracted when an ``hpo_cr`` object is supplied."""
+        data = {
+            "fetuses": [
+                {
+                    "fetus": {
+                        "anatomy_text": "Findings consistent with Dandy-Walker malformation.",
+                        "anatomy": [
+                            {
+                                "main": {"label": "Brain", "anat_state": "Abnormal"},
+                                "detail": [],
+                                "anomalies": [
+                                    {"description": "Ventriculomegaly noted"}
+                                ],
+                            }
+                        ],
+                    }
+                }
+            ]
+        }
+
+        result = parse_fetal_anatomy(data, "observer_json", hpo_cr=hpo_cr)
+
+        # Should find HPO terms from the combined text
+        assert len(result["hpo_terms"]) > 0
+        hpo_ids = [t.hpo_id for t in result["hpo_terms"]]
+        # Either expected term is enough: HP:0001305 (Dandy-Walker malformation)
+        assert "HP:0001305" in hpo_ids or "HP:0002119" in hpo_ids  # or Ventriculomegaly
+
+    def test_anatomy_json_string_input(self):
+        """A JSON string is accepted in place of an already-decoded dict."""
+        data = json.dumps(
+            {
+                "fetuses": [
+                    {
+                        "fetus": {
+                            "anatomy_text": "Normal anatomy.",
+                            "anatomy": [
+                                {"main": {"label": "Face", "anat_state": "Normal"}}
+                            ],
+                        }
+                    }
+                ]
+            }
+        )
+
+        result = parse_fetal_anatomy(data, "observer_json")
+
+        assert "Face" in result["normal_structures"]
+
+    def test_empty_fetuses(self):
+        """An empty fetuses array yields empty structure and anomaly lists."""
+        data = {"fetuses": []}
+
+        result = parse_fetal_anatomy(data, "observer_json")
+
+        assert result["normal_structures"] == []
+        assert result["abnormal_structures"] == []
+        assert result["anomalies"] == []
+
+    def test_missing_anatomy_key(self):
+        """A fetus without an "anatomy" key keeps its text but lists nothing."""
+        data = {"fetuses": [{"fetus": {"anatomy_text": "Some text."}}]}
+
+        result = parse_fetal_anatomy(data, "observer_json")
+
+        assert result["anatomy_text"] == "Some text."
+        assert result["normal_structures"] == []
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestFetalAnatomyViewPointText:
+    def test_skeleton_returns_empty_structures(self):
+        """Section text is captured; the structure lists are not parsed yet."""
+        text = """Fetal Anatomy
+=============
+The following structures appear normal:
+Cranium. Brain. Face.
+"""
+
+        result = parse_fetal_anatomy(text, "viewpoint_text")
+
+        assert result["source_format"] == "viewpoint_text"
+        assert isinstance(result["normal_structures"], list)
+        assert isinstance(result["abnormal_structures"], list)
+        # Only the raw section body is checked; list contents are not pinned
+        assert "normal" in result["anatomy_text"].lower()
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestFetalAnatomyViewPointHL7:
+    def test_skeleton_returns_empty(self):
+        """The HL7 placeholder returns an empty viewpoint_hl7 result."""
+        hl7 = "MSH|...\nOBX|..."
+
+        result = parse_fetal_anatomy(hl7, "viewpoint_hl7")
+
+        assert result["source_format"] == "viewpoint_hl7"
+        assert result["normal_structures"] == []
+        assert result["anatomy_text"] == ""
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestFetalAnatomyEdgeCases:
+    def test_invalid_format(self):
+        """An unknown source_format raises ValueError."""
+        with pytest.raises(ValueError):
+            parse_fetal_anatomy("data", "invalid_format")
+
+    def test_non_string_viewpoint_text(self):
+        """Non-string data raises ValueError for viewpoint_text."""
+        with pytest.raises(ValueError):
+            parse_fetal_anatomy({"not": "string"}, "viewpoint_text")
+
+    def test_non_string_viewpoint_hl7(self):
+        """Non-string data raises ValueError for viewpoint_hl7."""
+        with pytest.raises(ValueError):
+            parse_fetal_anatomy({"not": "string"}, "viewpoint_hl7")

From 7bfc89e554164894eddf45d2545d33cb096bb7be Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 02:41:56 -0500
Subject: [PATCH 10/20] feat(etl): add estimated fetal weight section parser
 with growth classification

---
 .../etl/sections/estimated_fetal_weight.py    | 249 +++++++++++++++
 .../sections/test_estimated_fetal_weight.py   | 295 ++++++++++++++++++
 2 files changed, 544 insertions(+)
 create mode 100644 src/prenatalppkt/etl/sections/estimated_fetal_weight.py
 create mode 100644 tests/etl/sections/test_estimated_fetal_weight.py

diff --git a/src/prenatalppkt/etl/sections/estimated_fetal_weight.py b/src/prenatalppkt/etl/sections/estimated_fetal_weight.py
new file mode 100644
index 0000000..04ac910
--- /dev/null
+++ b/src/prenatalppkt/etl/sections/estimated_fetal_weight.py
@@ -0,0 +1,249 @@
+"""
+Estimated fetal weight (EFW) section parser.
+
+Extracts EFW values, percentiles, and growth classification.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import Dict, List, Optional, Union
+
+
+def parse_estimated_fetal_weight(data: Union[str, Dict], source_format: str) -> Dict:
+    """
+    Dispatch EFW parsing to the parser for ``source_format``.
+
+    Formats: "observer_json", "viewpoint_text" (basic pattern matching)
+    and "viewpoint_hl7" (skeleton that yields an empty result).
+
+    Args:
+        data: JSON string or dict for "observer_json"; a string otherwise.
+        source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+
+    Returns:
+        Dict with keys (value fields are None when no EFW is found):
+            - efw_grams: float - Primary EFW value in grams
+            - percentile: float - Percentile for primary EFW
+            - method: str - Calculation method (e.g., "Hadlock (AC, FL, HC)")
+            - within_normal_range: bool - True if 10th-90th percentile
+            - growth_category: str - "SGA", "AGA", or "LGA"
+            - all_estimates: List[Dict] - All EFW calculations
+            - source_format: str
+
+    Raises:
+        ValueError: If ``source_format`` is not supported, or if ViewPoint
+            data is not a string.
+    """
+    if source_format == "observer_json":
+        payload = json.loads(data) if isinstance(data, str) else data
+        return _parse_observer_efw(payload)
+
+    if source_format == "viewpoint_text":
+        if isinstance(data, str):
+            return _parse_viewpoint_text_efw(data)
+        raise ValueError("viewpoint_text data must be a string")
+
+    if source_format == "viewpoint_hl7":
+        if isinstance(data, str):
+            return _parse_viewpoint_hl7_efw(data)
+        raise ValueError("viewpoint_hl7 data must be a string")
+
+    raise ValueError(f"Unsupported source_format: {source_format}")
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_efw(json_data: Dict) -> Dict:
+    """
+    Extract EFW estimates for the first fetus from Observer JSON.
+
+    Path: fetuses[0].efws[] (only the first fetus is read). Fields used:
+    - label: str - method description (e.g., "EFW (AC, FL, HC)")
+    - value: float - weight in grams
+    - calculated_percentile: float
+    - print_in_report: int - 1 if this is the primary EFW
+
+    The primary estimate is the first entry with print_in_report == 1, or
+    the first entry when none is flagged. A missing or null value or
+    percentile is reported as None instead of 0, so an estimate without a
+    percentile is never classified as "SGA".
+
+    Returns:
+        Dict with the keys efw_grams, percentile, method,
+        within_normal_range, growth_category, all_estimates and
+        source_format; the empty result when no EFW entries exist.
+    """
+    fetuses = json_data.get("fetuses", [])
+    if not fetuses:
+        return _empty_result("observer_json")
+
+    efws = fetuses[0].get("efws", [])
+    if not efws:
+        return _empty_result("observer_json")
+
+    all_estimates: List[Dict] = []
+    primary_efw: Optional[Dict] = None
+
+    for efw in efws:
+        # Missing/null numbers stay None; a 0 default would read as 0 g / 0%.
+        value = efw.get("value")
+        percentile = efw.get("calculated_percentile")
+        print_in_report = efw.get("print_in_report", 0)
+
+        estimate = {
+            # e.g. label "EFW (AC, FL, HC)" -> "Hadlock (AC, FL, HC)"
+            "method": _extract_method_from_label(efw.get("label") or ""),
+            "grams": None if value is None else round(value, 1),
+            "percentile": None if percentile is None else round(percentile, 1),
+            "print_in_report": bool(print_in_report),
+        }
+        all_estimates.append(estimate)
+
+        # The first estimate flagged for the report becomes the primary one.
+        if print_in_report == 1 and primary_efw is None:
+            primary_efw = estimate
+
+    # Fallback to first estimate if none marked for report
+    if primary_efw is None:
+        primary_efw = all_estimates[0]
+
+    # Classify growth only when the primary estimate has a percentile.
+    percentile = primary_efw["percentile"]
+    has_percentile = percentile is not None
+
+    return {
+        "efw_grams": primary_efw["grams"],
+        "percentile": percentile,
+        "method": primary_efw["method"],
+        "within_normal_range": 10 <= percentile <= 90 if has_percentile else None,
+        "growth_category": _classify_growth(percentile) if has_percentile else None,
+        "all_estimates": all_estimates,
+        "source_format": "observer_json",
+    }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_efw(text: str) -> Dict:
+    """
+    Extract EFW from ViewPoint text reports (basic pattern matching).
+
+    Expected patterns:
+        EFW                    2,042    g                    2%
+        EFW (lb,oz)           4 lb 8    oz
+        EFW by                Hadlock (BPD-HC-AC-FL)
+
+    Only the grams/percentile line and the "EFW by" method line are read;
+    the lb/oz line is ignored. The empty result is returned when no grams
+    line with a percentile is found.
+
+    TODO @VarenyaJ: Implement full parsing
+    """
+    # Grams must start with a digit so a lone "," can never reach float().
+    efw_pattern = re.compile(r"EFW\s+([0-9][0-9,]*)\s+g\s+(\d+)%", re.IGNORECASE)
+    match = efw_pattern.search(text)
+    if match is None:
+        return _empty_result("viewpoint_text")
+
+    efw_grams = float(match.group(1).replace(",", ""))
+    percentile = float(match.group(2))
+
+    # The method line is optional; report "Unknown" when it is absent/blank.
+    method_match = re.search(r"EFW by\s+(.+)", text, re.IGNORECASE)
+    method = method_match.group(1).strip() if method_match else ""
+    method = method or "Unknown"
+
+    # 0% is a valid percentile and must classify as "SGA", so the percentile
+    # is never subjected to a truthiness test here.
+    growth_category = _classify_growth(percentile)
+    within_normal = 10 <= percentile <= 90
+
+    return {
+        "efw_grams": efw_grams,
+        "percentile": percentile,
+        "method": method,
+        "within_normal_range": within_normal,
+        "growth_category": growth_category,
+        "all_estimates": [
+            {
+                "method": method,
+                "grams": efw_grams,
+                "percentile": percentile,
+            }
+        ],
+        "source_format": "viewpoint_text",
+    }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_efw(hl7: str) -> Dict:
+    """
+    Placeholder for extracting EFW from HL7 ORU^R01 messages.
+
+    Note: EFW may not be present in all HL7 exports, so ``hl7`` is currently
+    ignored and the empty ``viewpoint_hl7`` result is always returned.
+
+    TODO @VarenyaJ: Implement if HL7 EFW encoding is discovered
+    """
+    return _empty_result("viewpoint_hl7")
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _extract_method_from_label(label: str) -> str:
+    """
+    Build the method name reported for an EFW label.
+
+    The first parenthesised group of ``label`` is taken as the parameter
+    list and prefixed with "Hadlock"; labels without one yield "Hadlock".
+
+    Examples:
+        "EFW (AC, FL, HC)" -> "Hadlock (AC, FL, HC)"
+        "EFW" -> "Hadlock"
+    """
+    found = re.search(r"\(([^)]+)\)", label)
+    return "Hadlock" if found is None else f"Hadlock ({found.group(1)})"
+
+
+def _classify_growth(percentile: float) -> str:
+    """
+    Map an EFW percentile to its growth category.
+
+    Returns:
+        "SGA" (Small for Gestational Age) below the 10th percentile,
+        "LGA" (Large for Gestational Age) above the 90th percentile,
+        "AGA" (Appropriate for Gestational Age) otherwise; the 10th and
+        90th percentiles themselves count as "AGA".
+    """
+    # Strict comparisons keep both boundary values in the AGA band.
+    if percentile < 10:
+        return "SGA"
+    return "LGA" if percentile > 90 else "AGA"
+
+
+def _empty_result(source_format: str) -> Dict:
+    """Build the "nothing found" EFW result tagged with ``source_format``."""
+    value_keys = (
+        "efw_grams",
+        "percentile",
+        "method",
+        "within_normal_range",
+        "growth_category",
+    )
+    blank: Dict = dict.fromkeys(value_keys)
+    return {**blank, "all_estimates": [], "source_format": source_format}
diff --git a/tests/etl/sections/test_estimated_fetal_weight.py b/tests/etl/sections/test_estimated_fetal_weight.py
new file mode 100644
index 0000000..68bc210
--- /dev/null
+++ b/tests/etl/sections/test_estimated_fetal_weight.py
@@ -0,0 +1,295 @@
+"""Tests for estimated fetal weight section parser."""
+
+import json
+import pytest
+
+from prenatalppkt.etl.sections.estimated_fetal_weight import (
+    parse_estimated_fetal_weight,
+)
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestEstimatedFetalWeightObserver:
+    def test_basic_efw(self):
+        """A single flagged estimate is rounded to 0.1 and classified AGA."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "fetus_number": 1,
+                            "label": "EFW (AC, FL, HC)",
+                            "value": 1014.828,
+                            "calculated_percentile": 55.6,
+                            "percentile_for_display": "56%",
+                            "print_in_report": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["efw_grams"] == 1014.8
+        assert result["percentile"] == 55.6
+        assert result["method"] == "Hadlock (AC, FL, HC)"
+        assert result["within_normal_range"] is True
+        assert result["growth_category"] == "AGA"
+        assert result["source_format"] == "observer_json"
+
+    def test_multiple_efw_estimates(self):
+        """The print_in_report=1 estimate is primary; all three are kept."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW (AC, FL, HC)",
+                            "value": 1014.828,
+                            "calculated_percentile": 55.6,
+                            "print_in_report": 1,
+                        },
+                        {
+                            "label": "EFW (AC, FL)",
+                            "value": 1042.214,
+                            "calculated_percentile": 63.7,
+                            "print_in_report": 0,
+                        },
+                        {
+                            "label": "EFW (AC, BPD)",
+                            "value": 1000.887,
+                            "calculated_percentile": 51.2,
+                            "print_in_report": 0,
+                        },
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        # Should select the one with print_in_report=1
+        assert result["efw_grams"] == 1014.8
+        assert len(result["all_estimates"]) == 3
+
+    def test_sga_classification(self):
+        """A 5th-percentile EFW is SGA and outside the normal range."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW (AC, FL, HC)",
+                            "value": 800.0,
+                            "calculated_percentile": 5.0,
+                            "print_in_report": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["growth_category"] == "SGA"
+        assert result["within_normal_range"] is False
+
+    def test_lga_classification(self):
+        """A 95th-percentile EFW is LGA and outside the normal range."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW (AC, FL, HC)",
+                            "value": 2500.0,
+                            "calculated_percentile": 95.0,
+                            "print_in_report": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["growth_category"] == "LGA"
+        assert result["within_normal_range"] is False
+
+    def test_json_string_input(self):
+        """A JSON string is accepted in place of an already-decoded dict."""
+        data = json.dumps(
+            {
+                "fetuses": [
+                    {
+                        "efws": [
+                            {
+                                "label": "EFW (AC, FL)",
+                                "value": 1200.0,
+                                "calculated_percentile": 50.0,
+                                "print_in_report": 1,
+                            }
+                        ]
+                    }
+                ]
+            }
+        )
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["efw_grams"] == 1200.0
+
+    def test_empty_fetuses(self):
+        """An empty fetuses array yields no EFW value and no estimates."""
+        data = {"fetuses": []}
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["efw_grams"] is None
+        assert result["all_estimates"] == []
+
+    def test_missing_efws_key(self):
+        """A fetus entry without an "efws" key yields no EFW value."""
+        data = {"fetuses": [{"fetus": {}}]}
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        assert result["efw_grams"] is None
+
+    def test_fallback_to_first_estimate(self):
+        """With no print_in_report=1 entry, the first estimate is primary."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW (AC, FL)",
+                            "value": 1100.0,
+                            "calculated_percentile": 45.0,
+                            "print_in_report": 0,
+                        },
+                        {
+                            "label": "EFW (AC, BPD)",
+                            "value": 1050.0,
+                            "calculated_percentile": 40.0,
+                            "print_in_report": 0,
+                        },
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+
+        # Should fall back to first estimate
+        assert result["efw_grams"] == 1100.0
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestEstimatedFetalWeightViewPointText:
+    def test_skeleton_returns_structure(self):
+        """A grams/percentile line plus an "EFW by" line parses without error."""
+        text = "EFW   2,042   g   2%\nEFW by   Hadlock"
+
+        result = parse_estimated_fetal_weight(text, "viewpoint_text")
+
+        assert result["source_format"] == "viewpoint_text"
+        # Only the result shape is pinned here, not the parsed values
+        assert isinstance(result["all_estimates"], list)
+
+    def test_no_efw_in_text(self):
+        """Text without an EFW line yields efw_grams None."""
+        text = "Fetal Biometry\nHC 250 mm"
+
+        result = parse_estimated_fetal_weight(text, "viewpoint_text")
+
+        assert result["efw_grams"] is None
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestEstimatedFetalWeightViewPointHL7:
+    def test_skeleton_returns_empty(self):
+        """The HL7 placeholder returns a viewpoint_hl7 result with no EFW."""
+        hl7 = "MSH|...\nOBX|..."
+
+        result = parse_estimated_fetal_weight(hl7, "viewpoint_hl7")
+
+        assert result["source_format"] == "viewpoint_hl7"
+        assert result["efw_grams"] is None
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestEstimatedFetalWeightEdgeCases:
+    def test_invalid_format(self):
+        """An unknown source_format raises ValueError."""
+        with pytest.raises(ValueError):
+            parse_estimated_fetal_weight("data", "invalid_format")
+
+    def test_non_string_viewpoint_text(self):
+        """Non-string data raises ValueError for viewpoint_text."""
+        with pytest.raises(ValueError):
+            parse_estimated_fetal_weight({"not": "string"}, "viewpoint_text")
+
+    def test_non_string_viewpoint_hl7(self):
+        """Non-string data raises ValueError for viewpoint_hl7."""
+        with pytest.raises(ValueError):
+            parse_estimated_fetal_weight({"not": "string"}, "viewpoint_hl7")
+
+    def test_boundary_aga_at_10_percentile(self):
+        """Exactly the 10th percentile counts as AGA and within range."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW",
+                            "value": 900.0,
+                            "calculated_percentile": 10.0,
+                            "print_in_report": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+        assert result["growth_category"] == "AGA"
+        assert result["within_normal_range"] is True
+
+    def test_boundary_aga_at_90_percentile(self):
+        """Exactly the 90th percentile counts as AGA and within range."""
+        data = {
+            "fetuses": [
+                {
+                    "efws": [
+                        {
+                            "label": "EFW",
+                            "value": 2000.0,
+                            "calculated_percentile": 90.0,
+                            "print_in_report": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_estimated_fetal_weight(data, "observer_json")
+        assert result["growth_category"] == "AGA"
+        assert result["within_normal_range"] is True

From 52cd807bc791f024373e2b4e4973b0137078dccb Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 02:42:19 -0500
Subject: [PATCH 11/20] feat(etl): add fetal ratios section parser with
 proportionality assessment

---
 src/prenatalppkt/etl/sections/fetal_ratios.py | 246 ++++++++++++++++
 tests/etl/sections/test_fetal_ratios.py       | 271 ++++++++++++++++++
 2 files changed, 517 insertions(+)
 create mode 100644 src/prenatalppkt/etl/sections/fetal_ratios.py
 create mode 100644 tests/etl/sections/test_fetal_ratios.py

diff --git a/src/prenatalppkt/etl/sections/fetal_ratios.py b/src/prenatalppkt/etl/sections/fetal_ratios.py
new file mode 100644
index 0000000..732b715
--- /dev/null
+++ b/src/prenatalppkt/etl/sections/fetal_ratios.py
@@ -0,0 +1,246 @@
+"""
+Fetal ratios section parser.
+
+Extracts biometric ratios (HC/AC, FL/BPD, FL/AC) and assesses proportionality.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from typing import Dict, List, Optional, Tuple, Union
+
+
+def parse_fetal_ratios(data: Union[str, Dict], source_format: str) -> Dict:
+    """
+    Parse the fetal ratios section of an ultrasound report.
+
+    Args:
+        data: JSON string or dict for "observer_json"; raw report text for
+            "viewpoint_text" and "viewpoint_hl7" (both currently skeletons).
+        source_format: "observer_json", "viewpoint_text" or "viewpoint_hl7".
+
+    Returns:
+        Dict with keys:
+            - ratios: List[Dict] - name, value, expected_range, within_range
+            - all_within_range: Optional[bool] - None when not assessable
+            - proportionality_assessment: "Normal", "Asymmetric" or "Unknown"
+            - source_format: str - the format that was parsed
+
+    Raises:
+        ValueError: Unsupported source_format, wrong data type for the
+            format, or invalid JSON text (JSONDecodeError subclasses it).
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        # Valid JSON may still be a list or scalar; reject it with the same
+        # exception type the text branches use instead of an AttributeError.
+        if not isinstance(data, dict):
+            raise ValueError("observer_json data must be a JSON object")
+        return _parse_observer_ratios(data)
+
+    if source_format in ("viewpoint_text", "viewpoint_hl7"):
+        if not isinstance(data, str):
+            raise ValueError(f"{source_format} data must be a string")
+        if source_format == "viewpoint_text":
+            return _parse_viewpoint_text_ratios(data)
+        return _parse_viewpoint_hl7_ratios(data)
+
+    raise ValueError(f"Unsupported source_format: {source_format}")
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_ratios(json_data: Dict) -> Dict:
+    """
+    Extract ratios for the first fetus from Observer JSON.
+
+    Path: fetuses[0].ratios[]
+    - label: str - ratio name (e.g., "HC/AC", "FL/BPD")
+    - value: float - calculated ratio value
+    - range: str - expected normal range (e.g., "1.04 - 1.22")
+    - fetus_number: int (not read here)
+
+    A ratio whose range or numeric value is missing gets within_range=None
+    and is left out of the overall verdicts, so absent data is never
+    reported as an out-of-range finding.
+
+    Returns:
+        Dict with ratios, all_within_range (None when no ratio could be
+        assessed), proportionality_assessment and source_format.
+    """
+    fetuses = json_data.get("fetuses", [])
+    if not fetuses:
+        return _empty_result("observer_json")
+
+    ratio_list = fetuses[0].get("ratios", [])
+    if not ratio_list:
+        return _empty_result("observer_json")
+
+    ratios: List[Dict] = []
+    for ratio in ratio_list:
+        value = ratio.get("value")
+        expected_range = _parse_range_string(ratio.get("range", ""))
+
+        # A null or textual value cannot be assessed; keep it out of the check.
+        if isinstance(value, (int, float)):
+            within_range = _is_within_range(value, expected_range)
+        else:
+            within_range = None
+
+        ratios.append(
+            {
+                "name": ratio.get("label", ""),
+                "value": round(value, 3) if isinstance(value, float) else value,
+                "expected_range": expected_range,
+                "within_range": within_range,
+            }
+        )
+
+    # Tri-state verdict: None means "nothing assessable", as in the ViewPoint
+    # parsers and _empty_result; unknown ratios do not count as failures.
+    assessed = [r["within_range"] for r in ratios if r["within_range"] is not None]
+    return {
+        "ratios": ratios,
+        "all_within_range": all(assessed) if assessed else None,
+        "proportionality_assessment": _assess_proportionality(ratios),
+        "source_format": "observer_json",
+    }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_ratios(text: str) -> Dict:
+    """
+    Extract ratios from ViewPoint text reports (skeleton).
+
+    Expected pattern (under Fetal Biometry section):
+        FL / HC                    0.23
+
+    No reference ranges are read from the text, so each ratio carries
+    expected_range=None / within_range=None and the verdict is "Unknown".
+
+    TODO @VarenyaJ: Implement full parsing
+    """
+    # <label> / <label> <number>. The number must contain a digit: a bare
+    # "[\d.]+" would also capture runs such as "..." and crash in float().
+    ratio_pattern = re.compile(
+        r"\b(FL|HC|AC|BPD)\s*/\s*(FL|HC|AC|BPD)\s+(\d+(?:\.\d*)?|\.\d+)",
+        re.IGNORECASE,
+    )
+
+    ratios: List[Dict] = [
+        {
+            "name": f"{match.group(1).upper()}/{match.group(2).upper()}",
+            "value": float(match.group(3)),
+            "expected_range": None,  # Not available in text format
+            "within_range": None,
+        }
+        for match in ratio_pattern.finditer(text)
+    ]
+
+    return {
+        "ratios": ratios,
+        "all_within_range": None,  # Cannot assess without ranges
+        "proportionality_assessment": "Unknown",
+        "source_format": "viewpoint_text",
+    }
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (SKELETON)
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_ratios(hl7: str) -> Dict:
+    """
+    Placeholder parser for ratios in HL7 ORU^R01 messages.
+
+    The hl7 argument is not inspected yet: ratios may not be present in
+    HL7 exports, so an empty "viewpoint_hl7" result is always returned.
+
+    TODO @VarenyaJ: Implement if HL7 ratio encoding is discovered
+    """
+    return _empty_result("viewpoint_hl7")
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _parse_range_string(range_str: str) -> Optional[Tuple[float, float]]:
+    """
+    Parse a "min - max" range (hyphen or en dash) into a (min, max) tuple.
+
+    Examples:
+        "1.04 - 1.22" -> (1.04, 1.22)
+        "20-24" -> (20.0, 24.0)
+        "", None, "invalid", "1.2.3 - 4" -> None
+    """
+    if not isinstance(range_str, str):
+        return None
+
+    # Strict decimals: a bare "[\d.]+" accepts "1.2.3" and crashes float().
+    number = r"(\d+(?:\.\d*)?|\.\d+)"
+    match = re.match(rf"{number}\s*[-\u2013]\s*{number}", range_str.strip())
+    if match is None:
+        return None
+    return (float(match.group(1)), float(match.group(2)))
+
+
+def _is_within_range(
+    value: float, expected_range: Optional[Tuple[float, float]]
+) -> Optional[bool]:
+    """
+    Check whether value lies inside expected_range (bounds inclusive).
+
+    Returns None when the range is missing or value is not a number, so
+    absent data is reported as "not assessable" instead of raising.
+    """
+    if expected_range is None or not isinstance(value, (int, float)):
+        return None
+    min_val, max_val = expected_range
+    return min_val <= value <= max_val
+
+
+def _assess_proportionality(ratios: List[Dict]) -> str:
+    """
+    Summarise fetal proportionality from per-ratio range checks.
+
+    Returns:
+        "Unknown"    - no ratios, or none has a known within_range verdict
+        "Asymmetric" - any assessed ratio (HC/AC or otherwise) is out of range
+        "Normal"     - every assessed ratio is within its range
+    """
+    if not ratios:
+        return "Unknown"
+
+    # An out-of-range HC/AC settles the verdict immediately.
+    hc_ac_abnormal = any(
+        r["name"] == "HC/AC" and r["within_range"] is False for r in ratios
+    )
+    if hc_ac_abnormal:
+        return "Asymmetric"
+
+    verdicts = [r["within_range"] for r in ratios if r["within_range"] is not None]
+    if not verdicts:
+        return "Unknown"
+    return "Normal" if all(verdicts) else "Asymmetric"
+
+
+def _empty_result(source_format: str) -> Dict:
+    """Build the "nothing parsed" payload tagged with source_format."""
+    empty: Dict = {"ratios": [], "all_within_range": None}
+    empty["proportionality_assessment"] = "Unknown"
+    empty["source_format"] = source_format
+    # A fresh dict (and fresh list) is created on every call, so callers
+    # may mutate the result freely.
+    return empty
diff --git a/tests/etl/sections/test_fetal_ratios.py b/tests/etl/sections/test_fetal_ratios.py
new file mode 100644
index 0000000..b5acd66
--- /dev/null
+++ b/tests/etl/sections/test_fetal_ratios.py
@@ -0,0 +1,271 @@
+"""Tests for fetal ratios section parser."""
+
+import json
+import pytest
+
+from prenatalppkt.etl.sections.fetal_ratios import parse_fetal_ratios
+
+
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+class TestFetalRatiosObserver:
+    def test_basic_ratios(self):
+        """Three in-range ratios parse to a Normal, all-within-range result."""
+        data = {
+            "fetuses": [
+                {
+                    "ratios": [
+                        {
+                            "label": "HC/AC",
+                            "value": 1.105,
+                            "range": "1.04 - 1.22",
+                            "fetus_number": 1,
+                        },
+                        {
+                            "label": "FL/AC",
+                            "value": 22.149,
+                            "range": "20 - 24",
+                            "fetus_number": 1,
+                        },
+                        {
+                            "label": "FL/BPD",
+                            "value": 75,
+                            "range": "71 - 87",
+                            "fetus_number": 1,
+                        },
+                    ]
+                }
+            ]
+        }
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert len(result["ratios"]) == 3
+        assert result["all_within_range"] is True
+        assert result["proportionality_assessment"] == "Normal"
+        assert result["source_format"] == "observer_json"
+
+        # Spot-check the parsed HC/AC entry: value, range tuple and verdict
+        hc_ac = next(r for r in result["ratios"] if r["name"] == "HC/AC")
+        assert hc_ac["value"] == 1.105
+        assert hc_ac["expected_range"] == (1.04, 1.22)
+        assert hc_ac["within_range"] is True
+
+    def test_ratio_out_of_range(self):
+        """An HC/AC value above its range is flagged and reported Asymmetric."""
+        data = {
+            "fetuses": [
+                {
+                    "ratios": [
+                        {
+                            "label": "HC/AC",
+                            "value": 1.35,  # Above the stated range
+                            "range": "1.04 - 1.22",
+                            "fetus_number": 1,
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert result["all_within_range"] is False
+        assert result["proportionality_assessment"] == "Asymmetric"
+        assert result["ratios"][0]["within_range"] is False
+
+    def test_asymmetric_growth_detection(self):
+        """An out-of-range HC/AC yields Asymmetric even if FL/BPD is in range."""
+        data = {
+            "fetuses": [
+                {
+                    "ratios": [
+                        {
+                            "label": "HC/AC",
+                            "value": 0.95,  # Below the stated range
+                            "range": "1.04 - 1.22",
+                        },
+                        {
+                            "label": "FL/BPD",
+                            "value": 80,  # Within range
+                            "range": "71 - 87",
+                        },
+                    ]
+                }
+            ]
+        }
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert result["proportionality_assessment"] == "Asymmetric"
+
+    def test_json_string_input(self):
+        """A JSON string payload is decoded before parsing."""
+        data = json.dumps(
+            {
+                "fetuses": [
+                    {
+                        "ratios": [
+                            {"label": "HC/AC", "value": 1.1, "range": "1.04 - 1.22"}
+                        ]
+                    }
+                ]
+            }
+        )
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert len(result["ratios"]) == 1
+
+    def test_empty_fetuses(self):
+        """An empty fetuses array gives no ratios and an unknown (None) verdict."""
+        data = {"fetuses": []}
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert result["ratios"] == []
+        assert result["all_within_range"] is None
+
+    def test_missing_ratios_key(self):
+        """A fetus entry without a "ratios" key gives an empty ratio list."""
+        data = {"fetuses": [{"fetus": {}}]}
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert result["ratios"] == []
+
+    def test_ratio_without_range(self):
+        """An empty range string leaves expected_range and within_range as None."""
+        data = {
+            "fetuses": [
+                {
+                    "ratios": [
+                        {
+                            "label": "HC/AC",
+                            "value": 1.1,
+                            "range": "",  # Empty range
+                        }
+                    ]
+                }
+            ]
+        }
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert result["ratios"][0]["expected_range"] is None
+        assert result["ratios"][0]["within_range"] is None
+
+    def test_boundary_values(self):
+        """Range bounds are inclusive: values at exactly min and max pass."""
+        data = {
+            "fetuses": [
+                {
+                    "ratios": [
+                        {
+                            "label": "HC/AC",
+                            "value": 1.04,
+                            "range": "1.04 - 1.22",
+                        },  # At min
+                        {"label": "FL/AC", "value": 24, "range": "20 - 24"},  # At max
+                    ]
+                }
+            ]
+        }
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        assert all(r["within_range"] for r in result["ratios"])
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestFetalRatiosViewPointText:
+    def test_skeleton_parses_ratio_pattern(self):
+        """The skeleton extracts an "FL / HC <number>" line as one ratio."""
+        text = """Fetal Biometry
+============
+FL / HC    0.23
+"""
+
+        result = parse_fetal_ratios(text, "viewpoint_text")
+
+        assert result["source_format"] == "viewpoint_text"
+        # Pin the parsed entry; a bare isinstance check passes on [] too.
+        assert [(r["name"], r["value"]) for r in result["ratios"]] == [("FL/HC", 0.23)]
+
+    def test_no_ratios_in_text(self):
+        """A biometry line without a "/" ratio yields an empty ratio list."""
+        text = "Fetal Biometry\nHC 250 mm"
+
+        result = parse_fetal_ratios(text, "viewpoint_text")
+
+        assert result["ratios"] == []
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7 (Skeleton)
+# ---------------------------------------------------------------------
+
+
+class TestFetalRatiosViewPointHL7:
+    def test_skeleton_returns_empty(self):
+        """The HL7 skeleton reports its format tag and no ratios."""
+        message = "MSH|...\nOBX|..."
+
+        parsed = parse_fetal_ratios(message, "viewpoint_hl7")
+
+        assert parsed["source_format"] == "viewpoint_hl7"
+        assert parsed["ratios"] == []
+
+
+# ---------------------------------------------------------------------
+# Edge Cases
+# ---------------------------------------------------------------------
+
+
+class TestFetalRatiosEdgeCases:
+    def test_invalid_format(self):
+        """An unknown source_format is rejected with ValueError."""
+        with pytest.raises(ValueError):
+            parse_fetal_ratios("data", "invalid_format")
+
+    def test_non_string_viewpoint_text(self):
+        """viewpoint_text refuses a non-string payload with ValueError."""
+        payload = {"not": "string"}
+        with pytest.raises(ValueError):
+            parse_fetal_ratios(payload, "viewpoint_text")
+
+    def test_non_string_viewpoint_hl7(self):
+        """viewpoint_hl7 refuses a non-string payload with ValueError."""
+        payload = {"not": "string"}
+        with pytest.raises(ValueError):
+            parse_fetal_ratios(payload, "viewpoint_hl7")
+
+    def test_malformed_range_string(self):
+        """A range that is not "min - max" parses to expected_range=None."""
+        ratio = {"label": "HC/AC", "value": 1.1, "range": "invalid"}
+        data = {"fetuses": [{"ratios": [ratio]}]}
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        # Only the parsed range is pinned here, not the overall verdict.
+        parsed = result["ratios"][0]
+        assert parsed["expected_range"] is None
+
+    def test_integer_ratio_value(self):
+        """Integer ratio values are accepted and range-checked."""
+        # 75 is an int, so it must come back unchanged (== 75).
+        ratio = {"label": "FL/BPD", "value": 75, "range": "71 - 87"}
+        data = {"fetuses": [{"ratios": [ratio]}]}
+
+        result = parse_fetal_ratios(data, "observer_json")
+
+        parsed = result["ratios"][0]
+        assert parsed["value"] == 75
+        assert parsed["within_range"] is True

From 0972eb5e062783b65067af440d6dc21aa35953c5 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 02:42:35 -0500
Subject: [PATCH 12/20] feat(etl): export new section parsers from sections
 module

---
 src/prenatalppkt/etl/sections/__init__.py | 29 +++++++++++++++--------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/prenatalppkt/etl/sections/__init__.py b/src/prenatalppkt/etl/sections/__init__.py
index 2d3fc17..3893145 100644
--- a/src/prenatalppkt/etl/sections/__init__.py
+++ b/src/prenatalppkt/etl/sections/__init__.py
@@ -2,18 +2,21 @@
 Section parsers for non-biometry clinical data.
 
 These parsers extract additional clinical information from ultrasound reports
-beyond fetal biometry measurements. They are designed to eventually integrate
-with HPO Clinical Record (CR) modules for comprehensive phenotype capture.
+beyond fetal biometry measurements. They return Dict objects with parsed data.
 
-Current Status: SKELETON IMPLEMENTATIONS
-- Basic parsing structure in place
-- Returns placeholder data
-- TODO comments describe future implementation
+Implemented parsers:
+- parse_clinical_indication: Extract reason for exam
+- parse_pregnancy_dating: Extract LMP, EDD, gestational age
+- parse_clinical_impression: Extract clinical narrative and HPO terms
+- parse_fetal_anatomy: Extract anatomy findings and HPO terms
+- parse_estimated_fetal_weight: Extract EFW and growth classification
+- parse_fetal_ratios: Extract biometric ratios and proportionality
 
-Future Integration:
-- Map findings to HPO terms using src/prenatalppkt/hpo modules
-- Support symmetric processing across Observer JSON, ViewPoint Text, and HL7
-- Enable full phenotype packet generation
+Skeleton parsers (TODO):
+- parse_maternal_history: OB history, complications
+- parse_placenta: Placental assessment
+- parse_amniotic_fluid: AFI, MVP measurements
+- parse_umbilical_cord: Vessel count, insertion site
 """
 
 from prenatalppkt.etl.sections.maternal_history import parse_maternal_history
@@ -21,6 +24,10 @@
 from prenatalppkt.etl.sections.clinical_indication import parse_clinical_indication
 from prenatalppkt.etl.sections.pregnancy_dating import parse_pregnancy_dating
 from prenatalppkt.etl.sections.fetal_anatomy import parse_fetal_anatomy
+from prenatalppkt.etl.sections.estimated_fetal_weight import (
+    parse_estimated_fetal_weight,
+)
+from prenatalppkt.etl.sections.fetal_ratios import parse_fetal_ratios
 from prenatalppkt.etl.sections.placenta import parse_placenta
 from prenatalppkt.etl.sections.amniotic_fluid import parse_amniotic_fluid
 from prenatalppkt.etl.sections.umbilical_cord import parse_umbilical_cord
@@ -31,6 +38,8 @@
     "parse_clinical_indication",
     "parse_pregnancy_dating",
     "parse_fetal_anatomy",
+    "parse_estimated_fetal_weight",
+    "parse_fetal_ratios",
     "parse_placenta",
     "parse_amniotic_fluid",
     "parse_umbilical_cord",

From f4285b4204f1d285579d0da2eabeb876a9dcfc03 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 02:43:23 -0500
Subject: [PATCH 13/20] style(etl): apply ruff formatting to fetal_anatomy parser

---
 .../etl/sections/fetal_anatomy.py             | 382 +++++++++---------
 1 file changed, 190 insertions(+), 192 deletions(-)

diff --git a/src/prenatalppkt/etl/sections/fetal_anatomy.py b/src/prenatalppkt/etl/sections/fetal_anatomy.py
index 5613aef..34c0351 100644
--- a/src/prenatalppkt/etl/sections/fetal_anatomy.py
+++ b/src/prenatalppkt/etl/sections/fetal_anatomy.py
@@ -13,49 +13,49 @@
 
 
 def parse_fetal_anatomy(
-   data: Union[str, Dict], source_format: str, hpo_cr=None
+    data: Union[str, Dict], source_format: str, hpo_cr=None
 ) -> Dict:
-   """
-   Parse fetal anatomy section.
-
-   Supports:
-       - observer_json
-       - viewpoint_text (skeleton)
-       - viewpoint_hl7 (skeleton)
-
-   Args:
-       data: Raw input data (JSON string, dict, or text)
-       source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
-       hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
-               If provided, will extract HPO terms from anomaly descriptions.
-
-   Returns:
-       Dict with keys:
-           - anatomy_text: str - Free text anatomy narrative
-           - normal_structures: List[str] - Structures marked Normal
-           - abnormal_structures: List[str] - Structures marked Abnormal
-           - not_visualized: List[str] - Structures marked Unseen
-           - anomalies: List[Dict] - Specific anomaly findings
-           - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
-           - source_format: str
-   """
-   if source_format == "observer_json":
-       if isinstance(data, str):
-           data = json.loads(data)
-       return _parse_observer_anatomy(data, hpo_cr)
-
-   elif source_format == "viewpoint_text":
-       if not isinstance(data, str):
-           raise ValueError("viewpoint_text data must be a string")
-       return _parse_viewpoint_text_anatomy(data, hpo_cr)
-
-   elif source_format == "viewpoint_hl7":
-       if not isinstance(data, str):
-           raise ValueError("viewpoint_hl7 data must be a string")
-       return _parse_viewpoint_hl7_anatomy(data, hpo_cr)
-
-   else:
-       raise ValueError(f"Unsupported source_format: {source_format}")
+    """
+    Parse fetal anatomy section.
+
+    Supports:
+        - observer_json
+        - viewpoint_text (skeleton)
+        - viewpoint_hl7 (skeleton)
+
+    Args:
+        data: Raw input data (JSON string, dict, or text)
+        source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+        hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
+                If provided, will extract HPO terms from anomaly descriptions.
+
+    Returns:
+        Dict with keys:
+            - anatomy_text: str - Free text anatomy narrative
+            - normal_structures: List[str] - Structures marked Normal
+            - abnormal_structures: List[str] - Structures marked Abnormal
+            - not_visualized: List[str] - Structures marked Unseen
+            - anomalies: List[Dict] - Specific anomaly findings
+            - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
+            - source_format: str
+    """
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        return _parse_observer_anatomy(data, hpo_cr)
+
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        return _parse_viewpoint_text_anatomy(data, hpo_cr)
+
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        return _parse_viewpoint_hl7_anatomy(data, hpo_cr)
+
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
 
 
 # ---------------------------------------------------------------------
@@ -64,113 +64,111 @@ def parse_fetal_anatomy(
 
 
 def _classify_structure(
-   label: str,
-   state: str,
-   normal: List[str],
-   abnormal: List[str],
-   unseen: List[str],
+    label: str, state: str, normal: List[str], abnormal: List[str], unseen: List[str]
 ) -> None:
-   """Classify a structure into the appropriate list based on state."""
-   if not label:
-       return
-   if state == "Normal" and label not in normal:
-       normal.append(label)
-   elif state == "Abnormal" and label not in abnormal:
-       abnormal.append(label)
-   elif state == "Unseen" and label not in unseen:
-       unseen.append(label)
+    """Classify a structure into the appropriate list based on state."""
+    if not label:
+        return
+    if state == "Normal" and label not in normal:
+        normal.append(label)
+    elif state == "Abnormal" and label not in abnormal:
+        abnormal.append(label)
+    elif state == "Unseen" and label not in unseen:
+        unseen.append(label)
 
 
 def _process_anatomy_item(
-   item: Dict,
-   normal: List[str],
-   abnormal: List[str],
-   unseen: List[str],
-   anomalies: List[Dict],
+    item: Dict,
+    normal: List[str],
+    abnormal: List[str],
+    unseen: List[str],
+    anomalies: List[Dict],
 ) -> None:
-   """Process a single anatomy item, extracting structures and anomalies."""
-   main = item.get("main", {})
-   label = main.get("label", "")
-   state = main.get("anat_state", "")
-
-   # Classify main structure
-   _classify_structure(label, state, normal, abnormal, unseen)
-
-   # Process detail sub-structures
-   for detail in item.get("detail", []):
-       detail_label = detail.get("label", "")
-       detail_state = detail.get("anat_det_state", "")
-       _classify_structure(detail_label, detail_state, normal, abnormal, unseen)
-
-   # Process anomalies
-   for anom in item.get("anomalies", []):
-       description = anom.get("description", "")
-       if description:
-           anomalies.append({
-               "structure": label,
-               "description": description,
-               "variant_type": anom.get("abnormal_or_normal_variant", "Abnormal"),
-           })
+    """Process a single anatomy item, extracting structures and anomalies."""
+    main = item.get("main", {})
+    label = main.get("label", "")
+    state = main.get("anat_state", "")
+
+    # Classify main structure
+    _classify_structure(label, state, normal, abnormal, unseen)
+
+    # Process detail sub-structures
+    for detail in item.get("detail", []):
+        detail_label = detail.get("label", "")
+        detail_state = detail.get("anat_det_state", "")
+        _classify_structure(detail_label, detail_state, normal, abnormal, unseen)
+
+    # Process anomalies
+    for anom in item.get("anomalies", []):
+        description = anom.get("description", "")
+        if description:
+            anomalies.append(
+                {
+                    "structure": label,
+                    "description": description,
+                    "variant_type": anom.get("abnormal_or_normal_variant", "Abnormal"),
+                }
+            )
 
 
 def _extract_hpo_terms(anatomy_text: str, anomalies: List[Dict], hpo_cr) -> List:
-   """Extract HPO terms from anatomy text and anomaly descriptions."""
-   if hpo_cr is None or not hasattr(hpo_cr, "parse"):
-       return []
+    """Extract HPO terms from anatomy text and anomaly descriptions."""
+    if hpo_cr is None or not hasattr(hpo_cr, "parse"):
+        return []
 
-   all_anomaly_text = " ".join(
-       a["description"] for a in anomalies if a.get("description")
-   )
-   combined_text = f"{anatomy_text} {all_anomaly_text}".strip()
+    all_anomaly_text = " ".join(
+        a["description"] for a in anomalies if a.get("description")
+    )
+    combined_text = f"{anatomy_text} {all_anomaly_text}".strip()
 
-   if not combined_text:
-       return []
+    if not combined_text:
+        return []
 
-   return hpo_cr.parse(combined_text)
+    return hpo_cr.parse(combined_text)
 
 
 def _parse_observer_anatomy(json_data: Dict, hpo_cr=None) -> Dict:
-   """
-   Extract anatomy findings from Observer JSON.
-
-   Paths:
-   - fetuses[i].fetus.anatomy_text - free text narrative
-   - fetuses[i].fetus.anatomy[] - structured findings
-     - main.label - structure name (e.g., "Head", "Face")
-     - main.anat_state - "Normal", "Abnormal", or "Unseen"
-     - detail[].label - sub-structure name
-     - detail[].anat_det_state - sub-structure state
-     - anomalies[].description - specific finding text
-     - anomalies[].abnormal_or_normal_variant - classification
-   """
-   fetuses = json_data.get("fetuses", [])
-   if not fetuses:
-       return _empty_result("observer_json")
-
-   fetus_block = fetuses[0].get("fetus", {})
-   anatomy_text = fetus_block.get("anatomy_text", "")
-
-   normal_structures: List[str] = []
-   abnormal_structures: List[str] = []
-   not_visualized: List[str] = []
-   anomalies: List[Dict] = []
-
-   for item in fetus_block.get("anatomy", []):
-       _process_anatomy_item(
-           item, normal_structures, abnormal_structures, not_visualized, anomalies
-       )
-
-   hpo_terms = _extract_hpo_terms(anatomy_text, anomalies, hpo_cr)
-
-   return {
-       "anatomy_text": anatomy_text,
-       "normal_structures": normal_structures,
-       "abnormal_structures": abnormal_structures,
-       "not_visualized": not_visualized,
-       "anomalies": anomalies,
-       "hpo_terms": hpo_terms,
-       "source_format": "observer_json",
-   }
+    """
+    Extract anatomy findings from Observer JSON.
+
+    Paths:
+    - fetuses[i].fetus.anatomy_text - free text narrative
+    - fetuses[i].fetus.anatomy[] - structured findings
+      - main.label - structure name (e.g., "Head", "Face")
+      - main.anat_state - "Normal", "Abnormal", or "Unseen"
+      - detail[].label - sub-structure name
+      - detail[].anat_det_state - sub-structure state
+      - anomalies[].description - specific finding text
+      - anomalies[].abnormal_or_normal_variant - classification
+    """
+    fetuses = json_data.get("fetuses", [])
+    if not fetuses:
+        return _empty_result("observer_json")
+
+    fetus_block = fetuses[0].get("fetus", {})
+    anatomy_text = fetus_block.get("anatomy_text", "")
+
+    normal_structures: List[str] = []
+    abnormal_structures: List[str] = []
+    not_visualized: List[str] = []
+    anomalies: List[Dict] = []
+
+    for item in fetus_block.get("anatomy", []):
+        _process_anatomy_item(
+            item, normal_structures, abnormal_structures, not_visualized, anomalies
+        )
+
+    hpo_terms = _extract_hpo_terms(anatomy_text, anomalies, hpo_cr)
+
+    return {
+        "anatomy_text": anatomy_text,
+        "normal_structures": normal_structures,
+        "abnormal_structures": abnormal_structures,
+        "not_visualized": not_visualized,
+        "anomalies": anomalies,
+        "hpo_terms": hpo_terms,
+        "source_format": "observer_json",
+    }
 
 
 # ---------------------------------------------------------------------
@@ -179,46 +177,46 @@ def _parse_observer_anatomy(json_data: Dict, hpo_cr=None) -> Dict:
 
 
 def _parse_viewpoint_text_anatomy(text: str, hpo_cr=None) -> Dict:
-   """
-   Extract anatomy from ViewPoint text reports.
-
-   Expected pattern:
-       Fetal Anatomy
-       =============
-       The following structures appear normal:
-       Cranium. Brain. Face. ...
-
-       The following structures appear abnormal:
-       GI tract: dilated bowel loops.
-
-       The following structures could not be adequately visualized:
-       LVOT view. RVOT view. ...
-
-   TODO @VarenyaJ: Implement full parsing
-   """
-   # Skeleton: Extract the Fetal Anatomy section
-   pattern = re.compile(
-       r"Fetal Anatomy\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
-       re.DOTALL | re.IGNORECASE,
-   )
-   match = pattern.search(text)
-   anatomy_text = match.group("body").strip() if match else ""
-
-   # TODO @VarenyaJ: Parse "appear normal", "appear abnormal", "could not be visualized" lists
-
-   hpo_terms = []
-   if anatomy_text and hpo_cr is not None and hasattr(hpo_cr, "parse"):
-       hpo_terms = hpo_cr.parse(anatomy_text)
-
-   return {
-       "anatomy_text": anatomy_text,
-       "normal_structures": [],
-       "abnormal_structures": [],
-       "not_visualized": [],
-       "anomalies": [],
-       "hpo_terms": hpo_terms,
-       "source_format": "viewpoint_text",
-   }
+    """
+    Extract anatomy from ViewPoint text reports.
+
+    Expected pattern:
+        Fetal Anatomy
+        =============
+        The following structures appear normal:
+        Cranium. Brain. Face. ...
+
+        The following structures appear abnormal:
+        GI tract: dilated bowel loops.
+
+        The following structures could not be adequately visualized:
+        LVOT view. RVOT view. ...
+
+    TODO @VarenyaJ: Implement full parsing
+    """
+    # Skeleton: Extract the Fetal Anatomy section
+    pattern = re.compile(
+        r"Fetal Anatomy\s*\n=+\n(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE,
+    )
+    match = pattern.search(text)
+    anatomy_text = match.group("body").strip() if match else ""
+
+    # TODO @VarenyaJ: Parse "appear normal", "appear abnormal", "could not be visualized" lists
+
+    hpo_terms = []
+    if anatomy_text and hpo_cr is not None and hasattr(hpo_cr, "parse"):
+        hpo_terms = hpo_cr.parse(anatomy_text)
+
+    return {
+        "anatomy_text": anatomy_text,
+        "normal_structures": [],
+        "abnormal_structures": [],
+        "not_visualized": [],
+        "anomalies": [],
+        "hpo_terms": hpo_terms,
+        "source_format": "viewpoint_text",
+    }
 
 
 # ---------------------------------------------------------------------
@@ -227,15 +225,15 @@ def _parse_viewpoint_text_anatomy(text: str, hpo_cr=None) -> Dict:
 
 
 def _parse_viewpoint_hl7_anatomy(hl7: str, hpo_cr=None) -> Dict:
-   """
-   Extract anatomy from HL7 ORU^R01 messages.
+    """
+    Extract anatomy from HL7 ORU^R01 messages.
 
-   Note: Anatomy is typically not encoded in discrete HL7 fields.
-   This is a skeleton for potential future implementation.
+    Note: Anatomy is typically not encoded in discrete HL7 fields.
+    This is a skeleton for potential future implementation.
 
-   TODO @VarenyaJ: Implement if HL7 anatomy encoding is discovered
-   """
-   return _empty_result("viewpoint_hl7")
+    TODO @VarenyaJ: Implement if HL7 anatomy encoding is discovered
+    """
+    return _empty_result("viewpoint_hl7")
 
 
 # ---------------------------------------------------------------------
@@ -244,13 +242,13 @@ def _parse_viewpoint_hl7_anatomy(hl7: str, hpo_cr=None) -> Dict:
 
 
 def _empty_result(source_format: str) -> Dict:
-   """Return empty result structure."""
-   return {
-       "anatomy_text": "",
-       "normal_structures": [],
-       "abnormal_structures": [],
-       "not_visualized": [],
-       "anomalies": [],
-       "hpo_terms": [],
-       "source_format": source_format,
-   }
\ No newline at end of file
+    """Return empty result structure."""
+    return {
+        "anatomy_text": "",
+        "normal_structures": [],
+        "abnormal_structures": [],
+        "not_visualized": [],
+        "anomalies": [],
+        "hpo_terms": [],
+        "source_format": source_format,
+    }

From c2e73534de7ab44690ce38594ee73e9706cd0351 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 09:46:18 -0500
Subject: [PATCH 14/20] ci: bump yq action to v4.52.2 for TOML parsing

---
 .github/actions/python_from_pyproject/action.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
index 79ee605..e3ce1cb 100644
--- a/.github/actions/python_from_pyproject/action.yaml
+++ b/.github/actions/python_from_pyproject/action.yaml
@@ -12,7 +12,7 @@ runs:
     steps:
         - name: Get project version with yq
           id: get_python_version
-          uses: mikefarah/yq@v4.46.1
+          uses: mikefarah/yq@v4.52.2
           with:
               cmd: yq '.project.requires-python' ${{ inputs.pyproject-file-path }}
 

From 580d4a262675878f8fe5bd59692a9470beed7e49 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 09:49:27 -0500
Subject: [PATCH 15/20] ci: force toml input format for yq (v4.52.2) to avoid
 TOML comment parsing bug

---
 .github/actions/python_from_pyproject/action.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
index e3ce1cb..9f2359c 100644
--- a/.github/actions/python_from_pyproject/action.yaml
+++ b/.github/actions/python_from_pyproject/action.yaml
@@ -14,7 +14,7 @@ runs:
           id: get_python_version
           uses: mikefarah/yq@v4.52.2
           with:
-              cmd: yq '.project.requires-python' ${{ inputs.pyproject-file-path }}
+              cmd: yq --input-format toml '.project.requires-python' ${{ inputs.pyproject-file-path }}
 
         -   name: Set up Python
             uses: actions/setup-python@v5.6.0

From cb055f96368ce72a1a3a7abf9269051658a83843 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 10:00:03 -0500
Subject: [PATCH 16/20] ci: read pyproject.toml using python tomllib instead of
 yq

---
 .../actions/python_from_pyproject/action.yaml | 39 +++++++++++--------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
index 9f2359c..5330579 100644
--- a/.github/actions/python_from_pyproject/action.yaml
+++ b/.github/actions/python_from_pyproject/action.yaml
@@ -1,22 +1,29 @@
 name: Install python from pyproject.toml
-description: 'Installs Python from the version found in the pyproject.toml'
+description: Installs Python from the version found in pyproject.toml
 
 inputs:
-  pyproject-file-path:
-      required: False
-      description: "Path to the pyproject.toml including filename"
-      default: "./pyproject.toml"
+ pyproject-file-path:
+   required: false
+   description: Path to the pyproject.toml including filename
+   default: ./pyproject.toml
 
 runs:
-    using: composite
-    steps:
-        - name: Get project version with yq
-          id: get_python_version
-          uses: mikefarah/yq@v4.52.2
-          with:
-              cmd: yq --input-format toml '.project.requires-python' ${{ inputs.pyproject-file-path }}
+ using: composite
+ steps:
+   - name: Read requires-python from pyproject.toml
+     id: get_python_version
+     shell: bash
+     run: |
+       python - <<'EOF'
+       import tomllib
+       from pathlib import Path
 
-        -   name: Set up Python
-            uses: actions/setup-python@v5.6.0
-            with:
-                python-version: ${{ steps.get_python_version.outputs.result }}
\ No newline at end of file
+       path = Path("${{ inputs.pyproject-file-path }}")
+       data = tomllib.loads(path.read_text())
+       print(f"result={data['project']['requires-python']}")
+       EOF >> "$GITHUB_OUTPUT"
+
+   - name: Set up Python
+     uses: actions/setup-python@v5.6.0
+     with:
+       python-version: ${{ steps.get_python_version.outputs.result }}
\ No newline at end of file

From 5c4e04029a2df1699ba3e02b3776217b459e33a8 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 10:26:56 -0500
Subject: [PATCH 17/20] ci: retrigger workflow after removing yq


From a39156af0da0a81062d553eebd98fd594638b197 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 10:48:43 -0500
Subject: [PATCH 18/20] ci: replace heredoc with python -c for tomllib parsing

---
 .github/actions/python_from_pyproject/action.yaml | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
index 5330579..ab44d39 100644
--- a/.github/actions/python_from_pyproject/action.yaml
+++ b/.github/actions/python_from_pyproject/action.yaml
@@ -14,14 +14,10 @@ runs:
      id: get_python_version
      shell: bash
      run: |
-       python - <<'EOF'
-       import tomllib
-       from pathlib import Path
-
-       path = Path("${{ inputs.pyproject-file-path }}")
-       data = tomllib.loads(path.read_text())
-       print(f"result={data['project']['requires-python']}")
-       EOF >> "$GITHUB_OUTPUT"
+       python -c "import tomllib, pathlib; \
+       p = pathlib.Path('${{ inputs.pyproject-file-path }}'); \
+       req = tomllib.loads(p.read_text())['project']['requires-python']; \
+       print(f'result={req}')" >> \"$GITHUB_OUTPUT\"
 
    - name: Set up Python
      uses: actions/setup-python@v5.6.0

From bccfd4e9e094aebec1170469b22391bb45e91c68 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Mon, 2 Feb 2026 10:50:52 -0500
Subject: [PATCH 19/20] ci: replace tomllib fix with a one-liner

---
 .github/actions/python_from_pyproject/action.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
index ab44d39..13d6210 100644
--- a/.github/actions/python_from_pyproject/action.yaml
+++ b/.github/actions/python_from_pyproject/action.yaml
@@ -14,10 +14,7 @@ runs:
      id: get_python_version
      shell: bash
      run: |
-       python -c "import tomllib, pathlib; \
-       p = pathlib.Path('${{ inputs.pyproject-file-path }}'); \
-       req = tomllib.loads(p.read_text())['project']['requires-python']; \
-       print(f'result={req}')" >> \"$GITHUB_OUTPUT\"
+       python -c "import sys, tomllib, pathlib; req = tomllib.loads(pathlib.Path(sys.argv[1]).read_text(encoding='utf-8'))['project']['requires-python']; print(f'result={req}')" "${{ inputs.pyproject-file-path }}" >> "$GITHUB_OUTPUT"
 
    - name: Set up Python
      uses: actions/setup-python@v5.6.0

From 31ead32174a9d690db5bf392618b1d42ce6cdbd5 Mon Sep 17 00:00:00 2001
From: VarenyaJ <varenyajj@gmail.com>
Date: Thu, 5 Feb 2026 20:09:03 -0500
Subject: [PATCH 20/20] feat(notebook): include new section parsers in revised
 demo

---
 prenatalppkt.ipynb | 1470 ++++++++++----------------------------------
 1 file changed, 332 insertions(+), 1138 deletions(-)

diff --git a/prenatalppkt.ipynb b/prenatalppkt.ipynb
index badee2e..cb51d6d 100644
--- a/prenatalppkt.ipynb
+++ b/prenatalppkt.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "d8f2cfce",
    "metadata": {},
    "outputs": [
@@ -18,815 +18,28 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for head_circumference\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for biparietal_diameter\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for femur_length\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for abdominal_circumference\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for occipitofrontal_diameter\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Loaded mappings for: ['head_circumference', 'biparietal_diameter', 'femur_length', 'abdominal_circumference', 'occipitofrontal_diameter']\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Starting Observer JSON extraction\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing fetus 1\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Found 6 measurements\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: AC\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:AC has percentile=55.6% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for AC: value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=AC, value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0034207 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: BPD\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:BPD has percentile=51.2% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for BPD: value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=BPD, value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: HC\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:HC has percentile=42.5% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for HC: value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=HC, value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Femur has percentile=46.8% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Femur: value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Femur, value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0002823 - Abnormal femur morphology\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0002823 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Nuchal Fold\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Nuchal Fold has percentile=0% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Nuchal Fold: value=10.0mm, percentile=0%, ga=<GestationalAge: 0 weeks, 0 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Nuchal Fold, value=10.0mm, percentile=0.0%, ga=<GestationalAge: 0 weeks, 0 days>, method=None\n",
-      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Nuchal Fold' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Cerebellum\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Cerebellum has percentile=0% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Cerebellum: value=30.0mm, percentile=0%, ga=<GestationalAge: 27 weeks, 2 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Cerebellum, value=30.0mm, percentile=0.0%, ga=<GestationalAge: 27 weeks, 2 days>, method=None\n",
-      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Cerebellum' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Successfully parsed 4 measurements\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Validating 4 TermBins for required measurements\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: AC\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'HC', 'Femur', 'AC', 'BPD'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'HC', 'Femur', 'AC', 'BPD'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
-      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "================================================================================\n",
-      "PRENATALPPKT ETL PIPELINE\n",
-      "Observer JSON → TermBins → Phenopacket v2.0\n",
-      "================================================================================\n",
-      "\n",
-      " STEP 1: Loading Observer JSON...\n",
-      "Loaded: tests/data/Apple_Sally_pretty.json\n",
-      "Fetuses: 1\n",
-      "Measurements: 6\n",
-      "Sample: AC = 22.62 cm\n",
-      "\n",
-      "  STEP 2: Extracting biometry measurements to TermBins...\n",
-      " Extracted 4 TermBins\n",
-      "\n",
-      "  [1] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "      Normal: True\n",
-      "\n",
-      "  [2] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0000240 - Abnormality of skull size\n",
-      "      Normal: True\n",
-      "\n",
-      "  [3] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0000240 - Abnormality of skull size\n",
-      "      Normal: True\n",
-      "\n",
-      "  [4] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
-      "      HPO: HP:0002823 - Abnormal femur morphology\n",
-      "      Normal: True\n",
-      "\n",
-      " STEP 3: Converting TermBins to PhenotypicFeatures...\n",
-      " Generated 4 PhenotypicFeatures\n",
-      "\n",
-      "  [1] HP:0034207\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [2] HP:0000240\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [3] HP:0000240\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [4] HP:0002823\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
-      "\n",
-      " STEP 4: Building Phenopacket v2.0...\n",
-      "✓ Phenopacket created successfully\n",
-      "\n",
-      "================================================================================\n",
-      " PHENOPACKET v2.0 OUTPUT (JSON)\n",
-      "================================================================================\n",
-      "{\n",
-      "  \"id\": \"apple-sally-fetus-1\",\n",
-      "  \"subject\": {\n",
-      "    \"id\": \"fetus-1\",\n",
-      "    \"time_at_last_encounter\": {\n",
-      "      \"gestational_age\": {\n",
-      "        \"weeks\": 26,\n",
-      "        \"days\": 6\n",
-      "      }\n",
-      "    }\n",
-      "  },\n",
-      "  \"phenotypic_features\": [\n",
-      "    {\n",
-      "      \"description\": \"AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0034207\",\n",
-      "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0000240\",\n",
-      "        \"label\": \"Abnormality of skull size\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0000240\",\n",
-      "        \"label\": \"Abnormality of skull size\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0002823\",\n",
-      "        \"label\": \"Abnormal femur morphology\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 27\n",
-      "        }\n",
-      "      }\n",
-      "    }\n",
-      "  ],\n",
-      "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-26T15:21:08.287048Z\",\n",
-      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
-      "    \"resources\": [\n",
-      "      {\n",
-      "        \"id\": \"hp\",\n",
-      "        \"name\": \"Human Phenotype Ontology\",\n",
-      "        \"url\": \"http://purl.obolibrary.org/obo/hp.owl\",\n",
-      "        \"version\": \"2025-11-24\",\n",
-      "        \"namespace_prefix\": \"HP\",\n",
-      "        \"iri_prefix\": \"http://purl.obolibrary.org/obo/HP_\"\n",
-      "      }\n",
-      "    ],\n",
-      "    \"phenopacket_schema_version\": \"2.0\"\n",
-      "  }\n",
-      "}\n",
-      "\n",
-      "================================================================================\n",
-      " VALIDATION SUMMARY\n",
-      "================================================================================\n",
-      "\n",
-      " Phenopacket Structure:\n",
-      "   ID: apple-sally-fetus-1\n",
-      "   Subject ID: fetus-1\n",
-      "   Subject GA: 26w6d\n",
-      "   Sex: UNKNOWN_SEX\n",
-      "   Phenotypic Features: 4\n",
-      "   Schema Version: 2.0\n",
-      "   HPO Resource: 2025-11-24\n",
-      "\n",
-      " Phenotypic Features Detail:\n",
-      "\n",
-      "  [1] HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "       Normal (excluded)\n",
-      "      Onset: 26w6d\n",
-      "      Detail: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [2] HP:0000240 - Abnormality of skull size\n",
-      "       Normal (excluded)\n",
-      "      Onset: 26w6d\n",
-      "      Detail: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [3] HP:0000240 - Abnormality of skull size\n",
-      "       Normal (excluded)\n",
-      "      Onset: 26w6d\n",
-      "      Detail: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [4] HP:0002823 - Abnormal femur morphology\n",
-      "       Normal (excluded)\n",
-      "      Onset: 27w0d\n",
-      "      Detail: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
-      "\n",
-      " Summary Statistics:\n",
-      "  Total features: 4\n",
-      "  Normal (excluded): 4\n",
-      "  Abnormal (observed): 0\n",
-      "\n",
-      "================================================================================\n",
-      " SUCCESS: Valid Phenopacket v2.0 generated\n",
-      "================================================================================\n",
-      "\n",
-      " Phenopacket saved to: output/apple_sally_phenopacket_v2.json\n",
-      "\n",
-      " Validation: Round-trip test...\n",
-      " Validation passed\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Initial Demo\n",
-    "\"\"\"\n",
-    "PRENATALPPKT ETL PIPELINE\n",
-    "Observer JSON → TermBins → Phenopacket v2.0\n",
-    "\n",
-    "Uses the official GA4GH phenopackets library per:\n",
-    "https://phenopacket-schema.readthedocs.io/en/latest/python.html\n",
-    "\"\"\"\n",
-    "\n",
-    "import json\n",
-    "import re\n",
-    "from datetime import datetime, timezone\n",
-    "from pathlib import Path\n",
-    "\n",
-    "from google.protobuf.json_format import MessageToJson\n",
-    "from google.protobuf.timestamp_pb2 import Timestamp\n",
-    "import phenopackets.schema.v2 as pps2\n",
-    "\n",
-    "from prenatalppkt.etl.extractors import observer\n",
-    "from prenatalppkt.gestational_age import GestationalAge\n",
-    "\n",
-    "print(\"=\" * 80)\n",
-    "print(\"PRENATALPPKT ETL PIPELINE\")\n",
-    "print(\"Observer JSON → TermBins → Phenopacket v2.0\")\n",
-    "print(\"=\" * 80)\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 1: Load Apple Sally Observer JSON\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n STEP 1: Loading Observer JSON...\")\n",
-    "\n",
-    "data_path = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
-    "with open(data_path) as f:\n",
-    "    observer_data = json.load(f)\n",
-    "\n",
-    "print(f\"Loaded: {data_path}\")\n",
-    "print(f\"Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
-    "\n",
-    "first_fetus = observer_data[\"fetuses\"][0]\n",
-    "measurements = first_fetus.get(\"measurements\", [])\n",
-    "print(f\"Measurements: {len(measurements)}\")\n",
-    "print(\n",
-    "    f\"Sample: {measurements[0]['label']} = \"\n",
-    "    f\"{measurements[0]['value']} {measurements[0]['unit_of_measure']}\"\n",
-    ")\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 2: Extract TermBins using Observer extractor\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n  STEP 2: Extracting biometry measurements to TermBins...\")\n",
-    "\n",
-    "term_bins = observer.extract(observer_data)\n",
-    "print(f\" Extracted {len(term_bins)} TermBins\")\n",
-    "\n",
-    "for i, tb in enumerate(term_bins, 1):\n",
-    "    print(f\"\\n  [{i}] {tb.description}\")\n",
-    "    print(f\"      HPO: {tb.hpo_id} - {tb.hpo_label}\")\n",
-    "    print(f\"      Normal: {tb.normal}\")\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 3: Convert TermBins → Phenotypic Features (using phenopackets library)\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n STEP 3: Converting TermBins to PhenotypicFeatures...\")\n",
-    "\n",
-    "\n",
-    "def parse_ga_from_description(description: str) -> tuple[int, int]:\n",
-    "    \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
-    "    match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
-    "    if match:\n",
-    "        return int(match.group(1)), int(match.group(2))\n",
-    "    # Fallback\n",
-    "    first_m = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
-    "    ga = GestationalAge.from_weeks(first_m.get(\"calculated_ega\", 26.9))\n",
-    "    return ga.weeks, ga.days\n",
-    "\n",
-    "\n",
-    "phenotypic_features = []\n",
-    "\n",
-    "for tb in term_bins:\n",
-    "    weeks, days = parse_ga_from_description(tb.description)\n",
-    "\n",
-    "    # Create GestationalAge message\n",
-    "    gestational_age = pps2.GestationalAge(weeks=weeks, days=days)\n",
-    "\n",
-    "    # Create TimeElement with gestational_age\n",
-    "    onset = pps2.TimeElement(gestational_age=gestational_age)\n",
-    "\n",
-    "    # Create OntologyClass for the HPO term\n",
-    "    hpo_type = pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label)\n",
-    "\n",
-    "    # Create PhenotypicFeature\n",
-    "    pf = pps2.PhenotypicFeature(\n",
-    "        type=hpo_type,\n",
-    "        excluded=tb.normal,  # If normal=True, abnormality is excluded\n",
-    "        onset=onset,\n",
-    "        description=tb.description,\n",
-    "    )\n",
-    "\n",
-    "    phenotypic_features.append(pf)\n",
-    "\n",
-    "print(f\" Generated {len(phenotypic_features)} PhenotypicFeatures\")\n",
-    "\n",
-    "for i, pf in enumerate(phenotypic_features, 1):\n",
-    "    status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
-    "    print(f\"\\n  [{i}] {pf.type.id}\")\n",
-    "    print(f\"      Status: {status}\")\n",
-    "    print(f\"      Description: {pf.description}\")\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 4: Build Complete Phenopacket v2.0\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n STEP 4: Building Phenopacket v2.0...\")\n",
-    "\n",
-    "# Get subject GA from first measurement\n",
-    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
-    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
-    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
-    "\n",
-    "# Create Individual (subject) with GestationalAge\n",
-    "subject_time = pps2.TimeElement(\n",
-    "    gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
-    ")\n",
-    "\n",
-    "subject = pps2.Individual(\n",
-    "    id=\"fetus-1\",\n",
-    "    sex=pps2.Sex.UNKNOWN_SEX,\n",
-    "    time_at_last_encounter=subject_time,\n",
-    ")\n",
-    "\n",
-    "# Create timestamp for metadata\n",
-    "now = datetime.now(timezone.utc)\n",
-    "created_timestamp = Timestamp()\n",
-    "created_timestamp.FromDatetime(now)\n",
-    "\n",
-    "# Create HPO Resource\n",
-    "hpo_resource = pps2.Resource(\n",
-    "    id=\"hp\",\n",
-    "    name=\"Human Phenotype Ontology\",\n",
-    "    url=\"http://purl.obolibrary.org/obo/hp.owl\",\n",
-    "    version=\"2025-11-24\",\n",
-    "    namespace_prefix=\"HP\",\n",
-    "    iri_prefix=\"http://purl.obolibrary.org/obo/HP_\",\n",
-    ")\n",
-    "\n",
-    "# Create MetaData\n",
-    "metadata = pps2.MetaData(\n",
-    "    created=created_timestamp,\n",
-    "    created_by=\"prenatalppkt-etl-pipeline\",\n",
-    "    phenopacket_schema_version=\"2.0\",\n",
-    ")\n",
-    "metadata.resources.append(hpo_resource)\n",
-    "\n",
-    "# Create the Phenopacket\n",
-    "phenopacket = pps2.Phenopacket(\n",
-    "    id=\"apple-sally-fetus-1\",\n",
-    "    subject=subject,\n",
-    "    meta_data=metadata,\n",
-    ")\n",
-    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
-    "\n",
-    "print(\"✓ Phenopacket created successfully\")\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 5: Display Results as JSON\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n\" + \"=\" * 80)\n",
-    "print(\" PHENOPACKET v2.0 OUTPUT (JSON)\")\n",
-    "print(\"=\" * 80)\n",
-    "\n",
-    "# Convert protobuf message to JSON using official method\n",
-    "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
-    "print(phenopacket_json)\n",
-    "\n",
-    "# -----------------------------------------------------------------------------\n",
-    "# STEP 6: Validation Summary\n",
-    "# -----------------------------------------------------------------------------\n",
-    "print(\"\\n\" + \"=\" * 80)\n",
-    "print(\" VALIDATION SUMMARY\")\n",
-    "print(\"=\" * 80)\n",
-    "\n",
-    "print(\"\\n Phenopacket Structure:\")\n",
-    "print(f\"   ID: {phenopacket.id}\")\n",
-    "print(f\"   Subject ID: {phenopacket.subject.id}\")\n",
-    "print(f\"   Subject GA: {subject_ga.weeks}w{subject_ga.days}d\")\n",
-    "print(f\"   Sex: {pps2.Sex.Name(phenopacket.subject.sex)}\")\n",
-    "print(f\"   Phenotypic Features: {len(phenopacket.phenotypic_features)}\")\n",
-    "print(f\"   Schema Version: {phenopacket.meta_data.phenopacket_schema_version}\")\n",
-    "print(f\"   HPO Resource: {phenopacket.meta_data.resources[0].version}\")\n",
-    "\n",
-    "print(\"\\n Phenotypic Features Detail:\")\n",
-    "for i, pf in enumerate(phenopacket.phenotypic_features, 1):\n",
-    "    status = \" Normal (excluded)\" if pf.excluded else \"Abnormal (observed)\"\n",
-    "    ga = pf.onset.gestational_age\n",
-    "    print(f\"\\n  [{i}] {pf.type.id} - {pf.type.label}\")\n",
-    "    print(f\"      {status}\")\n",
-    "    print(f\"      Onset: {ga.weeks}w{ga.days}d\")\n",
-    "    print(f\"      Detail: {pf.description}\")\n",
-    "\n",
-    "# Count normal vs abnormal\n",
-    "normal_count = sum(1 for pf in phenopacket.phenotypic_features if pf.excluded)\n",
-    "abnormal_count = len(phenopacket.phenotypic_features) - normal_count\n",
-    "\n",
-    "print(\"\\n Summary Statistics:\")\n",
-    "print(f\"  Total features: {len(phenopacket.phenotypic_features)}\")\n",
-    "print(f\"  Normal (excluded): {normal_count}\")\n",
-    "print(f\"  Abnormal (observed): {abnormal_count}\")\n",
-    "\n",
-    "print(\"\\n\" + \"=\" * 80)\n",
-    "print(\" SUCCESS: Valid Phenopacket v2.0 generated\")\n",
-    "print(\"=\" * 80)\n",
-    "\n",
-    "# Save to file\n",
-    "output_path = Path(\"output/apple_sally_phenopacket_v2.json\")\n",
-    "output_path.parent.mkdir(exist_ok=True)\n",
-    "with open(output_path, \"w\") as f:\n",
-    "    f.write(phenopacket_json)\n",
-    "print(f\"\\n Phenopacket saved to: {output_path}\")\n",
-    "\n",
-    "# Validate by round-tripping\n",
-    "print(\"\\n Validation: Round-trip test...\")\n",
-    "from google.protobuf.json_format import Parse\n",
-    "\n",
-    "parsed_back = Parse(phenopacket_json, pps2.Phenopacket())\n",
-    "assert parsed_back.id == phenopacket.id\n",
-    "assert len(parsed_back.phenotypic_features) == len(phenopacket.phenotypic_features)\n",
-    "print(\" Validation passed\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "3685f9e5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for head_circumference\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for biparietal_diameter\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for femur_length\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for abdominal_circumference\n",
-      "DEBUG:prenatalppkt.mapping_loader:Loaded 8 bins for occipitofrontal_diameter\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Loaded mappings for: ['head_circumference', 'biparietal_diameter', 'femur_length', 'abdominal_circumference', 'occipitofrontal_diameter']\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Starting Observer JSON extraction\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing fetus 1\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Found 6 measurements\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: AC\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:AC has percentile=55.6% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for AC: value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=AC, value=226.20000000000002mm, percentile=55.6%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0034207 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: BPD\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:BPD has percentile=51.2% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for BPD: value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=BPD, value=66.8mm, percentile=51.2%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: HC\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:HC has percentile=42.5% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for HC: value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=HC, value=250.0mm, percentile=42.5%, ga=<GestationalAge: 26 weeks, 6 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0000240 - Abnormality of skull size\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0000240 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Femur has percentile=46.8% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Femur: value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Femur, value=50.099999999999994mm, percentile=46.8%, ga=<GestationalAge: 27 weeks, 0 days>, method=None\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Selected HPO: HP:0002823 - Abnormal femur morphology\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Created TermBin: HP:0002823 - normal=True\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Nuchal Fold\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Nuchal Fold has percentile=0% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Nuchal Fold: value=10.0mm, percentile=0%, ga=<GestationalAge: 0 weeks, 0 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Nuchal Fold, value=10.0mm, percentile=0.0%, ga=<GestationalAge: 0 weeks, 0 days>, method=None\n",
-      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Nuchal Fold' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Processing measurement: Cerebellum\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Cerebellum has percentile=0% (valid)\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Creating TermBin for Cerebellum: value=30.0mm, percentile=0%, ga=<GestationalAge: 27 weeks, 2 days>\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Creating TermBin: name=Cerebellum, value=30.0mm, percentile=0.0%, ga=<GestationalAge: 27 weeks, 2 days>, method=None\n",
-      "WARNING:prenatalppkt.etl.term_bin_factory:No HPO mapping for optional measurement 'Cerebellum' - skipping. TODO(@VarenyaJ): Add HPO terms when available\n",
-      "DEBUG:prenatalppkt.etl.extractors.observer:Successfully parsed 4 measurements\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Validating 4 TermBins for required measurements\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: AC\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: BPD\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: HC\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Found measurement: Femur\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Present: {'HC', 'Femur', 'AC', 'BPD'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:Required: {'HC', 'Femur', 'AC', 'BPD'}\n",
-      "DEBUG:prenatalppkt.etl.term_bin_factory:All required measurements present\n",
-      "INFO:prenatalppkt.etl.extractors.observer:Extracted 4 TermBins from Observer JSON\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      " STEP 1: Loading Observer JSON...\n",
-      "Loaded: tests/data/Apple_Sally_pretty.json\n",
-      "Fetuses: 1\n",
-      "Measurements: 6\n",
-      "Sample: AC =  22.62 cm\n",
-      "\n",
-      "  STEP 2: Extracting biometry measurements to TermBins...\n",
-      " Extracted 4 TermBins\n",
-      "\n",
-      "  [1] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "      Normal: True\n",
-      "\n",
-      "  [2] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0000240 - Abnormality of skull size\n",
-      "      Normal: True\n",
-      "\n",
-      "  [3] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "      HPO: HP:0000240 - Abnormality of skull size\n",
-      "      Normal: True\n",
-      "\n",
-      "  [4] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
-      "      HPO: HP:0002823 - Abnormal femur morphology\n",
-      "      Normal: True\n",
-      "\n",
-      " STEP 3: Converting TermBins to PhenotypicFeatures...\n",
-      " Generated 4 PhenotypicFeatures\n",
-      "\n",
-      "  [1] HP:0034207\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [2] HP:0000240\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [3] HP:0000240\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "\n",
-      "  [4] HP:0002823\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Description: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
-      "\n",
-      " STEP 4: Building Phenopacket v2.0...\n",
-      "{\n",
-      "  \"id\": \"apple-sally-fetus-1\",\n",
-      "  \"subject\": {\n",
-      "    \"id\": \"fetus-1\",\n",
-      "    \"time_at_last_encounter\": {\n",
-      "      \"gestational_age\": {\n",
-      "        \"weeks\": 26,\n",
-      "        \"days\": 6\n",
-      "      }\n",
-      "    }\n",
-      "  },\n",
-      "  \"phenotypic_features\": [\n",
-      "    {\n",
-      "      \"description\": \"AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0034207\",\n",
-      "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0000240\",\n",
-      "        \"label\": \"Abnormality of skull size\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0000240\",\n",
-      "        \"label\": \"Abnormality of skull size\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 26,\n",
-      "          \"days\": 6\n",
-      "        }\n",
-      "      }\n",
-      "    },\n",
-      "    {\n",
-      "      \"description\": \"Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
-      "      \"type\": {\n",
-      "        \"id\": \"HP:0002823\",\n",
-      "        \"label\": \"Abnormal femur morphology\"\n",
-      "      },\n",
-      "      \"excluded\": true,\n",
-      "      \"onset\": {\n",
-      "        \"gestational_age\": {\n",
-      "          \"weeks\": 27\n",
-      "        }\n",
-      "      }\n",
-      "    }\n",
-      "  ],\n",
-      "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-26T15:21:08.337338Z\",\n",
-      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
-      "    \"resources\": [\n",
-      "      {\n",
-      "        \"id\": \"hp\",\n",
-      "        \"name\": \"Human Phenotype Ontology\",\n",
-      "        \"url\": \"http://purl.obolibrary.org/obo/hp.owl\",\n",
-      "        \"version\": \"2025-11-24\",\n",
-      "        \"namespace_prefix\": \"HP\",\n",
-      "        \"iri_prefix\": \"http://purl.obolibrary.org/obo/HP_\"\n",
-      "      }\n",
-      "    ],\n",
-      "    \"phenopacket_schema_version\": \"2.0\"\n",
-      "  }\n",
-      "}\n"
+      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
+      "DEBUG:hpotk.util:Opening /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like a local file: /tmp/hp.json\n",
+      "DEBUG:hpotk.util:Looks like decompressed data\n"
      ]
-    }
-   ],
-   "source": [
-    "# Shorter Test\n",
-    "\n",
-    "import json\n",
-    "import re\n",
-    "from datetime import datetime, timezone\n",
-    "from pathlib import Path\n",
-    "from google.protobuf.json_format import MessageToJson\n",
-    "from google.protobuf.timestamp_pb2 import Timestamp\n",
-    "import phenopackets.schema.v2 as pps2\n",
-    "from prenatalppkt.etl.extractors import observer\n",
-    "from prenatalppkt.gestational_age import GestationalAge\n",
-    "\n",
-    "print(\"\\n STEP 1: Loading Observer JSON...\")\n",
-    "data_path = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
-    "with open(data_path) as f:\n",
-    "    observer_data = json.load(f)\n",
-    "print(f\"Loaded: {data_path}\")\n",
-    "print(f\"Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
-    "\n",
-    "first_fetus = observer_data[\"fetuses\"][0]\n",
-    "measurements = first_fetus.get(\"measurements\", [])\n",
-    "print(f\"Measurements: {len(measurements)}\")\n",
-    "print(f\"Sample: {measurements[0]['label']} = \", f\"{measurements[0]['value']} {measurements[0]['unit_of_measure']}\")\n",
-    "\n",
-    "print(\"\\n  STEP 2: Extracting biometry measurements to TermBins...\")\n",
-    "term_bins = observer.extract(observer_data)\n",
-    "print(f\" Extracted {len(term_bins)} TermBins\")\n",
-    "for i, tb in enumerate(term_bins, 1):\n",
-    "    print(f\"\\n  [{i}] {tb.description}\")\n",
-    "    print(f\"      HPO: {tb.hpo_id} - {tb.hpo_label}\")\n",
-    "    print(f\"      Normal: {tb.normal}\")\n",
-    "\n",
-    "print(\"\\n STEP 3: Converting TermBins to PhenotypicFeatures...\")\n",
-    "def parse_ga_from_description(description: str) -> tuple[int, int]:\n",
-    "    \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
-    "    match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
-    "    if match:\n",
-    "        return int(match.group(1)), int(match.group(2))\n",
-    "    # Fallback\n",
-    "    first_m = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
-    "    ga = GestationalAge.from_weeks(first_m.get(\"calculated_ega\", 26.9))\n",
-    "    return ga.weeks, ga.days\n",
-    "phenotypic_features = []\n",
-    "for tb in term_bins:\n",
-    "    weeks, days = parse_ga_from_description(tb.description)\n",
-    "    # Create GestationalAge message\n",
-    "    gestational_age = pps2.GestationalAge(weeks=weeks, days=days)\n",
-    "    # Create TimeElement with gestational_age\n",
-    "    onset = pps2.TimeElement(gestational_age=gestational_age)\n",
-    "    # Create OntologyClass for the HPO term\n",
-    "    hpo_type = pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label)\n",
-    "    # Create PhenotypicFeature\n",
-    "    pf = pps2.PhenotypicFeature( type=hpo_type, excluded=tb.normal, onset=onset, description=tb.description)\n",
-    "    phenotypic_features.append(pf)\n",
-    "print(f\" Generated {len(phenotypic_features)} PhenotypicFeatures\")\n",
-    "for i, pf in enumerate(phenotypic_features, 1):\n",
-    "    status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
-    "    print(f\"\\n  [{i}] {pf.type.id}\")\n",
-    "    print(f\"      Status: {status}\")\n",
-    "    print(f\"      Description: {pf.description}\")\n",
-    "\n",
-    "\n",
-    "print(\"\\n STEP 4: Building Phenopacket v2.0...\")\n",
-    "# Get subject GA from first measurement\n",
-    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
-    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
-    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
-    "# Create Individual (subject) with GestationalAge\n",
-    "subject_time = pps2.TimeElement(gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days))\n",
-    "\n",
-    "subject = pps2.Individual(id=\"fetus-1\", sex=pps2.Sex.UNKNOWN_SEX, time_at_last_encounter=subject_time)\n",
-    "\n",
-    "# Create timestamp for metadata\n",
-    "now = datetime.now(timezone.utc)\n",
-    "created_timestamp = Timestamp()\n",
-    "created_timestamp.FromDatetime(now)\n",
-    "\n",
-    "# Create HPO Resource\n",
-    "hpo_resource = pps2.Resource(id=\"hp\", name=\"Human Phenotype Ontology\", url=\"http://purl.obolibrary.org/obo/hp.owl\", version=\"2025-11-24\", namespace_prefix=\"HP\", iri_prefix=\"http://purl.obolibrary.org/obo/HP_\")\n",
-    "\n",
-    "# Create MetaData\n",
-    "metadata = pps2.MetaData(created=created_timestamp, created_by=\"prenatalppkt-etl-pipeline\", phenopacket_schema_version=\"2.0\")\n",
-    "metadata.resources.append(hpo_resource)\n",
-    "\n",
-    "# Create the Phenopacket\n",
-    "phenopacket = pps2.Phenopacket(id=\"apple-sally-fetus-1\", subject=subject, meta_data=metadata)\n",
-    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
-    "\n",
-    "# Convert protobuf message to JSON using official method\n",
-    "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
-    "print(phenopacket_json)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "1e24f7ff",
-   "metadata": {},
-   "source": [
-    "# New"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "0f79d3fe",
-   "metadata": {},
-   "outputs": [
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "================================================================================\n",
-      "PRENATALPPKT EXPANDED ETL PIPELINE\n",
-      "Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\n",
+      "PRENATALPPKT ETL PIPELINE\n",
+      "Observer JSON -> Section Parsing -> Phenopacket v2.0\n",
       "================================================================================\n",
       "\n",
-      "[STEP 1] Loading HPO Concept Recognizer...\n"
+      "[STEP 1] Loading the HPO Concept Recognizer...\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "DEBUG:hpotk.util:Using default encoding 'utf-8'\n",
-      "DEBUG:hpotk.util:Opening /tmp/hp.json\n",
-      "DEBUG:hpotk.util:Looks like a local file: /tmp/hp.json\n",
-      "DEBUG:hpotk.util:Looks like decompressed data\n",
       "DEBUG:hpotk.ontology.load.obographs._load:Extracting ontology terms\n",
       "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
       "DEBUG:hpotk.ontology.load.obographs._factory:Unknown synonym type http://purl.obolibrary.org/obo/hp#allelic_requirement\n",
@@ -905,75 +118,105 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  ? HPO version: 2025-10-22\n",
-      "  ? Concept recognizer: HpoExactConceptRecognizer\n",
+      "HPO version: 2025-10-22\n",
       "\n",
       "[STEP 2] Loading Observer JSON...\n",
-      "  ? Loaded: tests/data/Apple_Sally_pretty.json\n",
-      "  ? Fetuses: 1\n",
-      "  ? Measurements: 6\n",
-      "  ? Sample: AC = 22.62 cm\n",
+      "Loaded: Apple_Sally_pretty.json\n",
+      "Fetuses: 1\n",
       "\n",
-      "[STEP 3] Extracting biometry measurements to TermBins...\n",
-      "  ? Extracted 4 TermBins:\n",
-      "    [1] HP:0034207 (Abnormal fetal gastrointestinal system morphology) - ? Normal\n",
-      "        AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\n",
-      "    [2] HP:0000240 (Abnormality of skull size) - ? Normal\n",
-      "        BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\n",
-      "    [3] HP:0000240 (Abnormality of skull size) - ? Normal\n",
-      "        HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\n",
-      "    [4] HP:0002823 (Abnormal femur morphology) - ? Normal\n",
-      "        Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\n",
+      "[STEP 3] Extracting biometry measurements...\n",
+      "Extracted 4 TermBins\n",
+      "    - AC: 226.2 mm (55.6%) at 26w6d [Fetus 1] [Normal]\n",
+      "      HPO: HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
+      "    - BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1] [Normal]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "    - HC: 250.0 mm (42.5%) at 26w6d [Fetus 1] [Normal]\n",
+      "      HPO: HP:0000240 - Abnormality of skull size\n",
+      "    - Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1] [Normal]\n",
+      "      HPO: HP:0002823 - Abnormal femur morphology\n",
       "\n",
       "[STEP 4] Parsing clinical sections...\n",
       "\n",
-      "  --- Clinical Indication ---\n",
-      "  Indication: (not found)\n",
-      "\n",
-      "  --- Pregnancy Dating ---\n",
-      "  LMP: 0001-01-01\n",
-      "  EDD: None\n",
-      "  Dating Method: None\n",
-      "  GA by Ultrasound: None\n",
-      "\n",
-      "  --- Clinical Impression ---\n",
-      "  Impression (1294 chars): \"The patient was referred for a fetal anatomical survey.    Sonographic measurements were consistent with the expected gestational age. The amniotic fluid volume was normal. A detailed fetal anatomic s...\"\n",
-      "  Growth Assessment: None\n",
+      "  [4a] Clinical Indication:\n",
+      "       Reason: N/A...\n",
+      "\n",
+      "  [4b] Pregnancy Dating:\n",
+      "       LMP: 0001-01-01\n",
+      "       EDD: None\n",
+      "       GA at exam: N/A weeks\n",
+      "\n",
+      "  [4c] Clinical Impression:\n",
+      "S...   Text: The patient was referred for a fetal anatomical survey.  \n",
+      "       HPO terms found: 4\n",
+      "\n",
+      "[STEP 5] Parsing fetal-specific sections...\n",
+      "\n",
+      "  [5a] Fetal Anatomy:\n",
+      "       Normal structures: 0\n",
+      "       Abnormal structures: 0\n",
+      "       Not visualized: 0\n",
+      "       Anomalies detected: 0\n",
+      "       HPO terms extracted: 1\n",
+      "\n",
+      "  [5b] Estimated Fetal Weight:\n",
+      "       EFW: 1014.8 grams\n",
+      "       Percentile: 55.6%\n",
+      "       Method: Hadlock (AC, FL, HC)\n",
+      "       Growth category: AGA\n",
+      "       Within normal range: True\n",
+      "\n",
+      "  [5c] Fetal Ratios:\n",
+      "       Ratios calculated: 3\n",
+      "       All within range: True\n",
+      "       Proportionality: Normal\n",
+      "         ? HC/AC: 1.105\n",
+      "         ? FL/AC: 22.149\n",
+      "         ? FL/BPD: 75\n",
+      "\n",
+      "[STEP 6] Building PhenotypicFeatures...\n",
+      "Growth category AGA (normal) - no HPO term needed\n",
+      "\n",
+      "  Summary by source:\n",
+      "    - Biometry: 4 features\n",
+      "    - Clinical Text: 4 features\n",
+      "    - Anatomy: 1 features\n",
+      "  Total: 9 PhenotypicFeatures\n",
+      "\n",
+      "[STEP 7] Assembling Phenopacket v2.0...\n",
+      "Phenopacket ID: apple-sally-fetus-1-complete\n",
+      "Subject: fetus-1\n",
+      "Features: 9\n",
+      "\n",
+      "[STEP 8] Output & Validation...\n",
+      "Round-trip validation passed\n",
+      "Saved to: output/apple_sally_phenopacket_complete.json\n",
       "\n",
-      "  --- HPO Concept Recognition from Clinical Text ---\n",
-      "  Found 4 HPO terms in clinical narrative:\n",
-      "    ? HP:0001274: Agenesis of corpus callosum\n",
-      "    ? HP:0000256: Macrocephaly\n",
-      "    ? HP:0001305: Dandy-Walker malformation\n",
-      "    ? HP:0002119: Ventriculomegaly\n",
-      "\n",
-      "[STEP 5] Previewing anatomy findings...\n",
-      "  Normal (0): ...\n",
-      "  Abnormal (0): (none)\n",
-      "  Not visualized (0): ...\n",
-      "  (Note: Anatomy section parser not yet implemented in ETL)\n",
-      "\n",
-      "[STEP 6] Converting to PhenotypicFeatures...\n",
-      "\n",
-      "  --- From Biometry ---\n",
-      "  ? Added 4 features from biometry\n",
+      "================================================================================\n",
+      "PHENOPACKET GENERATION COMPLETE\n",
+      "================================================================================\n",
       "\n",
-      "  --- From Clinical Text ---\n",
-      "  ? Added 4 features from clinical text\n",
+      "[Clinical Context]\n",
+      "  Indication: N/A...\n",
+      "  GA at exam: N/A weeks\n",
+      "  EFW: 1014.8g (AGA)\n",
+      "  Proportionality: Normal\n",
       "\n",
-      "  Total PhenotypicFeatures: 8\n",
+      "[Phenotypic Features by Source]\n",
+      "  Biometry: 4\n",
+      "  Clinical Text: 4\n",
+      "  Anatomy: 1\n",
       "\n",
-      "[STEP 7] Building Phenopacket v2.0...\n",
-      "  ? Phenopacket assembled successfully\n",
-      "    ID: apple-sally-fetus-1\n",
-      "    Subject: fetus-1 at 26w6d\n",
-      "    Features: 8\n",
+      "[Feature Status]\n",
+      "  Observed (abnormal): 5\n",
+      "  Excluded (normal): 4\n",
       "\n",
       "================================================================================\n",
-      "PHENOPACKET v2.0 OUTPUT (JSON)\n",
+      "SUCCESS: Complete phenopacket at output/apple_sally_phenopacket_complete.json\n",
       "================================================================================\n",
+      "\n",
+      "[Phenopacket JSON Output]\n",
       "{\n",
-      "  \"id\": \"apple-sally-fetus-1\",\n",
+      "  \"id\": \"apple-sally-fetus-1-complete\",\n",
       "  \"subject\": {\n",
       "    \"id\": \"fetus-1\",\n",
       "    \"time_at_last_encounter\": {\n",
@@ -985,7 +228,7 @@
       "  },\n",
       "  \"phenotypic_features\": [\n",
       "    {\n",
-      "      \"description\": \"[Biometry] AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
+      "      \"description\": \"Biometry: AC: 226.2 mm (55.6%) at 26w6d [Fetus 1]\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0034207\",\n",
       "        \"label\": \"Abnormal fetal gastrointestinal system morphology\"\n",
@@ -999,7 +242,7 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Biometry] BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
+      "      \"description\": \"Biometry: BPD: 66.8 mm (51.2%) at 26w6d [Fetus 1]\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0000240\",\n",
       "        \"label\": \"Abnormality of skull size\"\n",
@@ -1013,7 +256,7 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Biometry] HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
+      "      \"description\": \"Biometry: HC: 250.0 mm (42.5%) at 26w6d [Fetus 1]\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0000240\",\n",
       "        \"label\": \"Abnormality of skull size\"\n",
@@ -1027,7 +270,7 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Biometry] Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
+      "      \"description\": \"Biometry: Femur: 50.1 mm (46.8%) at 27w0d [Fetus 1]\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0002823\",\n",
       "        \"label\": \"Abnormal femur morphology\"\n",
@@ -1040,10 +283,10 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"description\": \"Clinical impression: Macrocephaly\",\n",
       "      \"type\": {\n",
-      "        \"id\": \"HP:0001274\",\n",
-      "        \"label\": \"Agenesis of corpus callosum\"\n",
+      "        \"id\": \"HP:0000256\",\n",
+      "        \"label\": \"Macrocephaly\"\n",
       "      },\n",
       "      \"onset\": {\n",
       "        \"gestational_age\": {\n",
@@ -1053,10 +296,10 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"description\": \"Clinical impression: Agenesis of corpus callosum\",\n",
       "      \"type\": {\n",
-      "        \"id\": \"HP:0000256\",\n",
-      "        \"label\": \"Macrocephaly\"\n",
+      "        \"id\": \"HP:0001274\",\n",
+      "        \"label\": \"Agenesis of corpus callosum\"\n",
       "      },\n",
       "      \"onset\": {\n",
       "        \"gestational_age\": {\n",
@@ -1066,7 +309,7 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"description\": \"Clinical impression: Dandy-Walker malformation\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0001305\",\n",
       "        \"label\": \"Dandy-Walker malformation\"\n",
@@ -1079,7 +322,7 @@
       "      }\n",
       "    },\n",
       "    {\n",
-      "      \"description\": \"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
+      "      \"description\": \"Clinical impression: Ventriculomegaly\",\n",
       "      \"type\": {\n",
       "        \"id\": \"HP:0002119\",\n",
       "        \"label\": \"Ventriculomegaly\"\n",
@@ -1090,11 +333,24 @@
       "          \"days\": 6\n",
       "        }\n",
       "      }\n",
+      "    },\n",
+      "    {\n",
+      "      \"description\": \"Anatomy finding: Neural tube defect\",\n",
+      "      \"type\": {\n",
+      "        \"id\": \"HP:0045005\",\n",
+      "        \"label\": \"Neural tube defect\"\n",
+      "      },\n",
+      "      \"onset\": {\n",
+      "        \"gestational_age\": {\n",
+      "          \"weeks\": 26,\n",
+      "          \"days\": 6\n",
+      "        }\n",
+      "      }\n",
       "    }\n",
       "  ],\n",
       "  \"meta_data\": {\n",
-      "    \"created\": \"2026-01-26T15:21:11.051438Z\",\n",
-      "    \"created_by\": \"prenatalppkt-etl-pipeline\",\n",
+      "    \"created\": \"2026-02-05T19:49:31.564591Z\",\n",
+      "    \"created_by\": \"prenatalppkt-etl-pipeline-v2\",\n",
       "    \"resources\": [\n",
       "      {\n",
       "        \"id\": \"hp\",\n",
@@ -1107,85 +363,22 @@
       "    ],\n",
       "    \"phenopacket_schema_version\": \"2.0\"\n",
       "  }\n",
-      "}\n",
-      "\n",
-      "================================================================================\n",
-      "VALIDATION & SUMMARY\n",
-      "================================================================================\n",
-      "\n",
-      "[Validation] Round-trip test...\n",
-      "  ? Round-trip validation passed\n",
-      "\n",
-      "[Summary] Phenotypic Features:\n",
-      "  Total: 8\n",
-      "    From Biometry: 4\n",
-      "    From Clinical Text: 4\n",
-      "  Normal (excluded): 4\n",
-      "  Abnormal (observed): 4\n",
-      "\n",
-      "[Detail] All Phenotypic Features:\n",
-      "------------------------------------------------------------\n",
-      "\n",
-      "  [1] HP:0034207 - Abnormal fetal gastrointestinal system morphology\n",
-      "      Source: Biometry\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [2] HP:0000240 - Abnormality of skull size\n",
-      "      Source: Biometry\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [3] HP:0000240 - Abnormality of skull size\n",
-      "      Source: Biometry\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [4] HP:0002823 - Abnormal femur morphology\n",
-      "      Source: Biometry\n",
-      "      Status: EXCLUDED (normal)\n",
-      "      Onset: 27w0d\n",
-      "\n",
-      "  [5] HP:0001274 - Agenesis of corpus callosum\n",
-      "      Source: Clinical Text\n",
-      "      Status: OBSERVED (abnormal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [6] HP:0000256 - Macrocephaly\n",
-      "      Source: Clinical Text\n",
-      "      Status: OBSERVED (abnormal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [7] HP:0001305 - Dandy-Walker malformation\n",
-      "      Source: Clinical Text\n",
-      "      Status: OBSERVED (abnormal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "  [8] HP:0002119 - Ventriculomegaly\n",
-      "      Source: Clinical Text\n",
-      "      Status: OBSERVED (abnormal)\n",
-      "      Onset: 26w6d\n",
-      "\n",
-      "================================================================================\n",
-      "SUCCESS: Phenopacket saved to output/apple_sally_phenopacket_expanded.json\n",
-      "================================================================================\n"
+      "}\n"
      ]
     }
    ],
    "source": [
     "\"\"\"\n",
-    "PRENATALPPKT EXPANDED ETL PIPELINE\n",
-    "Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\n",
     "\n",
-    "Demonstrates the complete ETL pipeline:\n",
+    "Steps:\n",
     "1. Biometry extraction -> List[TermBin] -> quantitative HPO terms\n",
     "2. Clinical indication -> reason for exam\n",
     "3. Pregnancy dating -> LMP, EDD, gestational age context\n",
     "4. Clinical impression -> qualitative HPO terms from free text\n",
-    "5. Phenopacket assembly -> GA4GH Phenopacket v2.0 JSON\n",
-    "\n",
-    "Uses the official GA4GH phenopackets library per:\n",
-    "https://phenopacket-schema.readthedocs.io/en/latest/python.html\n",
+    "5. Fetal anatomy -> structured findings + HPO terms from anomalies\n",
+    "6. Estimated fetal weight -> SGA/AGA/LGA classification\n",
+    "7. Fetal ratios -> proportionality assessment\n",
+    "8. Phenopacket assembly -> GA4GH Phenopacket v2.0 JSON\n",
     "\"\"\"\n",
     "\n",
     "import gzip\n",
@@ -1206,6 +399,9 @@
     "   parse_clinical_indication,\n",
     "   parse_pregnancy_dating,\n",
     "   parse_clinical_impression,\n",
+    "   parse_fetal_anatomy,\n",
+    "   parse_estimated_fetal_weight,\n",
+    "   parse_fetal_ratios,\n",
     ")\n",
     "\n",
     "# HPO Concept Recognition\n",
@@ -1215,19 +411,18 @@
     "from prenatalppkt.gestational_age import GestationalAge\n",
     "\n",
     "print(\"=\" * 80)\n",
-    "print(\"PRENATALPPKT EXPANDED ETL PIPELINE\")\n",
-    "print(\"Observer JSON -> Biometry + Clinical Sections -> Phenopacket v2.0\")\n",
+    "print(\"PRENATALPPKT ETL PIPELINE\")\n",
+    "print(\"Observer JSON -> Section Parsing -> Phenopacket v2.0\")\n",
     "print(\"=\" * 80)\n",
     "\n",
     "# =============================================================================\n",
     "# STEP 1: Load HPO Concept Recognizer\n",
     "# =============================================================================\n",
-    "print(\"\\n[STEP 1] Loading HPO Concept Recognizer...\")\n",
+    "print(\"\\n[STEP 1] Loading the HPO Concept Recognizer...\")\n",
     "\n",
     "HP_JSON_GZ = Path(\"tests/data/hp.json.gz\")\n",
     "TMP_HP_JSON = Path(\"/tmp/hp.json\")\n",
     "\n",
-    "# Decompress hp.json.gz to temp location\n",
     "with gzip.open(HP_JSON_GZ, \"rt\", encoding=\"utf-8\") as f_in:\n",
     "   with open(TMP_HP_JSON, \"w\", encoding=\"utf-8\") as f_out:\n",
     "       f_out.write(f_in.read())\n",
@@ -1235,202 +430,210 @@
     "hpo_parser = HpoParser(hpo_json_file=str(TMP_HP_JSON))\n",
     "hpo_cr = hpo_parser.get_hpo_concept_recognizer()\n",
     "\n",
-    "print(f\"  ? HPO version: {hpo_parser.get_version()}\")\n",
-    "print(f\"  ? Concept recognizer: {type(hpo_cr).__name__}\")\n",
+    "print(f\"HPO version: {hpo_parser.get_version()}\")\n",
     "\n",
     "# =============================================================================\n",
-    "# STEP 2: Load Observer JSON Data\n",
+    "# STEP 2: Load Observer JSON\n",
     "# =============================================================================\n",
     "print(\"\\n[STEP 2] Loading Observer JSON...\")\n",
     "\n",
-    "DATA_PATH = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
-    "\n",
-    "with open(DATA_PATH) as f:\n",
+    "data_path = Path(\"tests/data/Apple_Sally_pretty.json\")\n",
+    "with open(data_path) as f:\n",
     "   observer_data = json.load(f)\n",
     "\n",
-    "# Keep raw JSON string for section parsers\n",
-    "with open(DATA_PATH) as f:\n",
-    "   observer_json_str = f.read()\n",
-    "\n",
-    "print(f\"  ? Loaded: {DATA_PATH}\")\n",
-    "print(f\"  ? Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
-    "\n",
-    "first_fetus = observer_data[\"fetuses\"][0]\n",
-    "measurements = first_fetus.get(\"measurements\", [])\n",
-    "print(f\"  ? Measurements: {len(measurements)}\")\n",
-    "print(f\"  ? Sample: {measurements[0]['label']} = {measurements[0]['value']} {measurements[0]['unit_of_measure']}\")\n",
+    "print(f\"Loaded: {data_path.name}\")\n",
+    "print(f\"Fetuses: {len(observer_data.get('fetuses', []))}\")\n",
     "\n",
     "# =============================================================================\n",
     "# STEP 3: Extract Biometry -> TermBins\n",
     "# =============================================================================\n",
-    "print(\"\\n[STEP 3] Extracting biometry measurements to TermBins...\")\n",
+    "print(\"\\n[STEP 3] Extracting biometry measurements...\")\n",
     "\n",
     "term_bins = observer.extract(observer_data)\n",
+    "print(f\"Extracted {len(term_bins)} TermBins\")\n",
+    "\n",
+    "# Helper function to parse GA from TermBin description\n",
+    "def parse_ga_from_description(description: str) -> tuple[int, int]:\n",
+    "   \"\"\"Extract weeks and days from TermBin description like 'HC: 250.0 mm (42.5%) at 26w6d'\"\"\"\n",
+    "   match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
+    "   if match:\n",
+    "       return int(match.group(1)), int(match.group(2))\n",
+    "   return 27, 0  # fall back to 27w0d when the description has no 'at XwYd' gestational age\n",
     "\n",
-    "print(f\"  ? Extracted {len(term_bins)} TermBins:\")\n",
-    "for i, tb in enumerate(term_bins, 1):\n",
-    "   status = \"? Normal\" if tb.normal else \"? Abnormal\"\n",
-    "   print(f\"    [{i}] {tb.hpo_id} ({tb.hpo_label}) - {status}\")\n",
-    "   print(f\"        {tb.description}\")\n",
+    "# Display TermBins. Note: a TermBin exposes description, hpo_id, hpo_label, normal, and range,\n",
+    "# not label, value_mm, or percentile directly.\n",
+    "for tb in term_bins:\n",
+    "   status = \"Normal\" if tb.normal else \"ABNORMAL\"\n",
+    "   print(f\"    - {tb.description} [{status}]\")\n",
+    "   print(f\"      HPO: {tb.hpo_id} - {tb.hpo_label}\")\n",
     "\n",
     "# =============================================================================\n",
     "# STEP 4: Parse Clinical Sections\n",
     "# =============================================================================\n",
     "print(\"\\n[STEP 4] Parsing clinical sections...\")\n",
     "\n",
-    "SOURCE_FORMAT = \"observer_json\"\n",
-    "\n",
-    "# 4a: Clinical Indication\n",
-    "print(\"\\n  --- Clinical Indication ---\")\n",
-    "indication = parse_clinical_indication(observer_json_str, SOURCE_FORMAT)\n",
-    "indication_text = indication.get(\"indication_text\", \"\")\n",
-    "if indication_text:\n",
-    "   print(f\"  Indication: {indication_text[:100]}{'...' if len(indication_text) > 100 else ''}\")\n",
-    "else:\n",
-    "   print(\"  Indication: (not found)\")\n",
-    "\n",
-    "# 4b: Pregnancy Dating\n",
-    "print(\"\\n  --- Pregnancy Dating ---\")\n",
-    "dating = parse_pregnancy_dating(observer_json_str, SOURCE_FORMAT)\n",
-    "print(f\"  LMP: {dating.get('lmp', '(not found)')}\")\n",
-    "print(f\"  EDD: {dating.get('edd', '(not found)')}\")\n",
-    "print(f\"  Dating Method: {dating.get('dating_method', '(not found)')}\")\n",
-    "print(f\"  GA by Ultrasound: {dating.get('ga_by_ultrasound', '(not found)')}\")\n",
-    "\n",
-    "# 4c: Clinical Impression\n",
-    "print(\"\\n  --- Clinical Impression ---\")\n",
-    "impression = parse_clinical_impression(observer_json_str, SOURCE_FORMAT)\n",
-    "impression_text = impression.get(\"impression_text\", \"\")\n",
-    "\n",
-    "if impression_text:\n",
-    "   # Clean up for display\n",
-    "   preview = impression_text[:200].replace('\\r', ' ').replace('\\n', ' ')\n",
-    "   print(f\"  Impression ({len(impression_text)} chars): \\\"{preview}...\\\"\")\n",
-    "else:\n",
-    "   print(\"  Impression: (not found)\")\n",
-    "\n",
-    "print(f\"  Growth Assessment: {impression.get('growth_assessment', '(not detected)')}\")\n",
-    "\n",
-    "# 4d: Extract HPO terms from clinical narrative\n",
-    "print(\"\\n  --- HPO Concept Recognition from Clinical Text ---\")\n",
-    "if impression_text:\n",
-    "   hpo_terms_from_text = hpo_cr.parse(impression_text)\n",
-    "   print(f\"  Found {len(hpo_terms_from_text)} HPO terms in clinical narrative:\")\n",
-    "   for term in hpo_terms_from_text:\n",
-    "       print(f\"    ? {term.hpo_id}: {term.hpo_label}\")\n",
-    "else:\n",
-    "   hpo_terms_from_text = []\n",
-    "   print(\"  (no impression text to parse)\")\n",
-    "\n",
-    "if not hpo_terms_from_text:\n",
-    "   print(\"  (no HPO terms matched)\")\n",
+    "# 4a. Clinical Indication\n",
+    "indication = parse_clinical_indication(observer_data, \"observer_json\")\n",
+    "indication_text = indication.get('indication_text', 'N/A') or 'N/A'\n",
+    "print(f\"\\n  [4a] Clinical Indication:\")\n",
+    "print(f\"       Reason: {indication_text[:60]}...\")\n",
+    "\n",
+    "# 4b. Pregnancy Dating\n",
+    "dating = parse_pregnancy_dating(observer_data, \"observer_json\")\n",
+    "print(f\"\\n  [4b] Pregnancy Dating:\")\n",
+    "print(f\"       LMP: {dating.get('lmp', 'N/A')}\")\n",
+    "print(f\"       EDD: {dating.get('edd', 'N/A')}\")\n",
+    "print(f\"       GA at exam: {dating.get('ga_weeks', 'N/A')} weeks\")\n",
+    "\n",
+    "# 4c. Clinical Impression (with HPO extraction)\n",
+    "impression = parse_clinical_impression(observer_data, \"observer_json\", hpo_cr=hpo_cr)\n",
+    "impression_text = impression.get('impression_text', 'N/A') or 'N/A'\n",
+    "print(f\"\\n  [4c] Clinical Impression:\")\n",
+    "print(f\"       Text: {impression_text[:60]}...\")\n",
+    "print(f\"       HPO terms found: {len(impression.get('hpo_terms', []))}\")\n",
     "\n",
     "# =============================================================================\n",
-    "# STEP 5: Preview Anatomy Findings (Structured Data)\n",
+    "# STEP 5: Parse Fetal-Specific Sections (NEW)\n",
     "# =============================================================================\n",
-    "print(\"\\n[STEP 5] Previewing anatomy findings...\")\n",
-    "\n",
-    "fetus_data = observer_data[\"fetuses\"][0].get(\"fetus\", {})\n",
-    "anatomy_list = fetus_data.get(\"anatomy\", [])\n",
-    "\n",
-    "normal_structures = []\n",
-    "abnormal_structures = []\n",
-    "unseen_structures = []\n",
-    "anomalies_found = []\n",
-    "\n",
-    "for item in anatomy_list:\n",
-    "   main = item.get(\"main\", {})\n",
-    "   label = main.get(\"label\", \"Unknown\")\n",
-    "   state = main.get(\"anat_state\", \"\")\n",
-    "   \n",
-    "   if state == \"Normal\":\n",
-    "       normal_structures.append(label)\n",
-    "   elif state == \"Abnormal\":\n",
-    "       abnormal_structures.append(label)\n",
-    "       # Check for specific anomalies\n",
-    "       anomalies = item.get(\"anomalies\", [])\n",
-    "       if anomalies:\n",
-    "           for anom in anomalies:\n",
-    "               desc = anom.get(\"description\", \"?\")\n",
-    "               anomalies_found.append(f\"{label}: {desc}\")\n",
-    "   elif state == \"Unseen\":\n",
-    "       unseen_structures.append(label)\n",
-    "\n",
-    "print(f\"  Normal ({len(normal_structures)}): {', '.join(normal_structures[:5])}...\")\n",
-    "print(f\"  Abnormal ({len(abnormal_structures)}): {', '.join(abnormal_structures) if abnormal_structures else '(none)'}\")\n",
-    "print(f\"  Not visualized ({len(unseen_structures)}): {', '.join(unseen_structures[:3])}...\")\n",
-    "\n",
-    "if anomalies_found:\n",
-    "   print(f\"  ? Anomalies detected:\")\n",
-    "   for anom in anomalies_found:\n",
-    "       print(f\"    - {anom}\")\n",
-    "\n",
-    "print(\"  (Note: Anatomy section parser not yet implemented in ETL)\")\n",
+    "print(\"\\n[STEP 5] Parsing fetal-specific sections...\")\n",
+    "\n",
+    "# 5a. Fetal Anatomy (with HPO extraction from anomalies)\n",
+    "anatomy = parse_fetal_anatomy(observer_data, \"observer_json\", hpo_cr=hpo_cr)\n",
+    "print(f\"\\n  [5a] Fetal Anatomy:\")\n",
+    "print(f\"       Normal structures: {len(anatomy.get('normal_structures', []))}\")\n",
+    "print(f\"       Abnormal structures: {len(anatomy.get('abnormal_structures', []))}\")\n",
+    "print(f\"       Not visualized: {len(anatomy.get('not_visualized', []))}\")\n",
+    "print(f\"       Anomalies detected: {len(anatomy.get('anomalies', []))}\")\n",
+    "print(f\"       HPO terms extracted: {len(anatomy.get('hpo_terms', []))}\")\n",
+    "\n",
+    "for anomaly in anatomy.get(\"anomalies\", [])[:3]:\n",
+    "   print(f\"         o {anomaly.get('description', 'N/A')} ({anomaly.get('variant_type', 'N/A')})\")\n",
+    "\n",
+    "# 5b. Estimated Fetal Weight\n",
+    "efw = parse_estimated_fetal_weight(observer_data, \"observer_json\")\n",
+    "print(f\"\\n  [5b] Estimated Fetal Weight:\")\n",
+    "print(f\"       EFW: {efw.get('efw_grams', 'N/A')} grams\")\n",
+    "print(f\"       Percentile: {efw.get('percentile', 'N/A')}%\")\n",
+    "print(f\"       Method: {efw.get('method', 'N/A')}\")\n",
+    "print(f\"       Growth category: {efw.get('growth_category', 'N/A')}\")\n",
+    "print(f\"       Within normal range: {efw.get('within_normal_range', 'N/A')}\")\n",
+    "\n",
+    "# 5c. Fetal Ratios\n",
+    "ratios = parse_fetal_ratios(observer_data, \"observer_json\")\n",
+    "print(f\"\\n  [5c] Fetal Ratios:\")\n",
+    "print(f\"       Ratios calculated: {len(ratios.get('ratios', []))}\")\n",
+    "print(f\"       All within range: {ratios.get('all_within_range', 'N/A')}\")\n",
+    "print(f\"       Proportionality: {ratios.get('proportionality_assessment', 'N/A')}\")\n",
+    "\n",
+    "for ratio in ratios.get(\"ratios\", [])[:3]:\n",
+    "   name = ratio.get(\"name\", \"N/A\")\n",
+    "   value = ratio.get(\"value\", \"N/A\")\n",
+    "   in_range = \"[OK]\" if ratio.get(\"within_range\") else \"[!]\"\n",
+    "   print(f\"         {in_range} {name}: {value}\")\n",
     "\n",
     "# =============================================================================\n",
-    "# STEP 6: Convert to PhenotypicFeatures\n",
+    "# STEP 6: Build PhenotypicFeatures from ALL sources\n",
     "# =============================================================================\n",
-    "print(\"\\n[STEP 6] Converting to PhenotypicFeatures...\")\n",
-    "\n",
-    "\n",
-    "def parse_ga_from_description(description: str, fallback_weeks: float = 26.9) -> tuple[int, int]:\n",
-    "   \"\"\"Extract weeks and days from TermBin description.\"\"\"\n",
-    "   match = re.search(r\"at (\\d+)w(\\d+)d\", description)\n",
-    "   if match:\n",
-    "       return int(match.group(1)), int(match.group(2))\n",
-    "   ga = GestationalAge.from_weeks(fallback_weeks)\n",
-    "   return ga.weeks, ga.days\n",
-    "\n",
-    "\n",
-    "# Get subject GA for features without specific timing\n",
-    "first_measurement = observer_data[\"fetuses\"][0][\"measurements\"][0]\n",
-    "subject_ga_weeks = first_measurement.get(\"calculated_ega\", 26.9)\n",
-    "subject_ga = GestationalAge.from_weeks(subject_ga_weeks)\n",
+    "print(\"\\n[STEP 6] Building PhenotypicFeatures...\")\n",
     "\n",
     "phenotypic_features = []\n",
     "\n",
-    "# 6a: Convert biometry TermBins -> PhenotypicFeatures\n",
-    "print(\"\\n  --- From Biometry ---\")\n",
+    "# Get subject GA from dating, or fall back to the first TermBin description\n",
+    "ga_weeks = dating.get(\"ga_weeks\")\n",
+    "if ga_weeks:\n",
+    "   subject_ga = GestationalAge.from_weeks(float(ga_weeks))\n",
+    "else:\n",
+    "   # Fallback: parse from first TermBin description\n",
+    "   if term_bins:\n",
+    "       weeks, days = parse_ga_from_description(term_bins[0].description)\n",
+    "       subject_ga = GestationalAge(weeks=weeks, days=days)\n",
+    "   else:\n",
+    "       subject_ga = GestationalAge(weeks=27, days=0)\n",
+    "\n",
+    "# 6a. From biometry TermBins\n",
     "for tb in term_bins:\n",
-    "   weeks, days = parse_ga_from_description(tb.description, subject_ga_weeks)\n",
-    "   \n",
+    "   weeks, days = parse_ga_from_description(tb.description)\n",
+    "   onset = pps2.TimeElement(\n",
+    "       gestational_age=pps2.GestationalAge(weeks=weeks, days=days)\n",
+    "   )\n",
     "   pf = pps2.PhenotypicFeature(\n",
     "       type=pps2.OntologyClass(id=tb.hpo_id, label=tb.hpo_label),\n",
     "       excluded=tb.normal,  # normal=True means abnormality is EXCLUDED\n",
+    "       description=f\"Biometry: {tb.description}\",\n",
+    "       onset=onset,\n",
+    "   )\n",
+    "   phenotypic_features.append((\"Biometry\", pf))\n",
+    "\n",
+    "# 6b. From clinical impression HPO terms (SimpleTerm objects with hpo_id, hpo_label)\n",
+    "for term in impression.get(\"hpo_terms\", []):\n",
+    "   pf = pps2.PhenotypicFeature(\n",
+    "       type=pps2.OntologyClass(id=term.hpo_id, label=term.hpo_label),\n",
+    "       excluded=False,\n",
+    "       description=f\"Clinical impression: {term.hpo_label}\",\n",
     "       onset=pps2.TimeElement(\n",
-    "           gestational_age=pps2.GestationalAge(weeks=weeks, days=days)\n",
+    "           gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
     "       ),\n",
-    "       description=f\"[Biometry] {tb.description}\",\n",
     "   )\n",
-    "   phenotypic_features.append(pf)\n",
-    "\n",
-    "print(f\"  ? Added {len(term_bins)} features from biometry\")\n",
+    "   phenotypic_features.append((\"Clinical Text\", pf))\n",
     "\n",
-    "# 6b: Convert clinical text HPO terms -> PhenotypicFeatures\n",
-    "print(\"\\n  --- From Clinical Text ---\")\n",
-    "text_feature_count = 0\n",
-    "for term in hpo_terms_from_text:\n",
-    "   # Findings mentioned in clinical impression are OBSERVED (not excluded)\n",
+    "# 6c. From fetal anatomy HPO terms (SimpleTerm objects with hpo_id, hpo_label) (NEW)\n",
+    "for term in anatomy.get(\"hpo_terms\", []):\n",
     "   pf = pps2.PhenotypicFeature(\n",
     "       type=pps2.OntologyClass(id=term.hpo_id, label=term.hpo_label),\n",
-    "       excluded=False,  # These are observed findings\n",
+    "       excluded=False,\n",
+    "       description=f\"Anatomy finding: {term.hpo_label}\",\n",
+    "       onset=pps2.TimeElement(\n",
+    "           gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
+    "       ),\n",
+    "   )\n",
+    "   phenotypic_features.append((\"Anatomy\", pf))\n",
+    "\n",
+    "# 6d. Growth category as phenotypic feature (NEW)\n",
+    "growth_hpo_map = {\n",
+    "   \"SGA\": (\"HP:0001518\", \"Small for gestational age\"),\n",
+    "   \"LGA\": (\"HP:0001520\", \"Large for gestational age\"),\n",
+    "}\n",
+    "growth_cat = efw.get(\"growth_category\")\n",
+    "if growth_cat in growth_hpo_map:\n",
+    "   hpo_id, hpo_label = growth_hpo_map[growth_cat]\n",
+    "   pf = pps2.PhenotypicFeature(\n",
+    "       type=pps2.OntologyClass(id=hpo_id, label=hpo_label),\n",
+    "       excluded=False,\n",
+    "       description=f\"EFW {efw.get('efw_grams')}g at {efw.get('percentile')}th percentile\",\n",
+    "       onset=pps2.TimeElement(\n",
+    "           gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
+    "       ),\n",
+    "   )\n",
+    "   phenotypic_features.append((\"Growth\", pf))\n",
+    "# AGA is normal - we could add as excluded feature or skip\n",
+    "elif growth_cat == \"AGA\":\n",
+    "   print(\"Growth category AGA (normal) - no HPO term needed\")\n",
+    "\n",
+    "# 6e. Proportionality assessment as phenotypic feature (NEW)\n",
+    "if ratios.get(\"proportionality_assessment\") == \"Asymmetric\":\n",
+    "   pf = pps2.PhenotypicFeature(\n",
+    "       type=pps2.OntologyClass(id=\"HP:0001511\", label=\"Intrauterine growth retardation\"),\n",
+    "       excluded=False,\n",
+    "       description=\"Asymmetric growth pattern detected from biometric ratios\",\n",
     "       onset=pps2.TimeElement(\n",
     "           gestational_age=pps2.GestationalAge(weeks=subject_ga.weeks, days=subject_ga.days)\n",
     "       ),\n",
-    "       description=f\"[Clinical Impression] Extracted from narrative text via HPO Concept Recognition\",\n",
     "   )\n",
-    "   phenotypic_features.append(pf)\n",
-    "   text_feature_count += 1\n",
+    "   phenotypic_features.append((\"Ratios\", pf))\n",
     "\n",
-    "print(f\"  ? Added {text_feature_count} features from clinical text\")\n",
-    "print(f\"\\n  Total PhenotypicFeatures: {len(phenotypic_features)}\")\n",
+    "print(f\"\\n  Summary by source:\")\n",
+    "sources = {}\n",
+    "for source, pf in phenotypic_features:\n",
+    "   sources[source] = sources.get(source, 0) + 1\n",
+    "for source, count in sources.items():\n",
+    "   print(f\"    - {source}: {count} features\")\n",
+    "print(f\"  Total: {len(phenotypic_features)} PhenotypicFeatures\")\n",
     "\n",
     "# =============================================================================\n",
-    "# STEP 7: Build Complete Phenopacket v2.0\n",
+    "# STEP 7: Assemble Phenopacket v2.0\n",
     "# =============================================================================\n",
-    "print(\"\\n[STEP 7] Building Phenopacket v2.0...\")\n",
+    "print(\"\\n[STEP 7] Assembling Phenopacket v2.0...\")\n",
     "\n",
-    "# Subject (fetus)\n",
     "subject = pps2.Individual(\n",
     "   id=\"fetus-1\",\n",
     "   sex=pps2.Sex.UNKNOWN_SEX,\n",
@@ -1439,7 +642,6 @@
     "   ),\n",
     ")\n",
     "\n",
-    "# Metadata\n",
     "now = datetime.now(timezone.utc)\n",
     "created_timestamp = Timestamp()\n",
     "created_timestamp.FromDatetime(now)\n",
@@ -1448,89 +650,81 @@
     "   id=\"hp\",\n",
     "   name=\"Human Phenotype Ontology\",\n",
     "   url=\"http://purl.obolibrary.org/obo/hp.owl\",\n",
-    "   version=hpo_parser.get_version() or \"2025-01-01\",\n",
+    "   version=hpo_parser.get_version() or \"2025-01-01\", # TODO (@VarenyaJ): Update this fallback version date whenever the compressed hp.json is updated\n",
     "   namespace_prefix=\"HP\",\n",
     "   iri_prefix=\"http://purl.obolibrary.org/obo/HP_\",\n",
     ")\n",
     "\n",
     "metadata = pps2.MetaData(\n",
     "   created=created_timestamp,\n",
-    "   created_by=\"prenatalppkt-etl-pipeline\",\n",
+    "   created_by=\"prenatalppkt-etl-pipeline-v2\",\n",
     "   phenopacket_schema_version=\"2.0\",\n",
     ")\n",
     "metadata.resources.append(hpo_resource)\n",
     "\n",
-    "# Assemble the Phenopacket\n",
     "phenopacket = pps2.Phenopacket(\n",
-    "   id=\"apple-sally-fetus-1\",\n",
+    "   id=\"apple-sally-fetus-1-complete\",\n",
     "   subject=subject,\n",
     "   meta_data=metadata,\n",
     ")\n",
-    "phenopacket.phenotypic_features.extend(phenotypic_features)\n",
+    "phenopacket.phenotypic_features.extend([pf for _, pf in phenotypic_features])\n",
     "\n",
-    "print(\"  ? Phenopacket assembled successfully\")\n",
-    "print(f\"    ID: {phenopacket.id}\")\n",
-    "print(f\"    Subject: {phenopacket.subject.id} at {subject_ga.weeks}w{subject_ga.days}d\")\n",
-    "print(f\"    Features: {len(phenopacket.phenotypic_features)}\")\n",
+    "print(f\"Phenopacket ID: {phenopacket.id}\")\n",
+    "print(f\"Subject: {phenopacket.subject.id}\")\n",
+    "print(f\"Features: {len(phenopacket.phenotypic_features)}\")\n",
     "\n",
     "# =============================================================================\n",
-    "# STEP 8: Output JSON\n",
+    "# STEP 8: Output & Validation\n",
     "# =============================================================================\n",
-    "print(\"\\n\" + \"=\" * 80)\n",
-    "print(\"PHENOPACKET v2.0 OUTPUT (JSON)\")\n",
-    "print(\"=\" * 80)\n",
+    "print(\"\\n[STEP 8] Output & Validation...\")\n",
     "\n",
     "phenopacket_json = MessageToJson(phenopacket, preserving_proto_field_name=True)\n",
-    "print(phenopacket_json)\n",
-    "\n",
-    "# =============================================================================\n",
-    "# STEP 9: Validation & Summary\n",
-    "# =============================================================================\n",
-    "print(\"\\n\" + \"=\" * 80)\n",
-    "print(\"VALIDATION & SUMMARY\")\n",
-    "print(\"=\" * 80)\n",
     "\n",
     "# Round-trip validation\n",
-    "print(\"\\n[Validation] Round-trip test...\")\n",
     "parsed_back = Parse(phenopacket_json, pps2.Phenopacket())\n",
     "assert parsed_back.id == phenopacket.id\n",
     "assert len(parsed_back.phenotypic_features) == len(phenopacket.phenotypic_features)\n",
-    "print(\"  ? Round-trip validation passed\")\n",
-    "\n",
-    "# Feature breakdown\n",
-    "biometry_features = [pf for pf in phenopacket.phenotypic_features if \"[Biometry]\" in pf.description]\n",
-    "clinical_features = [pf for pf in phenopacket.phenotypic_features if \"[Clinical\" in pf.description]\n",
-    "excluded_count = sum(1 for pf in phenopacket.phenotypic_features if pf.excluded)\n",
-    "observed_count = len(phenopacket.phenotypic_features) - excluded_count\n",
-    "\n",
-    "print(\"\\n[Summary] Phenotypic Features:\")\n",
-    "print(f\"  Total: {len(phenopacket.phenotypic_features)}\")\n",
-    "print(f\"    From Biometry: {len(biometry_features)}\")\n",
-    "print(f\"    From Clinical Text: {len(clinical_features)}\")\n",
-    "print(f\"  Normal (excluded): {excluded_count}\")\n",
-    "print(f\"  Abnormal (observed): {observed_count}\")\n",
-    "\n",
-    "# Detailed feature list\n",
-    "print(\"\\n[Detail] All Phenotypic Features:\")\n",
-    "print(\"-\" * 60)\n",
-    "for i, pf in enumerate(phenopacket.phenotypic_features, 1):\n",
-    "   status = \"EXCLUDED (normal)\" if pf.excluded else \"OBSERVED (abnormal)\"\n",
-    "   ga = pf.onset.gestational_age\n",
-    "   source = \"Biometry\" if \"[Biometry]\" in pf.description else \"Clinical Text\"\n",
-    "   print(f\"\\n  [{i}] {pf.type.id} - {pf.type.label}\")\n",
-    "   print(f\"      Source: {source}\")\n",
-    "   print(f\"      Status: {status}\")\n",
-    "   print(f\"      Onset: {ga.weeks}w{ga.days}d\")\n",
+    "print(\"Round-trip validation passed\")\n",
     "\n",
     "# Save to file\n",
-    "output_path = Path(\"output/apple_sally_phenopacket_expanded.json\")\n",
+    "output_path = Path(\"output/apple_sally_phenopacket_complete.json\")\n",
     "output_path.parent.mkdir(exist_ok=True)\n",
     "with open(output_path, \"w\") as f:\n",
     "   f.write(phenopacket_json)\n",
+    "print(f\"Saved to: {output_path}\")\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 9: Summary Report\n",
+    "# =============================================================================\n",
+    "print(\"\\n\" + \"=\" * 80)\n",
+    "print(\"PHENOPACKET GENERATION COMPLETE\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "print(\"\\n[Clinical Context]\")\n",
+    "print(f\"  Indication: {indication_text[:50]}...\")\n",
+    "print(f\"  GA at exam: {dating.get('ga_weeks', 'N/A')} weeks\")\n",
+    "print(f\"  EFW: {efw.get('efw_grams', 'N/A')}g ({efw.get('growth_category', 'N/A')})\")\n",
+    "print(f\"  Proportionality: {ratios.get('proportionality_assessment', 'N/A')}\")\n",
+    "\n",
+    "print(\"\\n[Phenotypic Features by Source]\")\n",
+    "for source, count in sources.items():\n",
+    "   print(f\"  {source}: {count}\")\n",
+    "\n",
+    "observed = sum(1 for _, pf in phenotypic_features if not pf.excluded)\n",
+    "excluded = sum(1 for _, pf in phenotypic_features if pf.excluded)\n",
+    "print(f\"\\n[Feature Status]\")\n",
+    "print(f\"  Observed (abnormal): {observed}\")\n",
+    "print(f\"  Excluded (normal): {excluded}\")\n",
     "\n",
     "print(\"\\n\" + \"=\" * 80)\n",
-    "print(f\"SUCCESS: Phenopacket saved to {output_path}\")\n",
-    "print(\"=\" * 80)"
+    "print(f\"SUCCESS: Complete phenopacket at {output_path}\")\n",
+    "print(\"=\" * 80)\n",
+    "\n",
+    "# =============================================================================\n",
+    "# STEP 10: Display JSON Output\n",
+    "# =============================================================================\n",
+    "print(\"\\n[Phenopacket JSON Output]\")\n",
+    "print(phenopacket_json)"
    ]
   }
  ],