P2GX · VarenyaJ · Mar 14, 2026 · Jan 21, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/.github/actions/python_from_pyproject/action.yaml b/.github/actions/python_from_pyproject/action.yaml
@@ -1,22 +1,22 @@
 name: Install python from pyproject.toml
-description: 'Installs Python from the version found in the pyproject.toml'
+description: Installs Python from the version found in pyproject.toml
 
 inputs:
-  pyproject-file-path:
-      required: False
-      description: "Path to the pyproject.toml including filename"
-      default: "./pyproject.toml"
+ pyproject-file-path:
+   required: false
+   description: Path to the pyproject.toml including filename
+   default: ./pyproject.toml
 
 runs:
-    using: composite
-    steps:
-        - name: Get project version with yq
-          id: get_python_version
-          uses: mikefarah/yq@v4.46.1
-          with:
-              cmd: yq '.project.requires-python' ${{ inputs.pyproject-file-path }}
+ using: composite
+ steps:
+   - name: Read requires-python from pyproject.toml
+     id: get_python_version
+     shell: bash
+     env: { PYPROJECT_FILE: "${{ inputs.pyproject-file-path }}" } # passed via env so the path is never spliced into the script text
+     run: python -c "import os, pathlib, tomllib; p = pathlib.Path(os.environ['PYPROJECT_FILE']); req = tomllib.loads(p.read_text(encoding='utf-8'))['project']['requires-python']; print(f'result={req}')" >> "$GITHUB_OUTPUT"
 
-        -   name: Set up Python
-            uses: actions/setup-python@v5.6.0
-            with:
-                python-version: ${{ steps.get_python_version.outputs.result }}
+   - name: Set up Python
+     uses: actions/setup-python@v5.6.0
+     with:
+       python-version: ${{ steps.get_python_version.outputs.result }}
diff --git a/prenatalppkt.ipynb b/prenatalppkt.ipynb
diff --git a/src/prenatalppkt/etl/sections/__init__.py b/src/prenatalppkt/etl/sections/__init__.py
@@ -2,25 +2,32 @@
 Section parsers for non-biometry clinical data.
 
 These parsers extract additional clinical information from ultrasound reports
-beyond fetal biometry measurements. They are designed to eventually integrate
-with HPO Clinical Record (CR) modules for comprehensive phenotype capture.
+beyond fetal biometry measurements. They return Dict objects with parsed data.
 
-Current Status: SKELETON IMPLEMENTATIONS
-- Basic parsing structure in place
-- Returns placeholder data
-- TODO comments describe future implementation
+Implemented parsers:
+- parse_clinical_indication: Extract reason for exam
+- parse_pregnancy_dating: Extract LMP, EDD, gestational age
+- parse_clinical_impression: Extract clinical narrative and HPO terms
+- parse_fetal_anatomy: Extract anatomy findings and HPO terms
+- parse_estimated_fetal_weight: Extract EFW and growth classification
+- parse_fetal_ratios: Extract biometric ratios and proportionality
 
-Future Integration:
-- Map findings to HPO terms using src/prenatalppkt/hpo modules
-- Support symmetric processing across Observer JSON, ViewPoint Text, and HL7
-- Enable full phenotype packet generation
+Skeleton parsers (TODO):
+- parse_maternal_history: OB history, complications
+- parse_placenta: Placental assessment
+- parse_amniotic_fluid: AFI, MVP measurements
+- parse_umbilical_cord: Vessel count, insertion site
 """
 
 from prenatalppkt.etl.sections.maternal_history import parse_maternal_history
 from prenatalppkt.etl.sections.clinical_impression import parse_clinical_impression
 from prenatalppkt.etl.sections.clinical_indication import parse_clinical_indication
 from prenatalppkt.etl.sections.pregnancy_dating import parse_pregnancy_dating
 from prenatalppkt.etl.sections.fetal_anatomy import parse_fetal_anatomy
+from prenatalppkt.etl.sections.estimated_fetal_weight import (
+    parse_estimated_fetal_weight,
+)
+from prenatalppkt.etl.sections.fetal_ratios import parse_fetal_ratios
 from prenatalppkt.etl.sections.placenta import parse_placenta
 from prenatalppkt.etl.sections.amniotic_fluid import parse_amniotic_fluid
 from prenatalppkt.etl.sections.umbilical_cord import parse_umbilical_cord
@@ -31,6 +38,8 @@
     "parse_clinical_indication",
     "parse_pregnancy_dating",
     "parse_fetal_anatomy",
+    "parse_estimated_fetal_weight",
+    "parse_fetal_ratios",
     "parse_placenta",
     "parse_amniotic_fluid",
     "parse_umbilical_cord",

diff --git a/src/prenatalppkt/etl/sections/clinical_impression.py b/src/prenatalppkt/etl/sections/clinical_impression.py
@@ -1,66 +1,192 @@
 """
-Clinical impression section parser (SKELETON).
+Clinical impression / interpretation section parser.
 
-Extracts clinical impressions, diagnoses, and findings from report impression.
-
-TODO @VarenyaJ: Complete implementation, Map clinical findings to HPO terms, Extract structured anomalies from impression text
+Extracts clinical narrative text and optionally extracts HPO terms
+from free text using the HPO Concept Recognizer.
 """
 
-from typing import Dict
+from __future__ import annotations
+
+import json
+import re
+from typing import Dict, List, Optional, Union
 
 
-def parse_clinical_impression(data: str, source_format: str = "viewpoint_text") -> Dict:
+def parse_clinical_impression(
+    data: Union[str, Dict], source_format: str = "viewpoint_text", hpo_cr=None
+) -> Dict:
     """
-    Extract clinical impression from ultrasound report.
+    Parse clinical impression / interpretation section.
+
+    ``source_format`` keeps its previous default of "viewpoint_text" so that
+    existing one-argument calls continue to work. For "observer_json" a
+    string is decoded with json.loads and a dict is used as-is; the other
+    two formats require a string.
 
     Args:
-        data: Report content (text, JSON, or HL7)
+        data: Raw input data (JSON string, dict, or text)
         source_format: One of "observer_json", "viewpoint_text", "viewpoint_hl7"
+        hpo_cr: Optional HpoExactConceptRecognizer for HPO term extraction.
+                If provided, will extract HPO terms from impression text.
 
     Returns:
         Dict with keys:
             - impression_text: str - Full impression narrative
-            - diagnoses: List[str] - Identified diagnoses
-            - anomalies: List[Dict] - Structured anomaly data
-            - gestational_age_assessment: str - GA conclusion
-            - growth_assessment: str - Fetal growth conclusion
-            - recommendations: List[str] - Follow-up recommendations
-            - hpo_terms: List[str] - Mapped HPO term IDs (FUTURE)
-
-    TODO @VarenyaJ Implementation Steps:
-        1. Locate impression section:
-           - ViewPoint Text: "Impression" section after "========="
-           - Observer JSON: exam.finalize.generalComment.plain_text
-           - ViewPoint HL7: May be in RequestedProcedure or exam notes
-        2. Parse free-text impression for key findings
-        3. Extract anomalies:
-           - Observer JSON: fetuses[].anatomy[].anomalies[]
-           - Text: Look for patterns like "consistent with", "suggestive of"
-        4. Identify growth conclusions (FGR, LGA, AGA)
-        5. Extract recommendations for follow-up
-        6. Map findings to HPO terms:
-           - Use src/prenatalppkt/hpo.cr_fetal_findings
-           - Handle synonyms and varied clinical language
-
-    TODO @VarenyaJ: DO NOT:
-        - Assume impression section exists (optional in all formats)
-        - Parse impression without context (may reference biometry results)
-        - Miss negative findings (e.g., "no evidence of...")
-        - Ignore severity qualifiers (mild, moderate, severe)
+            - diagnoses: List[str] - Identified diagnoses (future)
+            - anomalies: List[Dict] - Structured anomaly data (future)
+            - gestational_age_assessment: Optional[str] - GA conclusion
+            - growth_assessment: Optional[str] - FGR, LGA, AGA, or None
+            - recommendations: List[str] - Follow-up recommendations (future)
+            - hpo_terms: List[SimpleTerm] - HPO terms extracted via CR
+            - source_format: str
+
+    Raises:
+        ValueError: Unsupported source_format, non-str text/HL7 data, or bad JSON.
     """
-    # SKELETON: Return empty structure
+    if source_format == "observer_json":
+        if isinstance(data, str):
+            data = json.loads(data)
+        impression_text = _parse_observer_impression(data)
+    elif source_format == "viewpoint_text":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_text data must be a string")
+        impression_text = _parse_viewpoint_text_impression(data)
+    elif source_format == "viewpoint_hl7":
+        if not isinstance(data, str):
+            raise ValueError("viewpoint_hl7 data must be a string")
+        impression_text = _parse_viewpoint_hl7_impression(data)
+    else:
+        raise ValueError(f"Unsupported source_format: {source_format}")
+
+    # Extract HPO terms if concept recognizer is provided
+    hpo_terms = []
+    if impression_text and hpo_cr is not None:
+        # A recognizer without a parse() method is ignored; hpo_terms stays []
+        if hasattr(hpo_cr, "parse"):
+            hpo_terms = hpo_cr.parse(impression_text)
+
     return {
-        "impression_text": "",
+        "impression_text": impression_text,
         "diagnoses": [],
         "anomalies": [],
         "gestational_age_assessment": None,
-        "growth_assessment": None,
+        "growth_assessment": _infer_growth_assessment(impression_text),
         "recommendations": [],
-        "hpo_terms": [],  # FUTURE
+        "hpo_terms": hpo_terms,
+        "source_format": source_format,
     }
 
 
-# TODO @VarenyaJ: Add helper functions:
-# - _extract_anomalies_from_text(text: str) -> List[Dict]
-# - _classify_growth_assessment(text: str) -> str
-# - _extract_recommendations(text: str) -> List[str]
+# ---------------------------------------------------------------------
+# Observer JSON
+# ---------------------------------------------------------------------
+
+
+def _parse_observer_impression(json_data: Dict) -> str:
+    """
+    Extract the impression narrative from Observer JSON.
+
+    The text lives at ``finalize.generalComment.plain_text``, either at the
+    root of the document or nested under ``exam``; the root is checked
+    first and ``exam`` is the fallback.
+
+    A level that is missing, JSON ``null``, or of an unexpected type is
+    treated as absent, so malformed exports yield "" instead of raising.
+
+    Returns:
+        The stripped impression text, or "" when none is present.
+    """
+    leaf = ("finalize", "generalComment", "plain_text")
+    # Root level first (most common), then the exam-level fallback.
+    for path in (leaf, ("exam", *leaf)):
+        node = json_data
+        for key in path:
+            node = node.get(key) if isinstance(node, dict) else None
+        if isinstance(node, str) and node.strip():
+            return node.strip()
+    return ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint Text
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_text_impression(text: str) -> str:
+    """
+    Extract the impression narrative from a ViewPoint text report.
+
+    The section is an ``Impression`` heading underlined with ``=``; it runs
+    until the next underlined heading or the end of the text. LF and CRLF
+    line endings are both accepted, and an empty section yields "".
+    """
+    pattern = re.compile(
+        # Lookahead leaves the newline for an immediately following heading.
+        r"Impression\s*\n=+[ \t]*(?=\r?\n|\Z)"
+        r"(?P<body>.*?)(?:\n[A-Z][^\n]*\n=+|\Z)",
+        re.DOTALL | re.IGNORECASE,
+    )
+    match = pattern.search(text)
+    return match.group("body").strip() if match else ""
+
+
+# ---------------------------------------------------------------------
+# ViewPoint HL7
+# ---------------------------------------------------------------------
+
+
+def _parse_viewpoint_hl7_impression(hl7: str) -> str:
+    """
+    Collect impression text from the OBX segments of an HL7 message.
+
+    A segment contributes when its observation identifier (OBX-3) contains
+    "Impression" or "Interpretation" (case-sensitive). Only the first ``^``
+    component of its value (OBX-5) is kept; empty values are skipped.
+
+    Returns the kept values joined by single spaces, or "" if none match.
+    """
+    markers = ("Impression", "Interpretation")
+    collected: List[str] = []
+
+    for segment in hl7.splitlines():
+        parts = segment.split("|")
+        # Skip non-OBX segments and ones too short to carry OBX-5.
+        if not segment.startswith("OBX") or len(parts) < 6:
+            continue
+        if not any(marker in parts[3] for marker in markers):
+            continue
+        first_component = parts[5].split("^")[0].strip()
+        if first_component:
+            collected.append(first_component)
+
+    return " ".join(collected)
+
+
+# ---------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------
+
+
+def _infer_growth_assessment(text: str) -> Optional[str]:
+    """
+    Infer fetal growth assessment from impression text.
+
+    Phrases match as case-insensitive substrings; the abbreviations must be
+    whole words, so "again" or "against" is not mistaken for AGA. Negated
+    statements ("no growth restriction") are not interpreted.
+
+    Returns:
+        "FGR" - Fetal Growth Restriction
+        "LGA" - Large for Gestational Age
+        "AGA" - Appropriate for Gestational Age
+        None - No assessment detected
+    """
+    lowered = (text or "").lower()
+    for label, phrase in (
+        ("FGR", "growth restriction"),
+        ("LGA", "large for gestational age"),
+        ("AGA", "appropriate for gestational age"),
+    ):
+        if phrase in lowered or re.search(rf"\b{label.lower()}\b", lowered):
+            return label
+    return None