Skip to content
Open
1 change: 1 addition & 0 deletions docs/api/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ We implement the following models for supporting multiple healthcare predictive
:maxdepth: 3

models/pyhealth.models.BaseModel
models/pyhealth.models.BHCToAVS
models/pyhealth.models.LogisticRegression
models/pyhealth.models.MLP
models/pyhealth.models.CNN
Expand Down
11 changes: 11 additions & 0 deletions docs/api/models/pyhealth.models.BHCToAVS.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pyhealth.models.bhc_to_avs
==========================

BHCToAVS
------------------------------

.. autoclass:: pyhealth.models.bhc_to_avs.BHCToAVS
:members:
:inherited-members:
:show-inheritance:
:undoc-members:
21 changes: 21 additions & 0 deletions examples/bhc_to_avs_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pyhealth.models.bhc_to_avs import BHCToAVS

# Synthetic Brief Hospital Course (BHC) note, written with the kind of
# clinical abbreviations found in real notes (generated via ChatGPT 5.1).
bhc_note = (
    "Pt admitted with acute onset severe epigastric pain and hypotension. "
    "Labs notable for elevated lactate, WBC 18K, mild AST/ALT elevation, and Cr 1.4 (baseline 0.9). "
    "CT A/P w/ contrast demonstrated peripancreatic fat stranding c/w acute pancreatitis; "
    "no necrosis or peripancreatic fluid collection. "
    "Pt received aggressive IVFs, electrolyte repletion, IV analgesia, and NPO status initially. "
    "Serial abd exams remained benign with no rebound or guarding. "
    "BP stabilized, lactate downtrended, and pt tolerated ADAT to low-fat diet without recurrence of sx. "
    "Discharged in stable condition w/ instructions for GI f/u and outpatient CMP in 1 week."
)

# Build the summarizer with its default base model and adapter ids.
summarizer = BHCToAVS()

# Turn the note into a patient-friendly After-Visit Summary and show it.
after_visit_summary = summarizer.predict(bhc_note)
print(after_visit_summary)

# Expected output: a simplified, patient-friendly summary that explains the
# hospital stay without medical jargon.
3 changes: 2 additions & 1 deletion pyhealth/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .adacare import AdaCare, AdaCareLayer
from .agent import Agent, AgentLayer
from .base_model import BaseModel
from .bhc_to_avs import BHCToAVS
from .cnn import CNN, CNNLayer
from .concare import ConCare, ConCareLayer
from .contrawr import ContraWR, ResBlock2D
Expand All @@ -26,4 +27,4 @@
from .transformer import Transformer, TransformerLayer
from .transformers_model import TransformersModel
from .vae import VAE
from .sdoh import SdohClassifier
from .sdoh import SdohClassifier
98 changes: 98 additions & 0 deletions pyhealth/models/bhc_to_avs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Author: Charan Williams
# NetID: charanw2
# Description: Converts clinical Brief Hospital Course (BHC) data to After-Visit Summaries (AVS) using a fine-tuned Mistral 7B model.

from typing import Dict, Any
from dataclasses import dataclass, field
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModelForCausalLM
from pyhealth.models.base_model import BaseModel

# System prompt prepended to every request at inference time.
_SYSTEM_PROMPT = (
    "You are a clinical summarization model. Produce accurate, patient-friendly summaries "
    "using only information from the doctor's note. Do not add new details.\n\n"
)

# Prompt template filled with the BHC note at inference time (it is formatted
# in ``BHCToAVS.predict``). An earlier "### Brief Hospital Course:" template
# used to be assigned to this same name just above; it was overwritten by this
# assignment before any use, so that dead definition has been removed.
# NOTE(review): this was originally labelled "Prompt used during fine-tuning";
# confirm it matches the template the LoRA adapter was actually trained with.
_PROMPT = (
    "Summarize for the patient what happened during the hospital stay based on this doctor's note:\n"
    "{bhc}\n\n"
    "Summary for the patient:\n"
)
Comment on lines +36 to +41
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment on line 36 states "Prompt used during fine-tuning" but this prompt is actually used during inference (as seen on line 146). If this prompt was indeed used during fine-tuning and is also being reused during inference, the comment should clarify this. If it's only used during inference, the comment is misleading and should be corrected to "Prompt template used during inference" or similar.

Copilot uses AI. Check for mistakes.

@dataclass(eq=False)
class BHCToAVS(BaseModel):
    """Convert a Brief Hospital Course (BHC) note into an After-Visit Summary (AVS).

    A base causal language model is loaded, a LoRA adapter is applied on top
    of it, and both are wrapped in a HuggingFace ``text-generation`` pipeline.
    The pipeline is built lazily on the first :meth:`predict` call and reused
    afterwards.

    ``eq=False`` keeps the identity-based ``__eq__``/``__hash__`` inherited
    from the base class. The default ``@dataclass`` behaviour (``eq=True``)
    sets ``__hash__`` to ``None``, which makes instances unhashable.
    NOTE(review): this matters if ``BaseModel`` is a ``torch.nn.Module``
    (module traversal stores modules in sets) -- confirm in ``base_model.py``.
    """

    base_model_id: str = field(default="mistralai/Mistral-7B-Instruct")
    """HuggingFace repo containing the base Mistral 7B model."""
    # NOTE(review): Mistral instruct checkpoints on the Hub appear to be
    # published with a version suffix (e.g. ``-v0.3``); confirm this id
    # resolves and is the base the adapter was trained against.

    adapter_model_id: str = field(default="williach31/mistral-7b-bhc-to-avs-lora")
    """HuggingFace repo containing only LoRA adapter weights."""

    def __post_init__(self):
        """Run the base-class initializer after the dataclass fields are set."""
        # The dataclass-generated ``__init__`` only assigns the fields above
        # and never calls the parent initializer.
        # NOTE(review): assumes ``BaseModel.__init__`` has no required
        # arguments -- confirm against ``base_model.py``.
        super().__init__()

    def _get_pipeline(self):
        """Create and cache the text-generation pipeline.

        Returns
        -------
        transformers.Pipeline
            The pipeline stored on ``self._pipeline``; it is built on the
            first call and the same object is returned on later calls.
        """
        if getattr(self, "_pipeline", None) is None:
            # Load base model
            base = AutoModelForCausalLM.from_pretrained(
                self.base_model_id,
                torch_dtype=torch.bfloat16,
                device_map="auto",
            )

            # Load LoRA adapter
            model = PeftModelForCausalLM.from_pretrained(
                base,
                self.adapter_model_id,
                torch_dtype=torch.bfloat16,
            )

            tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)

            # Create HF pipeline
            self._pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device_map="auto",
                model_kwargs={"torch_dtype": torch.bfloat16},
            )

        return self._pipeline

    def predict(self, bhc_text: str, max_new_tokens: int = 512) -> str:
        """
        Generate an After-Visit Summary (AVS) from a Brief Hospital Course (BHC) note.

        Parameters
        ----------
        bhc_text : str
            Raw BHC text. Must contain at least one non-whitespace character.
        max_new_tokens : int, optional
            Upper bound on the number of generated tokens (default 512).

        Returns
        -------
        str
            Patient-friendly summary, without the prompt that produced it.

        Raises
        ------
        ValueError
            If ``bhc_text`` is not a string or is empty/whitespace-only.
        """
        # Refuse blank input up front: there is nothing to summarize, and the
        # model would otherwise be asked to write a summary from no content.
        if not isinstance(bhc_text, str) or not bhc_text.strip():
            raise ValueError("bhc_text must be a non-empty string.")

        prompt = _SYSTEM_PROMPT + _PROMPT.format(bhc=bhc_text)

        pipe = self._get_pipeline()
        outputs = pipe(
            prompt,
            max_new_tokens=max_new_tokens,
            # Greedy decoding is requested explicitly; ``temperature=0.0`` is
            # not a valid sampling temperature and is not how sampling is
            # switched off.
            do_sample=False,
            # By default the pipeline returns prompt + completion; only the
            # newly generated summary should reach the caller.
            return_full_text=False,
            eos_token_id=[pipe.tokenizer.eos_token_id],
            pad_token_id=pipe.tokenizer.eos_token_id,
        )

        # A single prompt yields a one-element list of {"generated_text": ...}
        return outputs[0]["generated_text"].strip()
36 changes: 36 additions & 0 deletions tests/core/test_bhc_to_avs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from tests.base import BaseTestCase
from pyhealth.models.bhc_to_avs import BHCToAVS


class TestBHCToAVS(BaseTestCase):
    """Unit tests for the BHCToAVS model."""

    def setUp(self):
        """Seed the random number generators before each test."""
        self.set_random_seed()

    def test_predict(self):
        """Check that predict returns a non-empty string that does not echo the note."""
        bhc_text = (
            "Patient admitted with abdominal pain. Imaging showed no acute findings. "
            "Pain improved with supportive care and the patient was discharged in stable condition."
        )
        model = BHCToAVS()

        # Only the call that touches the model hub sits inside the ``try`` so
        # that assertion failures can never be mistaken for download errors.
        try:
            summary = model.predict(bhc_text)
        except OSError as e:
            message = str(e)
            # The weights cannot be fetched in some environments (e.g. GitHub
            # workflows). Report that as a skip instead of a silent pass, so
            # the run shows the model was never exercised.
            if "gated repo" in message.lower() or "404" in message:
                self.skipTest(f"BHCToAVS model weights unavailable: {e}")
            raise

        # Output must be type str
        self.assertIsInstance(summary, str)

        # Output should not be empty
        self.assertGreater(len(summary.strip()), 0)

        # Output should be different from input
        self.assertNotIn(bhc_text[:40], summary)
Loading