Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@ dist/
*.sqlite3

profile-imports-*.txt


# mypy
.mypy_cache
4 changes: 2 additions & 2 deletions canvas_generated/messages/effects_pb2.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions canvas_generated/messages/effects_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,12 @@ class EffectType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
CALENDAR__EVENT__CREATE: _ClassVar[EffectType]
CALENDAR__EVENT__UPDATE: _ClassVar[EffectType]
CALENDAR__EVENT__DELETE: _ClassVar[EffectType]
LINK_DOCUMENT_TO_PATIENT: _ClassVar[EffectType]
CATEGORIZE_DOCUMENT: _ClassVar[EffectType]
ASSIGN_DOCUMENT_REVIEWER: _ClassVar[EffectType]
UPDATE_DOCUMENT_FIELDS: _ClassVar[EffectType]
JUNK_DOCUMENT: _ClassVar[EffectType]
REMOVE_DOCUMENT_FROM_PATIENT: _ClassVar[EffectType]
UNKNOWN_EFFECT: EffectType
LOG: EffectType
ADD_PLAN_COMMAND: EffectType
Expand Down Expand Up @@ -629,6 +635,12 @@ CALENDAR__CREATE: EffectType
CALENDAR__EVENT__CREATE: EffectType
CALENDAR__EVENT__UPDATE: EffectType
CALENDAR__EVENT__DELETE: EffectType
LINK_DOCUMENT_TO_PATIENT: EffectType
CATEGORIZE_DOCUMENT: EffectType
ASSIGN_DOCUMENT_REVIEWER: EffectType
UPDATE_DOCUMENT_FIELDS: EffectType
JUNK_DOCUMENT: EffectType
REMOVE_DOCUMENT_FROM_PATIENT: EffectType

class Effect(_message.Message):
__slots__ = ("type", "payload", "plugin_name", "classname", "handler_name", "actor", "source")
Expand Down
4 changes: 2 additions & 2 deletions canvas_generated/messages/events_pb2.py

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions canvas_generated/messages/events_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,13 @@ class EventType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__LIST: _ClassVar[EventType]
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__ADD: _ClassVar[EventType]
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__REMOVE: _ClassVar[EventType]
DOCUMENT_RECEIVED: _ClassVar[EventType]
DOCUMENT_LINKED_TO_PATIENT: _ClassVar[EventType]
DOCUMENT_CATEGORIZED: _ClassVar[EventType]
DOCUMENT_REVIEWER_ASSIGNED: _ClassVar[EventType]
DOCUMENT_FIELDS_UPDATED: _ClassVar[EventType]
DOCUMENT_REVIEWED: _ClassVar[EventType]
DOCUMENT_DELETED: _ClassVar[EventType]
UNKNOWN: EventType
ALLERGY_INTOLERANCE_CREATED: EventType
ALLERGY_INTOLERANCE_UPDATED: EventType
Expand Down Expand Up @@ -1997,6 +2004,13 @@ REVENUE__PAYMENT_PROCESSOR__SELECTED: EventType
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__LIST: EventType
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__ADD: EventType
REVENUE__PAYMENT_PROCESSOR__PAYMENT_METHODS__REMOVE: EventType
DOCUMENT_RECEIVED: EventType
DOCUMENT_LINKED_TO_PATIENT: EventType
DOCUMENT_CATEGORIZED: EventType
DOCUMENT_REVIEWER_ASSIGNED: EventType
DOCUMENT_FIELDS_UPDATED: EventType
DOCUMENT_REVIEWED: EventType
DOCUMENT_DELETED: EventType

class Event(_message.Message):
__slots__ = ("type", "target", "context", "target_type", "actor", "source")
Expand Down
238 changes: 238 additions & 0 deletions canvas_sdk/effects/categorize_document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from collections.abc import Mapping
from typing import Annotated, Any

from pydantic import Field, model_validator
from pydantic_core import InitErrorDetails
from typing_extensions import TypedDict

from canvas_sdk.effects.base import EffectType
from canvas_sdk.effects.data_integration.base import _BaseDocumentEffect


class DocumentTypeConfidenceScores(TypedDict, total=False):
    """Per-field confidence scores for the parts of a ``document_type``.

    Declared with ``total=False``, so every key may be omitted. Each value
    that is supplied is a float constrained to the closed range 0.0-1.0.

    Attributes:
        key: Confidence in the ``key`` match.
        name: Confidence in the ``name`` match.
        report_type: Confidence in the ``report_type`` value.
        template_type: Confidence in the ``template_type`` value.
    """

    key: Annotated[float, Field(ge=0.0, le=1.0)]
    name: Annotated[float, Field(ge=0.0, le=1.0)]
    report_type: Annotated[float, Field(ge=0.0, le=1.0)]
    template_type: Annotated[float, Field(ge=0.0, le=1.0)]


class ConfidenceScores(TypedDict, total=False):
    """Confidence scores attached to the fields of a categorize request.

    Declared with ``total=False``, so every key may be omitted. Scores
    express how confident the producer is in each extracted field (for
    example, values obtained through OCR).

    Attributes:
        document_id: Score for the ``document_id`` field, a float in the
            closed range 0.0-1.0.
        document_type: Nested mapping of scores for the ``document_type``
            sub-fields ``key``, ``name``, ``report_type`` and
            ``template_type`` (each 0.0-1.0).
    """

    document_id: Annotated[float, Field(ge=0.0, le=1.0)]
    document_type: DocumentTypeConfidenceScores


# Field names declared on DocumentTypeConfidenceScores, captured once at import
# time so nested confidence-score keys can be checked against them.
_DOCUMENT_TYPE_CONFIDENCE_KEYS = frozenset(DocumentTypeConfidenceScores.__annotations__)


class DocumentType(TypedDict):
"""
Document type information for categorizing a document.

Attributes:
key: The unique key identifying the document type (required, non-empty string)
name: The human-readable name of the document type (required, non-empty string)
report_type: The type of report, must be "CLINICAL" or "ADMINISTRATIVE" (required)
template_type: The template type, can be "LabReportTemplate", "ImagingReportTemplate",
"SpecialtyReportTemplate", or null for administrative docs (optional)
"""

key: str
name: str
report_type: str
template_type: str | None


# Values accepted for ``document_type.report_type``.
VALID_REPORT_TYPES = frozenset({"CLINICAL", "ADMINISTRATIVE"})

# Non-null values accepted for ``document_type.template_type``.
VALID_TEMPLATE_TYPES = frozenset(
    {
        "LabReportTemplate",
        "ImagingReportTemplate",
        "SpecialtyReportTemplate",
    }
)


class CategorizeDocument(_BaseDocumentEffect):
    """
    An Effect that categorizes a document in the Data Integration queue into a specific document type.

    When processed by the home-app interpreter, this effect will:
    - Validate the IntegrationTask exists
    - Look up the DocumentType by key (falls back to name if key not found)
    - Validate the DocumentType exists
    - Create or update an IntegrationTaskReview with template_name and document_key

    Attributes:
        document_id: The ID of the IntegrationTask document to categorize (required, non-empty).
            Accepts str or int; always serialized as string in the payload.
        document_type: Document type information dict. ``key``, ``name`` and ``report_type``
            are validated as required; ``template_type`` may be omitted or null.
        confidence_scores: Optional confidence scores for document fields.
            See ConfidenceScores TypedDict for valid keys and value constraints.
    """

    class Meta:
        effect_type = EffectType.CATEGORIZE_DOCUMENT
        apply_required_fields = ("document_id", "document_type")

    document_id: str | int | None = None
    document_type: DocumentType | dict[str, Any] | None = None
    confidence_scores: ConfidenceScores | None = None

    @classmethod
    def _get_confidence_score_keys(cls) -> frozenset[str]:
        """Return valid keys for confidence_scores validation."""
        return frozenset(ConfidenceScores.__annotations__.keys())

    @model_validator(mode="before")
    @classmethod
    def validate_confidence_scores_keys(cls, data: Any) -> Any:
        """Validate confidence_scores keys before Pydantic processes the TypedDict.

        Extends the base class validation to also validate nested document_type keys.
        TypedDict in Pydantic silently drops unknown keys, so we validate
        them here to provide a clear error message to users.

        Args:
            data: The raw input handed to the model before field validation.

        Returns:
            The value returned by the base class validator, unchanged by the
            nested check.

        Raises:
            ValueError: If ``confidence_scores["document_type"]`` is a dict that
                contains keys not declared on DocumentTypeConfidenceScores.
        """
        # Call base class validation for top-level confidence_scores keys.
        # NOTE(review): the base validator is invoked through ``__func__`` with an
        # explicit ``cls`` -- presumably to reach the undecorated function; confirm
        # against the base class before simplifying this call.
        data = super().validate_confidence_scores_keys.__func__(cls, data)

        # Additional validation for nested document_type confidence scores.
        # Anything that is not dict-shaped is left for Pydantic to reject.
        if not isinstance(data, dict):
            return data
        scores = data.get("confidence_scores")
        if not isinstance(scores, dict):
            return data
        doc_type_scores = scores.get("document_type")
        if not isinstance(doc_type_scores, dict):
            return data

        invalid_doc_type_keys = set(doc_type_scores) - _DOCUMENT_TYPE_CONFIDENCE_KEYS
        if invalid_doc_type_keys:
            # key=str keeps the ordering well-defined when the offending keys are of
            # mixed types (e.g. 1 and "x"); a plain sorted() would raise TypeError
            # there instead of the intended ValueError.
            raise ValueError(
                f"confidence_scores.document_type contains invalid keys: {sorted(invalid_doc_type_keys, key=str)}. "
                f"Valid keys are: {sorted(_DOCUMENT_TYPE_CONFIDENCE_KEYS)}"
            )
        return data

    @property
    def values(self) -> dict[str, Any]:
        """The effect's values to be sent in the payload.

        ``document_id`` is stringified when set; ``confidence_scores`` is only
        included when it is not None.
        """
        result: dict[str, Any] = {
            "document_id": str(self.document_id) if self.document_id is not None else None,
            "document_type": self.document_type,
        }

        if self.confidence_scores is not None:
            result["confidence_scores"] = self.confidence_scores

        return result

    @property
    def effect_payload(self) -> dict[str, Any]:
        """The payload of the effect (identical to ``values``)."""
        return self.values

    def _validate_required_string(
        self,
        data: Mapping[str, Any],
        field: str,
        errors: list[InitErrorDetails],
    ) -> None:
        """Validate that a required string field exists and is non-empty.

        Args:
            data: The document_type mapping being checked.
            field: Name of the key to check inside ``data``.
            errors: Accumulator that receives an error detail on failure.
        """
        prefix = "document_type"
        if field not in data:
            errors.append(
                self._create_error_detail("missing", f"{prefix}.{field} is required", None)
            )
            return

        value = data[field]
        # Whitespace-only strings count as empty.
        if not isinstance(value, str) or not value.strip():
            errors.append(
                self._create_error_detail(
                    "value", f"{prefix}.{field} must be a non-empty string", value
                )
            )

    def _validate_enum_field(
        self,
        data: Mapping[str, Any],
        field: str,
        valid_values: frozenset[str],
        errors: list[InitErrorDetails],
        *,
        required: bool = True,
        nullable: bool = False,
    ) -> None:
        """Validate that a field has a value from a set of valid options.

        Args:
            data: The document_type mapping being checked.
            field: Name of the key to check inside ``data``.
            valid_values: The accepted string values.
            errors: Accumulator that receives an error detail on failure.
            required: When True, a missing key is reported as an error.
            nullable: When True, an explicit None value is accepted.
        """
        prefix = "document_type"
        if field not in data:
            if required:
                errors.append(
                    self._create_error_detail("missing", f"{prefix}.{field} is required", None)
                )
            return

        value = data[field]
        if nullable and value is None:
            return

        null_suffix = " or null" if nullable else ""
        if not isinstance(value, str):
            errors.append(
                self._create_error_detail(
                    "value", f"{prefix}.{field} must be a string{null_suffix}", value
                )
            )
        elif value not in valid_values:
            errors.append(
                self._create_error_detail(
                    "value",
                    f"{prefix}.{field} must be one of {sorted(valid_values)}{null_suffix}, got: {value}",
                    value,
                )
            )

    def _get_error_details(self, method: Any) -> list[InitErrorDetails]:
        """Validate the effect fields and return any error details.

        Note: confidence_scores validation (invalid keys, range constraints) is
        handled by Pydantic at construction time via model_validator and TypedDict
        with Annotated field constraints.

        Args:
            method: Passed through unchanged to the base class implementation.

        Returns:
            The base class errors plus any document_type field errors.
        """
        errors = super()._get_error_details(method)

        if self.document_type is not None:
            self._validate_required_string(self.document_type, "key", errors)
            self._validate_required_string(self.document_type, "name", errors)
            self._validate_enum_field(self.document_type, "report_type", VALID_REPORT_TYPES, errors)
            # template_type may be absent or null.
            self._validate_enum_field(
                self.document_type,
                "template_type",
                VALID_TEMPLATE_TYPES,
                errors,
                required=False,
                nullable=True,
            )

        return errors


# Names published by this module through its ``__exports__`` tuple.
__exports__ = (
    "ConfidenceScores", "DocumentType",
    "DocumentTypeConfidenceScores", "CategorizeDocument",
)
31 changes: 31 additions & 0 deletions canvas_sdk/effects/data_integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from .assign_document_reviewer import AssignDocumentReviewer as AssignDocumentReviewer
from .base import _BaseDocumentEffect as _BaseDocumentEffect
from .assign_document_reviewer import (
AssignDocumentReviewerConfidenceScores as AssignDocumentReviewerConfidenceScores,
)
from .assign_document_reviewer import Priority as Priority
from .assign_document_reviewer import ReviewMode as ReviewMode
from .junk_document import JunkDocument as JunkDocument
from .junk_document import JunkDocumentConfidenceScores as JunkDocumentConfidenceScores
from .link_document_to_patient import (
LinkDocumentConfidenceScores as LinkDocumentConfidenceScores,
)
from .link_document_to_patient import LinkDocumentToPatient as LinkDocumentToPatient
from .remove_document_from_patient import (
RemoveDocumentConfidenceScores as RemoveDocumentConfidenceScores,
)
from .remove_document_from_patient import RemoveDocumentFromPatient as RemoveDocumentFromPatient

# A single tuple bound to both ``__exports__`` and ``__all__``.
__exports__ = (
    "_BaseDocumentEffect",
    "AssignDocumentReviewer",
    "AssignDocumentReviewerConfidenceScores",
    "JunkDocument",
    "JunkDocumentConfidenceScores",
    "LinkDocumentConfidenceScores",
    "LinkDocumentToPatient",
    "Priority",
    "RemoveDocumentConfidenceScores",
    "RemoveDocumentFromPatient",
    "ReviewMode",
)
__all__ = __exports__
Loading
Loading