Add Pydantic v2 enrichment and search schema models (#382)

Cataldir · web-flow · commit 7d9a86ff0680 · 2026-03-20T04:04:31.000Z
diff --git a/lib/src/holiday_peak_lib/schemas/__init__.py b/lib/src/holiday_peak_lib/schemas/__init__.py
@@ -10,6 +10,10 @@
 from .logistics import LogisticsContext, Shipment, ShipmentEvent
 from .pricing import PriceContext, PriceEntry
 from .product import CanonicalProduct, CatalogProduct, ProductContext
+from .search import (
+    IntentClassification,
+    SearchEnrichedProduct,
+)
 from .truth import (
     AssetMetadata,
     AttributeSource,
@@ -22,12 +26,17 @@
     ExportResult,
     GapReport,
     GapReportTarget,
-    IntentClassification,
+)
+from .truth import IntentClassification as LegacyIntentClassification
+from .truth import (
+    ProductEnrichmentProposal,
     ProductStyle,
     ProductVariant,
     ProposedAttribute,
     Provenance,
-    SearchEnrichedProduct,
+)
+from .truth import SearchEnrichedProduct as LegacySearchEnrichedProduct
+from .truth import (
     SharePolicy,
     SourceType,
     TruthAttribute,
@@ -79,11 +88,14 @@
     "GapReportTarget",
     "ProductStyle",
     "ProductVariant",
+    "ProductEnrichmentProposal",
     "ProposedAttribute",
     "Provenance",
     "SearchEnrichedProduct",
+    "LegacySearchEnrichedProduct",
     "SharePolicy",
     "SourceType",
     "TruthAttribute",
     "IntentClassification",
+    "LegacyIntentClassification",
 ]
diff --git a/lib/src/holiday_peak_lib/schemas/search.py b/lib/src/holiday_peak_lib/schemas/search.py
@@ -0,0 +1,59 @@
+"""Search and query-intelligence schemas.
+
+Pydantic v2 models used by intelligent search and product enrichment flows.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any, Literal, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class SearchEnrichedProduct(BaseModel):
+    """Search-facing product document that carries enrichment output.
+
+    Aliased fields accept either the camelCase alias or the snake_case name.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: str
+    entity_id: str = Field(alias="entityId")
+    sku: str
+    name: str
+    brand: str
+    category: str
+    description: Optional[str] = None
+    price: Optional[float] = None
+    use_cases: list[str] = Field(alias="useCases", default_factory=list)
+    complementary_products: list[str] = Field(alias="complementaryProducts", default_factory=list)
+    substitute_products: list[str] = Field(alias="substituteProducts", default_factory=list)
+    search_keywords: list[str] = Field(alias="searchKeywords", default_factory=list)
+    enriched_description: Optional[str] = Field(alias="enrichedDescription", default=None)
+    # When omitted, stamped with the current UTC time at model construction.
+    enriched_at: datetime = Field(
+        alias="enrichedAt", default_factory=lambda: datetime.now(timezone.utc)
+    )
+    enrichment_model: Optional[str] = Field(alias="enrichmentModel", default=None)
+    source_approval_version: int = Field(alias="sourceApprovalVersion")
+
+
+class IntentClassification(BaseModel):
+    """Structured interpretation of a search query for intelligent search."""
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    query_type: Literal["simple", "complex"] = Field(alias="queryType")
+    category: Optional[str] = None
+    attributes: list[str] = Field(default_factory=list)
+    use_case: Optional[str] = Field(None, alias="useCase")
+    brand: Optional[str] = None
+    # Optional[...] is consistent with the module and also evaluates on Python < 3.10.
+    price_range: tuple[Optional[float], Optional[float]] = Field(
+        default=(None, None), alias="priceRange"
+    )
+    filters: dict[str, Any] = Field(default_factory=dict)
+    sub_queries: list[str] = Field(default_factory=list, alias="subQueries")
+    confidence: float = Field(..., ge=0.0, le=1.0)
diff --git a/lib/src/holiday_peak_lib/schemas/truth.py b/lib/src/holiday_peak_lib/schemas/truth.py
@@ -79,6 +79,8 @@ class GapReportTarget(str, Enum):
 class SourceType(str, Enum):
     """Origin category for enrichment/search contextual data."""
 
+    TEXT_ENRICHMENT = "text_enrichment"
+    HYBRID = "hybrid"
     AI_REASONING = "ai_reasoning"
     PRODUCT_CONTEXT = "product_context"
     CATEGORY_INFERENCE = "category_inference"
@@ -260,6 +262,16 @@ class ProposedAttribute(BaseModel):
     reasoning: Optional[str] = None
 
 
+class ProductEnrichmentProposal(ProposedAttribute):
+    """Enrichment-proposal alias of ``ProposedAttribute``.
+
+    Declares no fields of its own, so payloads keep the ``ProposedAttribute``
+    shape; only ``populate_by_name`` is set explicitly on this subclass.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+
 class IntentClassification(BaseModel):
     """Intent classification result for a search/enrichment request."""
 
diff --git a/lib/tests/test_search_schemas.py b/lib/tests/test_search_schemas.py
@@ -0,0 +1,97 @@
+"""Unit tests for search-oriented schemas (search.py)."""
+
+from datetime import datetime, timezone
+
+import pytest
+from holiday_peak_lib.schemas.search import IntentClassification, SearchEnrichedProduct
+
+
+class TestSearchEnrichedProduct:
+    """Validation and serialization checks for ``SearchEnrichedProduct``."""
+
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {
+                "id": "SEP-1",
+                "entityId": "STYLE-1",
+                "sku": "SKU-1",
+                "name": "Trail Shoe",
+                "brand": "Contoso",
+                "category": "footwear",
+                "sourceApprovalVersion": 3,
+            },
+            {
+                "id": "SEP-2",
+                "entityId": "STYLE-2",
+                "sku": "SKU-2",
+                "name": "Running Shoe",
+                "brand": "Fabrikam",
+                "category": "footwear",
+                "description": "Breathable upper",
+                "price": 129.99,
+                "useCases": ["running"],
+                "complementaryProducts": ["SKU-SOCK-1"],
+                "substituteProducts": ["SKU-ALT-1"],
+                "searchKeywords": ["shoe", "breathable"],
+                "enrichedDescription": "Lightweight breathable trail shoe.",
+                "enrichedAt": datetime(2026, 3, 1, tzinfo=timezone.utc),
+                "enrichmentModel": "gpt-4o-mini",
+                "sourceApprovalVersion": 5,
+            },
+        ],
+    )
+    def test_required_and_optional_fields(self, payload: dict):
+        """Alias-keyed payloads validate with and without optional fields."""
+        product = SearchEnrichedProduct(**payload)
+        assert product.id.startswith("SEP-")
+        assert product.entity_id.startswith("STYLE-")
+        assert product.source_approval_version > 0
+
+    def test_json_roundtrip(self):
+        """Alias-keyed JSON emitted by the model parses back into it."""
+        doc = SearchEnrichedProduct(
+            id="SEP-3",
+            entityId="STYLE-3",
+            sku="SKU-3",
+            name="Hiking Boot",
+            brand="Contoso",
+            category="boots",
+            useCases=["hiking"],
+            sourceApprovalVersion=7,
+        )
+        restored = SearchEnrichedProduct.model_validate_json(doc.model_dump_json(by_alias=True))
+        assert (restored.entity_id, restored.use_cases) == ("STYLE-3", ["hiking"])
+
+
+class TestIntentClassification:
+    """Defaults, confidence bounds and JSON round-trip of IntentClassification."""
+
+    @pytest.mark.parametrize("query_type", ["simple", "complex"])
+    def test_required_and_optional_fields(self, query_type: str):
+        model = IntentClassification(queryType=query_type, confidence=0.84)
+        assert model.query_type == query_type
+        assert model.attributes == []
+        assert model.price_range == (None, None)
+
+    @pytest.mark.parametrize("confidence", [-0.01, 1.01])
+    def test_confidence_bounds(self, confidence: float):
+        with pytest.raises(ValueError):  # pydantic's ValidationError subclasses ValueError
+            IntentClassification(queryType="simple", confidence=confidence)
+
+    def test_json_roundtrip(self):
+        model = IntentClassification(
+            queryType="complex",
+            category="footwear",
+            attributes=["waterproof", "lightweight"],
+            useCase="hiking",
+            brand="Contoso",
+            priceRange=(100.0, 200.0),
+            filters={"size": "10"},
+            subQueries=["waterproof hiking shoe", "lightweight hiking boot"],
+            confidence=0.91,
+        )
+        restored = IntentClassification.model_validate_json(model.model_dump_json(by_alias=True))
+        assert restored.query_type == "complex"
+        assert restored.price_range == (100.0, 200.0)
+        assert restored.sub_queries[0] == "waterproof hiking shoe"
diff --git a/lib/tests/test_truth_schemas.py b/lib/tests/test_truth_schemas.py
@@ -14,6 +14,7 @@
     GapReport,
     GapReportTarget,
     IntentClassification,
+    ProductEnrichmentProposal,
     ProductStyle,
     ProductVariant,
     ProposedAttribute,
@@ -335,6 +336,49 @@ def test_json_roundtrip(self):
         assert restored.entities["brand"] == "Acme"
 
 
+class TestProductEnrichmentProposal:
+    """Checks that ``ProductEnrichmentProposal`` accepts ``ProposedAttribute`` payloads."""
+
+    @pytest.mark.parametrize(
+        "source_type",
+        ["text_enrichment", "image_analysis", "hybrid", "ai_reasoning"],
+    )
+    def test_compatible_payload_and_source_type_variants(self, source_type: str):
+        """Each parametrized source-type string validates and is exposed via ``.value``."""
+        proposal = ProductEnrichmentProposal(
+            entityType="style",
+            entityId="S1",
+            attributeKey="material",
+            value="leather",
+            source="SYSTEM",
+            confidence=0.81,
+            modelRunId="run-1",
+            sourceType=source_type,
+            sourceAssets=["asset-1"],
+            originalData={"name": "Original"},
+            enrichedData={"name": "Enriched"},
+            reasoning="Context and evidence support the proposal.",
+        )
+        assert proposal.source_type is not None and proposal.source_type.value == source_type
+        assert proposal.source_assets == ["asset-1"]
+
+    def test_roundtrip_json(self):
+        proposal = ProductEnrichmentProposal(
+            entityType="style",
+            entityId="S2",
+            attributeKey="title",
+            value="Trail Shoe",
+            source="PIM",
+            confidence=0.77,
+            modelRunId="run-2",
+            sourceType="text_enrichment",
+        )
+        as_json = proposal.model_dump_json(by_alias=True)
+        reloaded = ProductEnrichmentProposal.model_validate_json(as_json)
+        assert reloaded.model_run_id == "run-2"
+        assert reloaded.source_type == SourceType.TEXT_ENRICHMENT
+
+
 class TestSearchEnrichedProduct:
     """Tests for SearchEnrichedProduct model."""
 
@@ -527,6 +571,7 @@ def test_package_exports(self):
         for name in [
             "ProductStyle",
             "ProductVariant",
+            "ProductEnrichmentProposal",
             "TruthAttribute",
             "ProposedAttribute",
             "GapReport",