Skip to content

Commit 7d9a86f

Browse files
authored
Add Pydantic v2 enrichment and search schema models (#382)
1 parent c0de6e2 commit 7d9a86f

File tree

5 files changed

+227
-2
lines changed

5 files changed

+227
-2
lines changed

lib/src/holiday_peak_lib/schemas/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
from .logistics import LogisticsContext, Shipment, ShipmentEvent
1111
from .pricing import PriceContext, PriceEntry
1212
from .product import CanonicalProduct, CatalogProduct, ProductContext
13+
from .search import (
14+
IntentClassification,
15+
SearchEnrichedProduct,
16+
)
1317
from .truth import (
1418
AssetMetadata,
1519
AttributeSource,
@@ -22,12 +26,17 @@
2226
ExportResult,
2327
GapReport,
2428
GapReportTarget,
25-
IntentClassification,
29+
)
30+
from .truth import IntentClassification as LegacyIntentClassification
31+
from .truth import (
32+
ProductEnrichmentProposal,
2633
ProductStyle,
2734
ProductVariant,
2835
ProposedAttribute,
2936
Provenance,
30-
SearchEnrichedProduct,
37+
)
38+
from .truth import SearchEnrichedProduct as LegacySearchEnrichedProduct
39+
from .truth import (
3140
SharePolicy,
3241
SourceType,
3342
TruthAttribute,
@@ -79,11 +88,14 @@
7988
"GapReportTarget",
8089
"ProductStyle",
8190
"ProductVariant",
91+
"ProductEnrichmentProposal",
8292
"ProposedAttribute",
8393
"Provenance",
8494
"SearchEnrichedProduct",
95+
"LegacySearchEnrichedProduct",
8596
"SharePolicy",
8697
"SourceType",
8798
"TruthAttribute",
8899
"IntentClassification",
100+
"LegacyIntentClassification",
89101
]
lib/src/holiday_peak_lib/schemas/search.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""Search and query-intelligence schemas.
2+
3+
Pydantic v2 models used by intelligent search and product enrichment flows.
4+
"""
5+
6+
from __future__ import annotations
7+
8+
from datetime import datetime, timezone
9+
from typing import Any, Literal, Optional
10+
11+
from pydantic import BaseModel, ConfigDict, Field
12+
13+
14+
class SearchEnrichedProduct(BaseModel):
    """Search-facing product document carrying enrichment output.

    Aliased fields can be populated either through their camelCase alias or
    through their snake_case attribute name (``populate_by_name=True``).
    """

    model_config = ConfigDict(populate_by_name=True)

    # --- Required identity / catalog fields -------------------------------
    id: str
    entity_id: str = Field(alias="entityId")
    sku: str
    name: str
    brand: str
    category: str

    # --- Optional catalog data --------------------------------------------
    description: str | None = None
    price: float | None = None

    # --- Enrichment lists (a fresh empty list per instance by default) ----
    use_cases: list[str] = Field(alias="useCases", default_factory=list)
    complementary_products: list[str] = Field(
        alias="complementaryProducts", default_factory=list
    )
    substitute_products: list[str] = Field(
        alias="substituteProducts", default_factory=list
    )
    search_keywords: list[str] = Field(alias="searchKeywords", default_factory=list)

    # --- Enrichment metadata ----------------------------------------------
    enriched_description: str | None = Field(default=None, alias="enrichedDescription")
    # When omitted, stamped with the timezone-aware UTC time at instantiation.
    enriched_at: datetime = Field(
        alias="enrichedAt",
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
    enrichment_model: str | None = Field(default=None, alias="enrichmentModel")
    # Required: no default is supplied.
    source_approval_version: int = Field(alias="sourceApprovalVersion")
41+
42+
43+
class IntentClassification(BaseModel):
    """Structured interpretation of a search query.

    Only ``query_type`` and ``confidence`` are required; every other field
    falls back to ``None`` or an empty container. Aliased fields accept both
    their camelCase alias and their snake_case attribute name
    (``populate_by_name=True``).
    """

    model_config = ConfigDict(populate_by_name=True)

    # Restricted to the two literal values below.
    query_type: Literal["simple", "complex"] = Field(alias="queryType")
    category: str | None = None
    attributes: list[str] = Field(default_factory=list)
    use_case: str | None = Field(default=None, alias="useCase")
    brand: str | None = None
    # Two-slot tuple where either slot may be None.
    # NOTE(review): presumably (min, max) — confirm with the producer.
    price_range: tuple[float | None, float | None] = Field(
        alias="priceRange",
        default=(None, None),
    )
    filters: dict[str, Any] = Field(default_factory=dict)
    sub_queries: list[str] = Field(alias="subQueries", default_factory=list)
    # Required; must lie within the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)

lib/src/holiday_peak_lib/schemas/truth.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ class GapReportTarget(str, Enum):
7979
class SourceType(str, Enum):
8080
"""Origin category for enrichment/search contextual data."""
8181

82+
TEXT_ENRICHMENT = "text_enrichment"
83+
HYBRID = "hybrid"
8284
AI_REASONING = "ai_reasoning"
8385
PRODUCT_CONTEXT = "product_context"
8486
CATEGORY_INFERENCE = "category_inference"
@@ -260,6 +262,16 @@ class ProposedAttribute(BaseModel):
260262
reasoning: Optional[str] = None
261263

262264

265+
class ProductEnrichmentProposal(ProposedAttribute):
    """Enrichment-pipeline name for a ``ProposedAttribute`` payload.

    The subclass declares no fields of its own, so its payload contract is
    that of ``ProposedAttribute``. It exists to give enrichment/search
    pipelines a dedicated semantic type.
    """

    # Allow population by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)
273+
274+
263275
class IntentClassification(BaseModel):
264276
"""Intent classification result for a search/enrichment request."""
265277

lib/tests/test_search_schemas.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""Unit tests for search-oriented schemas (search.py)."""
2+
3+
from datetime import datetime, timezone
4+
5+
import pytest
6+
from holiday_peak_lib.schemas.search import IntentClassification, SearchEnrichedProduct
7+
8+
9+
class TestSearchEnrichedProduct:
    """Exercise construction and JSON round-tripping of SearchEnrichedProduct."""

    # Payload carrying only the fields that have no default.
    _REQUIRED_ONLY = {
        "id": "SEP-1",
        "entityId": "STYLE-1",
        "sku": "SKU-1",
        "name": "Trail Shoe",
        "brand": "Contoso",
        "category": "footwear",
        "sourceApprovalVersion": 3,
    }

    # Payload that additionally sets every optional field.
    _FULLY_POPULATED = {
        "id": "SEP-2",
        "entityId": "STYLE-2",
        "sku": "SKU-2",
        "name": "Running Shoe",
        "brand": "Fabrikam",
        "category": "footwear",
        "description": "Breathable upper",
        "price": 129.99,
        "useCases": ["running"],
        "complementaryProducts": ["SKU-SOCK-1"],
        "substituteProducts": ["SKU-ALT-1"],
        "searchKeywords": ["shoe", "breathable"],
        "enrichedDescription": "Lightweight breathable trail shoe.",
        "enrichedAt": datetime(2026, 3, 1, tzinfo=timezone.utc),
        "enrichmentModel": "gpt-4o-mini",
        "sourceApprovalVersion": 5,
    }

    @pytest.mark.parametrize("payload", [_REQUIRED_ONLY, _FULLY_POPULATED])
    def test_required_and_optional_fields(self, payload: dict):
        """Both the minimal and the fully populated payload validate."""
        product = SearchEnrichedProduct(**payload)

        assert product.id.startswith("SEP-")
        assert product.entity_id.startswith("STYLE-")
        assert product.source_approval_version > 0

    def test_json_roundtrip(self):
        """Alias-keyed JSON produced by the model parses back into the model."""
        original = SearchEnrichedProduct(
            id="SEP-3",
            entityId="STYLE-3",
            sku="SKU-3",
            name="Hiking Boot",
            brand="Contoso",
            category="boots",
            useCases=["hiking"],
            sourceApprovalVersion=7,
        )

        restored = SearchEnrichedProduct.model_validate_json(
            original.model_dump_json(by_alias=True)
        )

        assert restored.entity_id == "STYLE-3"
        assert restored.use_cases == ["hiking"]
65+
66+
67+
class TestIntentClassification:
    """Tests for IntentClassification model."""

    @pytest.mark.parametrize("query_type", ["simple", "complex"])
    def test_required_and_optional_fields(self, query_type: str):
        """Only ``queryType`` and ``confidence`` are needed; the rest default."""
        model = IntentClassification(queryType=query_type, confidence=0.84)
        assert model.query_type == query_type
        assert model.attributes == []
        assert model.price_range == (None, None)

    @pytest.mark.parametrize("confidence", [-0.01, 1.01])
    def test_confidence_bounds(self, confidence: float):
        """Values just outside [0.0, 1.0] are rejected by validation."""
        # Imported locally so this block does not depend on module-level imports.
        from pydantic import ValidationError

        # Expect the validation error specifically: a bare ``Exception`` would
        # also pass on unrelated failures (e.g. a TypeError or NameError).
        with pytest.raises(ValidationError):
            IntentClassification(queryType="simple", confidence=confidence)

    @pytest.mark.parametrize("confidence", [0.0, 1.0])
    def test_confidence_bounds_are_inclusive(self, confidence: float):
        """The exact boundary values 0.0 and 1.0 are accepted."""
        model = IntentClassification(queryType="simple", confidence=confidence)
        assert model.confidence == confidence

    def test_json_roundtrip(self):
        """Alias-keyed JSON produced by the model parses back into the model."""
        model = IntentClassification(
            queryType="complex",
            category="footwear",
            attributes=["waterproof", "lightweight"],
            useCase="hiking",
            brand="Contoso",
            priceRange=(100.0, 200.0),
            filters={"size": "10"},
            subQueries=["waterproof hiking shoe", "lightweight hiking boot"],
            confidence=0.91,
        )
        payload = model.model_dump_json(by_alias=True)
        restored = IntentClassification.model_validate_json(payload)
        assert restored.query_type == "complex"
        assert restored.sub_queries[0] == "waterproof hiking shoe"
        # The tuple is emitted as a JSON array; check it comes back as a tuple.
        assert restored.price_range == (100.0, 200.0)

lib/tests/test_truth_schemas.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
GapReport,
1515
GapReportTarget,
1616
IntentClassification,
17+
ProductEnrichmentProposal,
1718
ProductStyle,
1819
ProductVariant,
1920
ProposedAttribute,
@@ -335,6 +336,49 @@ def test_json_roundtrip(self):
335336
assert restored.entities["brand"] == "Acme"
336337

337338

339+
class TestProductEnrichmentProposal:
    """Verify ProductEnrichmentProposal accepts the ProposedAttribute payload."""

    @pytest.mark.parametrize(
        "source_type",
        ("text_enrichment", "image_analysis", "hybrid", "ai_reasoning"),
    )
    def test_compatible_payload_and_source_type_variants(self, source_type: str):
        """A full alias-keyed payload validates for each listed source type."""
        proposal = ProductEnrichmentProposal(
            entityType="style",
            entityId="S1",
            attributeKey="material",
            value="leather",
            source="SYSTEM",
            confidence=0.81,
            modelRunId="run-1",
            sourceType=source_type,
            sourceAssets=["asset-1"],
            originalData={"name": "Original"},
            enrichedData={"name": "Enriched"},
            reasoning="Context and evidence support the proposal.",
        )

        parsed_source_type = proposal.source_type
        assert parsed_source_type is not None
        assert parsed_source_type.value == source_type
        assert proposal.source_assets == ["asset-1"]

    def test_roundtrip_json(self):
        """Alias-keyed JSON produced by the model parses back into the model."""
        original = ProductEnrichmentProposal(
            entityType="style",
            entityId="S2",
            attributeKey="title",
            value="Trail Shoe",
            source="PIM",
            confidence=0.77,
            modelRunId="run-2",
            sourceType="text_enrichment",
        )

        restored = ProductEnrichmentProposal.model_validate_json(
            original.model_dump_json(by_alias=True)
        )

        assert restored.model_run_id == "run-2"
        assert restored.source_type == SourceType.TEXT_ENRICHMENT
380+
381+
338382
class TestSearchEnrichedProduct:
339383
"""Tests for SearchEnrichedProduct model."""
340384

@@ -527,6 +571,7 @@ def test_package_exports(self):
527571
for name in [
528572
"ProductStyle",
529573
"ProductVariant",
574+
"ProductEnrichmentProposal",
530575
"TruthAttribute",
531576
"ProposedAttribute",
532577
"GapReport",

0 commit comments

Comments
 (0)