Skip to content

Commit 43cc9d3

Browse files
normalize camel from api
1 parent 61ade28 commit 43cc9d3

File tree

3 files changed

+74
-13
lines changed

3 files changed

+74
-13
lines changed

stagehand/handlers/extract_handler.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313
ExtractOptions,
1414
ExtractResult,
1515
)
16-
from stagehand.utils import inject_urls, transform_url_strings_to_ids
16+
from stagehand.utils import (
17+
inject_urls,
18+
transform_url_strings_to_ids,
19+
convert_dict_keys_to_snake_case,
20+
)
1721

1822
T = TypeVar("T", bound=BaseModel)
1923

@@ -147,16 +151,22 @@ async def extract(
147151

148152
processed_data_payload = raw_data_dict # Default to the raw dictionary
149153

150-
if schema and isinstance(
151-
raw_data_dict, dict
152-
): # schema is the Pydantic model type
154+
if schema and isinstance(raw_data_dict, dict): # schema is the Pydantic model type
155+
# Try direct validation first
153156
try:
154157
validated_model_instance = schema.model_validate(raw_data_dict)
155-
processed_data_payload = validated_model_instance # Payload is now the Pydantic model instance
156-
except Exception as e:
157-
self.logger.error(
158-
f"Failed to validate extracted data against schema {schema.__name__}: {e}. Keeping raw data dict in .data field."
159-
)
158+
processed_data_payload = validated_model_instance
159+
except Exception as first_error:
160+
# Fallback: attempt camelCase→snake_case key normalization, then re-validate
161+
try:
162+
normalized = convert_dict_keys_to_snake_case(raw_data_dict)
163+
validated_model_instance = schema.model_validate(normalized)
164+
processed_data_payload = validated_model_instance
165+
except Exception as second_error:
166+
self.logger.error(
167+
f"Failed to validate extracted data against schema {schema.__name__}: {first_error}. "
168+
f"Normalization retry also failed: {second_error}. Keeping raw data dict in .data field."
169+
)
160170

161171
# Create ExtractResult object
162172
result = ExtractResult(

stagehand/page.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ObserveResult,
2020
)
2121
from .types import DefaultExtractSchema, EmptyExtractSchema
22+
from .utils import convert_dict_keys_to_snake_case
2223

2324
_INJECTION_SCRIPT = None
2425

@@ -412,10 +413,20 @@ async def extract(
412413
processed_data_payload
413414
)
414415
processed_data_payload = validated_model
415-
except Exception as e:
416-
self._stagehand.logger.error(
417-
f"Failed to validate extracted data against schema {schema_to_validate_with.__name__}: {e}. Keeping raw data dict in .data field."
418-
)
416+
except Exception as first_error:
417+
# Fallback: normalize keys to snake_case and try once more
418+
try:
419+
normalized = convert_dict_keys_to_snake_case(processed_data_payload)
420+
if not options_obj:
421+
validated_model = EmptyExtractSchema.model_validate(normalized)
422+
else:
423+
validated_model = schema_to_validate_with.model_validate(normalized)
424+
processed_data_payload = validated_model
425+
except Exception as second_error:
426+
self._stagehand.logger.error(
427+
f"Failed to validate extracted data against schema {getattr(schema_to_validate_with, '__name__', str(schema_to_validate_with))}: {first_error}. "
428+
f"Normalization retry also failed: {second_error}. Keeping raw data dict in .data field."
429+
)
419430
return ExtractResult(data=processed_data_payload).data
420431
# Handle unexpected return types
421432
self._stagehand.logger.info(

stagehand/utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,46 @@ def convert_dict_keys_to_camel_case(data: dict[str, Any]) -> dict[str, Any]:
5555
return result
5656

5757

58+
def camel_to_snake(camel_str: str) -> str:
    """
    Convert a camelCase or PascalCase string to snake_case.

    An underscore is inserted before an uppercase character only when that
    character starts a new word, i.e. when the preceding character is neither
    uppercase nor already an underscore. Runs of consecutive uppercase
    characters are lowercased without separators ("userID" -> "user_id").
    Keys that already contain underscores are not given doubled separators
    ("user_Name" -> "user_name").

    Args:
        camel_str: The camelCase/PascalCase string to convert

    Returns:
        The converted snake_case string
    """
    result_chars = []
    previous = ""
    for char in camel_str:
        # `previous` is empty only for the first character, which never
        # receives a separator. An existing "_" already separates the words,
        # so adding another one would produce "__".
        starts_new_word = (
            char.isupper()
            and previous != ""
            and previous != "_"
            and not previous.isupper()
        )
        if starts_new_word:
            result_chars.append("_")
        result_chars.append(char.lower())
        previous = char
    return "".join(result_chars)
74+
75+
76+
def convert_dict_keys_to_snake_case(data: Any) -> Any:
    """
    Return a copy of ``data`` with every string dictionary key in snake_case.

    Dictionaries and lists are walked recursively; each string key is passed
    through ``camel_to_snake`` while non-string keys are kept unchanged. Any
    value that is neither a dict nor a list is returned as-is.

    Args:
        data: Potentially nested structure with dictionaries/lists

    Returns:
        A new structure with all dict keys converted to snake_case
    """
    if isinstance(data, list):
        return [convert_dict_keys_to_snake_case(element) for element in data]
    if not isinstance(data, dict):
        # Scalars and other container types pass through untouched.
        return data
    renamed: dict[str, Any] = {
        (camel_to_snake(name) if isinstance(name, str) else name): (
            convert_dict_keys_to_snake_case(item)
        )
        for name, item in data.items()
    }
    return renamed
96+
97+
5898
def format_simplified_tree(node: AccessibilityNode, level: int = 0) -> str:
5999
"""Formats a node and its children into a simplified string representation."""
60100
indent = " " * level

0 commit comments

Comments
 (0)