Skip to content

Commit 65952ce

Browse files
fix test
1 parent 43cc9d3 commit 65952ce

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
E2E tests to ensure that extract() results are validated into snake_case Pydantic schemas
3+
for both LOCAL and BROWSERBASE environments, covering API responses that may
4+
use camelCase keys.
5+
"""
6+
7+
import os
8+
import pytest
9+
import pytest_asyncio
10+
from urllib.parse import urlparse
11+
from pydantic import BaseModel, Field, HttpUrl
12+
13+
from stagehand import Stagehand, StagehandConfig
14+
from stagehand.schemas import ExtractOptions
15+
16+
17+
# One company entry in the extraction result.
# NOTE: documented with a comment rather than a docstring on purpose: Pydantic
# copies a model's docstring into its generated JSON schema, which could alter
# the schema passed along via ExtractOptions.
class Company(BaseModel):
    company_name: str = Field(
        ...,
        description="The name of the company",
    )
    company_url: HttpUrl = Field(
        ...,
        description="The URL of the company website or relevant page",
    )
20+
21+
22+
# Top-level schema requested from extract: a wrapper around a list of Company.
# NOTE: kept as a comment (not a docstring) so the generated JSON schema of the
# model is unchanged.
class Companies(BaseModel):
    companies: list[Company] = Field(
        ...,
        description="List of companies extracted from the page",
    )
24+
25+
26+
@pytest.fixture(scope="class")
def local_config():
    """Return the StagehandConfig used for the env="LOCAL" test run.

    The model API key comes from MODEL_API_KEY, falling back to
    OPENAI_API_KEY when the former is unset or empty.
    """
    model_api_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
    client_options = {"apiKey": model_api_key}
    return StagehandConfig(
        env="LOCAL",
        model_name="gpt-4o-mini",
        headless=True,
        verbose=1,
        dom_settle_timeout_ms=2000,
        model_client_options=client_options,
    )
38+
39+
40+
@pytest.fixture(scope="class")
def browserbase_config():
    """Return the StagehandConfig used for the env="BROWSERBASE" test run.

    Browserbase credentials are read from BROWSERBASE_API_KEY and
    BROWSERBASE_PROJECT_ID; either may be None here if the variable is unset.
    The model API key comes from MODEL_API_KEY, falling back to
    OPENAI_API_KEY when the former is unset or empty.
    """
    model_api_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
    client_options = {"apiKey": model_api_key}
    return StagehandConfig(
        env="BROWSERBASE",
        api_key=os.getenv("BROWSERBASE_API_KEY"),
        project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
        model_name="gpt-4o",
        headless=False,
        verbose=2,
        dom_settle_timeout_ms=3000,
        model_client_options=client_options,
    )
54+
55+
56+
@pytest_asyncio.fixture
async def local_stagehand(local_config):
    """Yield an initialised Stagehand built from ``local_config``.

    Teardown: the instance is closed after the consuming test finishes.
    """
    client = Stagehand(config=local_config)
    await client.init()
    yield client
    # Everything after the yield runs as fixture teardown.
    await client.close()
62+
63+
64+
@pytest_asyncio.fixture
async def browserbase_stagehand(browserbase_config):
    """Yield an initialised Stagehand built from ``browserbase_config``.

    Skips the requesting test when either BROWSERBASE_API_KEY or
    BROWSERBASE_PROJECT_ID is missing or empty. Teardown: the instance is
    closed after the consuming test finishes.
    """
    # Guard first: without both credentials there is nothing to connect to.
    if not os.getenv("BROWSERBASE_API_KEY") or not os.getenv("BROWSERBASE_PROJECT_ID"):
        pytest.skip("Browserbase credentials not available")

    client = Stagehand(config=browserbase_config)
    await client.init()
    yield client
    # Everything after the yield runs as fixture teardown.
    await client.close()
72+
73+
74+
@pytest.mark.asyncio
@pytest.mark.local
async def test_extract_companies_casing_local(local_stagehand):
    """Extract companies in LOCAL mode and check they land in ``Companies``."""
    page = local_stagehand.page
    # The hosted eval site is used so the page content stays consistent.
    await page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/")

    options = ExtractOptions(
        instruction="Extract the names and URLs of up to 5 companies in batch 3",
        schema_definition=Companies,
    )
    result = await page.extract(options)

    # The result must already be an instance of our snake_case Pydantic model.
    assert isinstance(result, Companies)
    count = len(result.companies)
    assert 0 < count <= 5

    for company in result.companies:
        name = company.company_name
        assert isinstance(name, str)
        assert name
        # HttpUrl is not isinstance-checked here; the URL is validated by
        # round-tripping it through str() and urlparse instead.
        url_parts = urlparse(str(company.company_url))
        assert url_parts.scheme in ("http", "https")
        assert bool(url_parts.netloc)
96+
97+
98+
@pytest.mark.asyncio
@pytest.mark.api
@pytest.mark.skipif(
    not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
    reason="Browserbase credentials not available",
)
async def test_extract_companies_casing_browserbase(browserbase_stagehand):
    """Extract companies in BROWSERBASE mode and check they land in ``Companies``.

    Skipped when the Browserbase credentials are not set in the environment.
    """
    page = browserbase_stagehand.page
    # The hosted eval site is used so the page content stays consistent.
    await page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/")

    options = ExtractOptions(
        instruction="Extract the names and URLs of up to 5 companies in batch 3",
        schema_definition=Companies,
    )
    result = await page.extract(options)

    # The result must be our snake_case Pydantic model even if the API
    # returns camelCase keys.
    assert isinstance(result, Companies)
    count = len(result.companies)
    assert 0 < count <= 5

    for company in result.companies:
        name = company.company_name
        assert isinstance(name, str)
        assert name
        # Validate the URL by parsing its string form.
        url_parts = urlparse(str(company.company_url))
        assert url_parts.scheme in ("http", "https")
        assert bool(url_parts.netloc)
123+
124+

0 commit comments

Comments
 (0)