2
2
3
3
from typing import Optional

import pytest
import pytest_asyncio
from pydantic import BaseModel, Field

from stagehand import Stagehand, StagehandConfig
from stagehand.schemas import ExtractOptions
7
9
8
10
9
11
skip_if_no_creds = pytest .mark .skipif (
12
14
)
13
15
14
16
17
class Article(BaseModel):
    """Schema for article extraction tests.

    Passed as ``schema_definition`` to the extract call and reused to
    validate whatever the extraction returns.
    """

    # Required: validation fails when the extracted payload has no title.
    title: str = Field(..., description="The title of the article")
    # Optional: the default is None, so the annotation must admit None too.
    # Pydantic does not validate defaults, which hid the mismatch with the
    # previous plain `str` annotation until a payload carried an explicit
    # null summary.
    summary: Optional[str] = Field(
        None, description="A brief summary or description of the article"
    )
21
+
22
+
15
23
@pytest_asyncio .fixture (scope = "module" )
16
24
@skip_if_no_creds
17
25
async def stagehand_api ():
@@ -35,4 +43,48 @@ async def stagehand_api():
35
43
@pytest.mark.asyncio
async def test_stagehand_api_initialization(stagehand_api):
    """Check that the `stagehand_api` fixture exposes a session id."""
    # The fixture result must carry a non-None session identifier.
    session_id = stagehand_api.session_id
    assert session_id is not None
47
+
48
+
49
@skip_if_no_creds
@pytest.mark.integration
@pytest.mark.api
@pytest.mark.asyncio
async def test_api_extract_functionality(stagehand_api):
    """Test core extract functionality in API mode - extracted from e2e tests.

    Runs one instruction-only extraction and one schema-based extraction
    against the Hacker News front page, then checks that the schema-based
    result validates as an ``Article`` with a non-empty title.
    """
    stagehand = stagehand_api

    # Navigate to a content-rich page
    await stagehand.page.goto("https://news.ycombinator.com")

    # Simple text-based extraction: only the presence of a result is checked.
    titles_text = await stagehand.page.extract(
        "Extract the titles of the first 3 articles on the page as a JSON array"
    )
    assert titles_text is not None

    # Schema-based extraction
    extract_options = ExtractOptions(
        instruction="Extract the first article's title and any available summary",
        schema_definition=Article,
    )
    article_data = await stagehand.page.extract(extract_options)
    assert article_data is not None

    # The result may wrap the extracted fields in a `data` attribute
    # (labelled the BROWSERBASE mode format by the original test) or expose
    # them directly on the result object.
    payload = getattr(article_data, "data", None)
    if payload:
        article = Article.model_validate(payload)
    elif hasattr(article_data, "title"):
        # Fallback format: the fields live on the result object itself.
        article = Article.model_validate(article_data.model_dump())
    else:
        # Without this branch the test passed without validating anything
        # whenever the result matched neither shape.
        pytest.fail(
            "Unexpected extract result shape: "
            f"{type(article_data).__name__} has neither 'data' nor 'title'"
        )

    # A non-empty title is the single requirement; truthiness covers len > 0.
    assert article.title

    # Verify API session is active
    assert stagehand.session_id is not None
0 commit comments