updates for BB-820: first crack

miguelg719 · miguelg719 · commit 3e242f772769 · 2025-02-26T14:13:23.000-08:00
diff --git a/examples/example.py b/examples/example.py
@@ -4,7 +4,8 @@
 from dotenv import load_dotenv
 from stagehand.client import Stagehand
 from stagehand.config import StagehandConfig
-from stagehand.schemas import ActOptions, ObserveOptions
+from pydantic import BaseModel
+from stagehand.schemas import ExtractOptions
 
 load_dotenv()
 
@@ -15,14 +16,16 @@
     datefmt='%Y-%m-%d %H:%M:%S'
 )
 
+class ExtractSchema(BaseModel):
+    stars: int
+
 async def main():
     try:
         # Build a unified configuration object for Stagehand
         config = StagehandConfig(
-            env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL",
+            env="BROWSERBASE",
             api_key=os.getenv("BROWSERBASE_API_KEY"),
             project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
-            debug_dom=True,
             headless=False,
             dom_settle_timeout_ms=3000,
             model_name="gpt-4o-mini",
@@ -34,38 +37,67 @@ async def main():
 
         # Initialize - this creates a new session automatically.
         await stagehand.init()
+        page = stagehand.page
         print(f"Created new session with ID: {stagehand.session_id}")
 
         print('EXAMPLE: You can navigate to any website using the local or remote Playwright.')
 
-        await stagehand.page.goto("https://news.ycombinator.com/")
+        await page.goto("https://news.ycombinator.com/")
         print("Navigation complete with local Playwright.")
 
-        await stagehand.page.navigate("https://www.google.com")
+        await page.navigate("https://www.google.com")
         print("Navigation complete with remote Playwright.")
 
         print("EXAMPLE: Clicking on About link using local Playwright's get_by_role")
         # Click on the "About" link using Playwright
-        await stagehand.page.get_by_role("link", name="About", exact=True).click()
+        await page.get_by_role("link", name="About", exact=True).click()
         print("Clicked on About link")
 
         await asyncio.sleep(2)
-        await stagehand.page.navigate("https://www.google.com")
+        await page.navigate("https://www.google.com")
         
         # Hosted Stagehand API - ACT to do something like 'search for openai'
-        await stagehand.page.act(ActOptions(action="search for openai"))
+        print(f"EXAMPLE: Performing action")
+        await page.act("search for openai")
         
-        print("EXAMPLE: Find the XPATH of the button 'News' using Stagehand API")
-        xpaths = await stagehand.page.observe(ObserveOptions(instruction="find the button labeled 'News'", only_visible=True))
-        if len(xpaths) > 0:
-            element = xpaths[0]
-            print("EXAMPLE: Click on the button 'News' using local Playwright.")
-            await stagehand.page.click(element["selector"])
+        # print("EXAMPLE: Find the XPATH of the button 'News' using Stagehand API")
+        observed = await page.observe("find the news button on the page")
+        if len(observed) > 0:
+            element = observed[0]
+            # print("EXAMPLE: Click on the button 'News' using local Playwright.")
+            await page.act(element)
         else:
             print("No element found")
 
     except Exception as e:
         print(f"An error occurred in the example: {e}")
+    finally:
+        await stagehand.close()
+
+    new_stagehand = Stagehand(config=config, server_url=os.getenv("STAGEHAND_SERVER_URL"), verbose=2)
+    # page = new_stagehand.page
+    await new_stagehand.init()
+    page = new_stagehand.page
+    print(f"Created new session with ID: {new_stagehand.session_id}")
+
+    try:
+        await page.navigate("https://github.com/facebook/react")
+        print("Navigation complete.")
+
+        # Use the ExtractOptions Pydantic model to pass instruction and schema definition
+        data = await page.extract("Extract the number of stars for the project")
+        data = await page.extract(
+            ExtractOptions(
+                instruction="Extract the number of stars for the project",
+                schemaDefinition=ExtractSchema.model_json_schema()
+            )
+        )
+        print("\nExtracted stars:", data)
+
+    except Exception as e:
+        print(f"Error: {e}")
+    finally:
+        await new_stagehand.close()
 
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/examples/extract-example.py b/examples/extract-example.py
@@ -36,13 +36,14 @@ async def main():
         print("Navigation complete.")
 
         # Use the ExtractOptions Pydantic model to pass instruction and schema definition
+        print(ExtractSchema.model_json_schema())
         data = await stagehand.page.extract(
             ExtractOptions(
                 instruction="Extract the number of stars for the project",
                 schemaDefinition=ExtractSchema.model_json_schema()
             )
         )
-        print("\nExtracted stars:", data)
+        print("\nExtracted stars:", data["stars"])
 
     except Exception as e:
         print(f"Error: {e}")
diff --git a/examples/observe-example.py b/examples/observe-example.py
@@ -11,13 +11,12 @@
 async def main():
     # Build a unified Stagehand configuration object
     config = StagehandConfig(
-        env="BROWSERBASE" if os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID") else "LOCAL",
+        env="BROWSERBASE",
         api_key=os.getenv("BROWSERBASE_API_KEY"),
         project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
         debug_dom=True,
         headless=True,
         model_name="gpt-4o-mini",
-        model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}
     )
 
     # Create a Stagehand client using the configuration object.
@@ -35,7 +34,6 @@ async def main():
         # Use ObserveOptions for detailed instructions
         options = ObserveOptions(
             instruction="find all the links on the page regarding the city of el paso",
-            only_visible=True
         )
         activity = await stagehand.page.observe(options)
         print("\nObservations:", activity)
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="stagehand-py",
-    version="0.2.1",
+    version="0.3.0",
     author="Browserbase, Inc.",
     author_email="support@browserbase.io",
     description="Python SDK for Stagehand",
diff --git a/stagehand/client.py b/stagehand/client.py
@@ -65,9 +65,7 @@ def __init__(
         if config:
             self.browserbase_api_key = config.api_key or browserbase_api_key or os.getenv("BROWSERBASE_API_KEY")
             self.browserbase_project_id = config.project_id or browserbase_project_id or os.getenv("BROWSERBASE_PROJECT_ID")
-            self.model_api_key = model_api_key or (
-                config.model_client_options.get("apiKey") if config.model_client_options else None
-            ) or os.getenv("MODEL_API_KEY")
+            self.model_api_key = os.getenv("MODEL_API_KEY")
             self.session_id = config.browserbase_session_id or session_id
             self.model_name = config.model_name or model_name
             self.dom_settle_timeout_ms = config.dom_settle_timeout_ms or dom_settle_timeout_ms
@@ -200,20 +198,38 @@ async def close(self):
             return
 
         self._log("Closing resources...", level=1)
-        if self._playwright_page:
-            self._log("Closing the Playwright page...", level=1)
-            await self._playwright_page.close()
-            self._playwright_page = None
+        
+        # End the session on the server if we have a session ID
+        if self.session_id:
+            try:
+                self._log(f"Ending session {self.session_id} on the server...", level=1)
+                client = self.httpx_client or httpx.AsyncClient(timeout=self.timeout_settings)
+                headers = {
+                    "x-bb-api-key": self.browserbase_api_key,
+                    "x-bb-project-id": self.browserbase_project_id,
+                    "Content-Type": "application/json",
+                }
+                
+                async with client:
+                    await self._execute("end", {"sessionId": self.session_id})
+                    self._log(f"Session {self.session_id} ended successfully", level=1)
+            except Exception as e:
+                self._log(f"Error ending session: {str(e)}", level=2)
+        
+        # if self._playwright_page:
+        #     self._log("Closing the Playwright page...", level=1)
+        #     await self._playwright_page.close()
+        #     self._playwright_page = None
 
-        if self._context:
-            self._log("Closing the context...", level=1)
-            await self._context.close()
-            self._context = None
+        # if self._context:
+        #     self._log("Closing the context...", level=1)
+        #     await self._context.close()
+        #     self._context = None
 
-        if self._browser:
-            self._log("Closing the browser...", level=1)
-            await self._browser.close()
-            self._browser = None
+        # if self._browser:
+        #     self._log("Closing the browser...", level=1)
+        #     await self._browser.close()
+        #     self._browser = None
 
         if self._playwright:
             self._log("Stopping Playwright...", level=1)
@@ -315,56 +331,76 @@ async def _execute(self, method: str, payload: Dict[str, Any]) -> Any:
             headers["x-model-api-key"] = self.model_api_key
 
         client = self.httpx_client or httpx.AsyncClient(timeout=self.timeout_settings)
-        print(f"Executing {method} with payload: {payload} and headers: {headers}")
+        print(f"\n==== EXECUTING {method.upper()} ====")
+        print(f"URL: {self.server_url}/sessions/{self.session_id}/{method}")
+        print(f"Payload: {payload}")
+        print(f"Headers: {headers}")
+        
         async with client:
-            async with client.stream(
-                "POST", 
-                f"{self.server_url}/sessions/{self.session_id}/{method}",
-                json=payload,
-                headers=headers,
-            ) as response:
-                if response.status_code != 200:
-                    error_text = await response.aread()
-                    self._log(f"Error: {error_text.decode('utf-8')}", level=2)
-                    return None
-
-                async for line in response.aiter_lines():
-                    # Skip empty lines
-                    if not line.strip():
-                        continue
-
-                    try:
-                        # Handle SSE-style messages that start with "data: "
-                        if line.startswith("data: "):
-                            line = line[len("data: "):]
-                        
-                        message = json.loads(line)
-                        logger.info(f"Message: {message}")
-                        
-                        # Handle different message types
-                        msg_type = message.get("type")
-                        
-                        if msg_type == "system":
-                            status = message.get("data", {}).get("status")
-                            if status == "finished":
-                                return message.get("data", {}).get("result")
-                        elif msg_type == "log":
-                            # Log message from data.message
-                            log_msg = message.get("data", {}).get("message", "")
-                            self._log(log_msg, level=1)
-                            if self.on_log:
-                                await self.on_log(message)
-                        else:
-                            # Log any other message types
-                            self._log(f"Unknown message type: {msg_type}", level=2)
-                            if self.on_log:
-                                await self.on_log(message)
-
-                    except json.JSONDecodeError:
-                        self._log(f"Could not parse line as JSON: {line}", level=2)
-                        continue
+            try:
+                async with client.stream(
+                    "POST", 
+                    f"{self.server_url}/sessions/{self.session_id}/{method}",
+                    json=payload,
+                    headers=headers,
+                ) as response:
+                    print(f"Response status: {response.status_code}")
+                    
+                    if response.status_code != 200:
+                        error_text = await response.aread()
+                        error_message = error_text.decode('utf-8')
+                        print(f"ERROR RESPONSE: {error_message}")
+                        self._log(f"Error: {error_message}", level=2)
+                        return None
+
+                    print("Starting to process streaming response...")
+                    async for line in response.aiter_lines():
+                        # Skip empty lines
+                        if not line.strip():
+                            continue
+
+                        try:
+                            # Handle SSE-style messages that start with "data: "
+                            if line.startswith("data: "):
+                                line = line[len("data: "):]
+                            
+                            message = json.loads(line)
+                            print(f"RAW MESSAGE: {message}")
+                            
+                            # Handle different message types
+                            msg_type = message.get("type")
+                            
+                            if msg_type == "system":
+                                status = message.get("data", {}).get("status")
+                                if status == "finished":
+                                    result = message.get("data", {}).get("result")
+                                    print(f"FINISHED WITH RESULT: {result}")
+                                    print(f"==== {method.upper()} COMPLETE ====\n")
+                                    return result
+                            elif msg_type == "log":
+                                # Log message from data.message
+                                log_msg = message.get("data", {}).get("message", "")
+                                print(f"LOG MESSAGE: {log_msg}")
+                                self._log(log_msg, level=1)
+                                if self.on_log:
+                                    await self.on_log(message)
+                            else:
+                                # Log any other message types
+                                print(f"UNKNOWN MESSAGE TYPE: {msg_type}")
+                                self._log(f"Unknown message type: {msg_type}", level=2)
+                                if self.on_log:
+                                    await self.on_log(message)
+
+                        except json.JSONDecodeError:
+                            print(f"JSON DECODE ERROR on line: {line}")
+                            self._log(f"Could not parse line as JSON: {line}", level=2)
+                            continue
+            except Exception as e:
+                print(f"EXCEPTION IN _EXECUTE: {str(e)}")
+                raise
 
         # If we get here without seeing a "finished" message, something went wrong
+        print("==== ERROR: No 'finished' message received ====")
         raise RuntimeError("Server connection closed without sending 'finished' message")
 
     async def _handle_log(self, msg: Dict[str, Any]):
diff --git a/stagehand/config.py b/stagehand/config.py
@@ -1,12 +1,13 @@
 from pydantic import BaseModel, Field
-from typing import Optional, Dict, Callable, Any
+from typing import Optional, Callable, Any
+from stagehand.schemas import AvailableModel
 
 class StagehandConfig(BaseModel):
     """
     Configuration for the Stagehand client.
 
     Attributes:
-        env (str): Environment type. Use 'BROWSERBASE' for remote usage or 'LOCAL' otherwise.
+        env (str): Environment type. Use 'BROWSERBASE' for remote usage.
         api_key (Optional[str]): API key for authentication.
         project_id (Optional[str]): Project identifier.
         debug_dom (bool): Enable DOM debugging features.
@@ -16,19 +17,19 @@ class StagehandConfig(BaseModel):
         enable_caching (Optional[bool]): Enable caching functionality.
         browserbase_session_id (Optional[str]): Session ID for resuming Browserbase sessions.
         model_name (Optional[str]): Name of the model to use.
-        model_client_options (Optional[Dict[str, Any]]): Configuration options for the model client.
+        selfHeal (Optional[bool]): Enable self-healing functionality.
     """
-    env: str = Field("LOCAL", description="Environment type, e.g., 'BROWSERBASE' for remote or 'LOCAL' for local")
-    api_key: Optional[str] = Field(None, alias="apiKey", description="API key for authentication")
-    project_id: Optional[str] = Field(None, alias="projectId", description="Project identifier")
+    env: str = "BROWSERBASE"
+    api_key: Optional[str] = Field(None, alias="apiKey", description="Browserbase API key for authentication")
+    project_id: Optional[str] = Field(None, alias="projectId", description="Browserbase project ID")
     debug_dom: bool = Field(False, alias="debugDom", description="Enable DOM debugging features")
     headless: bool = Field(True, description="Run browser in headless mode")
     logger: Optional[Callable[[Any], None]] = Field(None, description="Custom logging function")
     dom_settle_timeout_ms: Optional[int] = Field(3000, alias="domSettleTimeoutMs", description="Timeout for DOM to settle (in ms)")
     enable_caching: Optional[bool] = Field(False, alias="enableCaching", description="Enable caching functionality")
     browserbase_session_id: Optional[str] = Field(None, alias="browserbaseSessionID", description="Session ID for resuming Browserbase sessions")
-    model_name: Optional[str] = Field(None, alias="modelName", description="Name of the model to use")
-    model_client_options: Optional[Dict[str, Any]] = Field(default_factory=dict, alias="modelClientOptions", description="Options for the model client")
+    model_name: Optional[str] = Field(AvailableModel.GPT_4O, alias="modelName", description="Name of the model to use")
+    selfHeal: Optional[bool] = Field(True, description="Enable self-healing functionality")
 
     class Config:
         populate_by_name = True 
diff --git a/stagehand/page.py b/stagehand/page.py
diff --git a/stagehand/schemas.py b/stagehand/schemas.py

Original file line number	Diff line number	Diff line change
`@@ -36,13 +36,14 @@ async def main():`
`36`	`36`	`print("Navigation complete.")`
`37`	`37`
`38`	`38`	`# Use the ExtractOptions Pydantic model to pass instruction and schema definition`
	`39`	`+ print(ExtractSchema.model_json_schema())`
`39`	`40`	`data = await stagehand.page.extract(`
`40`	`41`	`ExtractOptions(`
`41`	`42`	`instruction="Extract the number of stars for the project",`
`42`	`43`	`schemaDefinition=ExtractSchema.model_json_schema()`
`43`	`44`	`)`
`44`	`45`	`)`
`45`		`- print("\nExtracted stars:", data)`
	`46`	`+ print("\nExtracted stars:", data["stars"])`
`46`	`47`
`47`	`48`	`except Exception as e:`
`48`	`49`	`print(f"Error: {e}")`