Skip to content

Commit c2cbd35

Browse files
[dev] [Marfuen] mariano/agent-screenshots (#1903)
* feat(browserbase): add browser automation features and context management
* refactor(api): add evaluation status and reason to browser automation runs
* chore(dependencies): update package versions in bun.lock
* chore(hooks): handle session management and cleanup in useBrowserExecution
* chore(api): handle stagehand closure on navigation error
* chore(test-browserbase): remove TestBrowserbasePage and TestBrowserbaseClient components
* refactor(browser-connection): improve session management and cleanup on errors
* chore(browserbase): implement context creation with pending state handling
* refactor(browser-automations): simplify next scheduled run calculation
* refactor(browserbase): increase maxSteps from 10 to 20 in execution
* chore(browser-automations): implement browser automation configuration and management
---------
Co-authored-by: Mariano Fuentes <[email protected]>
1 parent 407ed39 commit c2cbd35

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+5070
-148
lines changed

apps/api/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
"@aws-sdk/client-securityhub": "^3.948.0",
1212
"@aws-sdk/client-sts": "^3.948.0",
1313
"@aws-sdk/s3-request-presigner": "^3.859.0",
14+
"@browserbasehq/sdk": "^2.6.0",
15+
"@browserbasehq/stagehand": "^3.0.5",
1416
"@comp/integration-platform": "workspace:*",
1517
"@nestjs/common": "^11.0.1",
1618
"@nestjs/config": "^4.0.2",
@@ -41,6 +43,7 @@
4143
"mammoth": "^1.8.0",
4244
"nanoid": "^5.1.6",
4345
"pdf-lib": "^1.17.1",
46+
"playwright-core": "^1.57.0",
4447
"prisma": "^6.13.0",
4548
"react": "^19.1.1",
4649
"react-dom": "^19.1.0",

apps/api/src/app.module.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import { KnowledgeBaseModule } from './knowledge-base/knowledge-base.module';
2727
import { SOAModule } from './soa/soa.module';
2828
import { IntegrationPlatformModule } from './integration-platform/integration-platform.module';
2929
import { CloudSecurityModule } from './cloud-security/cloud-security.module';
30+
import { BrowserbaseModule } from './browserbase/browserbase.module';
3031

3132
@Module({
3233
imports: [
@@ -66,6 +67,7 @@ import { CloudSecurityModule } from './cloud-security/cloud-security.module';
6667
SOAModule,
6768
IntegrationPlatformModule,
6869
CloudSecurityModule,
70+
BrowserbaseModule,
6971
],
7072
controllers: [AppController],
7173
providers: [
Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
import {
2+
Body,
3+
Controller,
4+
Delete,
5+
Get,
6+
Param,
7+
Patch,
8+
Post,
9+
UseGuards,
10+
} from '@nestjs/common';
11+
import {
12+
ApiHeader,
13+
ApiOperation,
14+
ApiParam,
15+
ApiResponse,
16+
ApiSecurity,
17+
ApiTags,
18+
} from '@nestjs/swagger';
19+
import { OrganizationId } from '../auth/auth-context.decorator';
20+
import { HybridAuthGuard } from '../auth/hybrid-auth.guard';
21+
import { BrowserbaseService } from './browserbase.service';
22+
import {
23+
AuthStatusResponseDto,
24+
BrowserAutomationResponseDto,
25+
BrowserAutomationRunResponseDto,
26+
CheckAuthDto,
27+
CloseSessionDto,
28+
ContextResponseDto,
29+
CreateBrowserAutomationDto,
30+
CreateSessionDto,
31+
NavigateToUrlDto,
32+
RunAutomationResponseDto,
33+
SessionResponseDto,
34+
UpdateBrowserAutomationDto,
35+
} from './dto/browserbase.dto';
36+
37+
/**
 * HTTP controller for the `browserbase` API (version 1).
 *
 * Every handler is a thin pass-through to {@link BrowserbaseService}; the
 * controller itself holds no state and performs no business logic. All routes
 * are protected by {@link HybridAuthGuard}, which is applied at class level.
 *
 * Status codes: none of the handlers use `@HttpCode`, so NestJS's defaults
 * apply (201 for `@Post`, 200 for every other verb). The `@ApiResponse`
 * decorators below are kept in sync with those defaults so the generated
 * OpenAPI document matches what clients actually receive. If a 200 contract
 * is preferred for the non-creating POST routes, add `@HttpCode(200)` to the
 * handler and change its `@ApiResponse` status together.
 *
 * NOTE(review): several routes address resources purely by ID
 * (`contextId`, `sessionId`, `automationId`, `taskId`, `runId`) and do not
 * pass the caller's organization ID to the service. Confirm that tenant
 * ownership of those resources is enforced somewhere (guard or service).
 */
@ApiTags('Browserbase')
@Controller({ path: 'browserbase', version: '1' })
@UseGuards(HybridAuthGuard)
@ApiSecurity('apikey')
@ApiHeader({
  name: 'X-Organization-Id',
  description: 'Organization ID (required for session auth)',
  required: true,
})
export class BrowserbaseController {
  constructor(private readonly browserbaseService: BrowserbaseService) {}

  // ===== Organization Context =====

  /**
   * Returns the organization's browser context, creating it when missing.
   *
   * @param organizationId - Organization ID supplied by `@OrganizationId()`.
   * @returns Whatever `BrowserbaseService.getOrCreateOrgContext` resolves to.
   */
  @Post('org-context')
  @ApiOperation({
    summary: 'Get or create organization browser context',
    description:
      'Gets the existing browser context for the org or creates a new one',
  })
  @ApiResponse({
    status: 201,
    description: 'Context retrieved or created',
    type: ContextResponseDto,
  })
  async getOrCreateOrgContext(
    @OrganizationId() organizationId: string,
  ): Promise<ContextResponseDto> {
    return await this.browserbaseService.getOrCreateOrgContext(organizationId);
  }

  /**
   * Reports whether the organization already has a browser context.
   *
   * @param organizationId - Organization ID supplied by `@OrganizationId()`.
   * @returns `hasContext` is the truthiness of the service result;
   *   `contextId` is that result's `contextId`, or `undefined` when the
   *   service returned nothing.
   */
  @Get('org-context')
  @ApiOperation({
    summary: 'Get organization browser context status',
    description: 'Gets the current browser context for the org if it exists',
  })
  @ApiResponse({
    status: 200,
    description: 'Context status',
  })
  async getOrgContextStatus(
    @OrganizationId() organizationId: string,
  ): Promise<{ hasContext: boolean; contextId?: string }> {
    const context = await this.browserbaseService.getOrgContext(organizationId);
    return {
      hasContext: !!context,
      contextId: context?.contextId,
    };
  }

  // ===== Session Management =====

  /**
   * Creates a browser session for the context named in the request body.
   *
   * NOTE(review): `contextId` comes from the client and the organization ID
   * is not forwarded here; verify the service rejects contexts that belong
   * to another organization.
   *
   * @param dto - Request body carrying the `contextId` to open a session on.
   * @returns Whatever `BrowserbaseService.createSessionWithContext` resolves to.
   */
  @Post('session')
  @ApiOperation({
    summary: 'Create a new browser session',
    description: 'Creates a new browser session using the org context',
  })
  @ApiResponse({
    status: 201,
    description: 'Session created',
    type: SessionResponseDto,
  })
  async createSession(
    @Body() dto: CreateSessionDto,
  ): Promise<SessionResponseDto> {
    return await this.browserbaseService.createSessionWithContext(
      dto.contextId,
    );
  }

  /**
   * Closes the browser session identified in the request body.
   *
   * @param dto - Request body carrying the `sessionId` to close.
   * @returns `{ success: true }` once the service call resolves; a rejected
   *   service call propagates as an error instead.
   */
  @Post('session/close')
  @ApiOperation({
    summary: 'Close a browser session',
  })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Session closed',
  })
  async closeSession(
    @Body() dto: CloseSessionDto,
  ): Promise<{ success: boolean }> {
    await this.browserbaseService.closeSession(dto.sessionId);
    return { success: true };
  }

  // ===== Browser Navigation =====

  /**
   * Asks the service to navigate a session to a URL.
   *
   * @param dto - Request body carrying `sessionId` and the target `url`.
   * @returns The service's `{ success, error? }` result, passed through as-is.
   */
  @Post('navigate')
  @ApiOperation({
    summary: 'Navigate to a URL',
    description: 'Navigates the browser session to the specified URL',
  })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Navigation result',
  })
  async navigateToUrl(
    @Body() dto: NavigateToUrlDto,
  ): Promise<{ success: boolean; error?: string }> {
    return await this.browserbaseService.navigateToUrl(dto.sessionId, dto.url);
  }

  /**
   * Asks the service for the login status of a session on a given site.
   *
   * @param dto - Request body carrying `sessionId` and the site `url`.
   * @returns Whatever `BrowserbaseService.checkLoginStatus` resolves to.
   */
  @Post('check-auth')
  @ApiOperation({
    summary: 'Check authentication status',
    description: 'Checks if the user is logged in on the specified site',
  })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Auth status',
    type: AuthStatusResponseDto,
  })
  async checkAuth(@Body() dto: CheckAuthDto): Promise<AuthStatusResponseDto> {
    return await this.browserbaseService.checkLoginStatus(
      dto.sessionId,
      dto.url,
    );
  }

  // ===== Browser Automations CRUD =====

  /**
   * Creates a browser automation from the request body.
   *
   * @param dto - Automation definition forwarded unchanged to the service.
   * @returns The service result, asserted (not validated) to be a
   *   `BrowserAutomationResponseDto`.
   */
  @Post('automations')
  @ApiOperation({
    summary: 'Create a browser automation',
  })
  @ApiResponse({
    status: 201,
    description: 'Automation created',
    type: BrowserAutomationResponseDto,
  })
  async createAutomation(
    @Body() dto: CreateBrowserAutomationDto,
  ): Promise<BrowserAutomationResponseDto> {
    return (await this.browserbaseService.createBrowserAutomation(
      dto,
    )) as BrowserAutomationResponseDto;
  }

  /**
   * Lists the browser automations attached to a task.
   *
   * @param taskId - Task ID taken from the route.
   * @returns The result of
   *   `BrowserbaseService.getAutomationsWithPresignedUrls`, asserted to be an
   *   array of `BrowserAutomationResponseDto`.
   */
  @Get('automations/task/:taskId')
  @ApiOperation({
    summary: 'Get all browser automations for a task',
  })
  @ApiParam({ name: 'taskId', description: 'Task ID' })
  @ApiResponse({
    status: 200,
    description: 'List of automations',
    type: [BrowserAutomationResponseDto],
  })
  async getAutomationsForTask(
    @Param('taskId') taskId: string,
  ): Promise<BrowserAutomationResponseDto[]> {
    return (await this.browserbaseService.getAutomationsWithPresignedUrls(
      taskId,
    )) as BrowserAutomationResponseDto[];
  }

  /**
   * Fetches a single browser automation.
   *
   * @param automationId - Automation ID taken from the route.
   * @returns The service result asserted to the DTO type, or `null`; a `null`
   *   is returned to the client as-is rather than converted to a 404 here.
   */
  @Get('automations/:automationId')
  @ApiOperation({
    summary: 'Get a browser automation by ID',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    status: 200,
    description: 'Automation details',
    type: BrowserAutomationResponseDto,
  })
  async getAutomation(
    @Param('automationId') automationId: string,
  ): Promise<BrowserAutomationResponseDto | null> {
    return (await this.browserbaseService.getBrowserAutomation(
      automationId,
    )) as BrowserAutomationResponseDto | null;
  }

  /**
   * Applies a partial update to a browser automation.
   *
   * @param automationId - Automation ID taken from the route.
   * @param dto - Fields to update, forwarded unchanged to the service.
   * @returns The service result, asserted to be a
   *   `BrowserAutomationResponseDto`.
   */
  @Patch('automations/:automationId')
  @ApiOperation({
    summary: 'Update a browser automation',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    status: 200,
    description: 'Automation updated',
    type: BrowserAutomationResponseDto,
  })
  async updateAutomation(
    @Param('automationId') automationId: string,
    @Body() dto: UpdateBrowserAutomationDto,
  ): Promise<BrowserAutomationResponseDto> {
    return (await this.browserbaseService.updateBrowserAutomation(
      automationId,
      dto,
    )) as BrowserAutomationResponseDto;
  }

  /**
   * Deletes a browser automation.
   *
   * @param automationId - Automation ID taken from the route.
   * @returns `{ success: true }` once the service call resolves; a rejected
   *   service call propagates as an error instead.
   */
  @Delete('automations/:automationId')
  @ApiOperation({
    summary: 'Delete a browser automation',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    status: 200,
    description: 'Automation deleted',
  })
  async deleteAutomation(
    @Param('automationId') automationId: string,
  ): Promise<{ success: boolean }> {
    await this.browserbaseService.deleteBrowserAutomation(automationId);
    return { success: true };
  }

  // ===== Automation Execution =====

  /**
   * Starts an automation run that can be watched through a live view.
   *
   * @param automationId - Automation ID taken from the route.
   * @param organizationId - Organization ID supplied by `@OrganizationId()`.
   * @returns The service result: run ID, session ID and live view URL, plus
   *   optional `error` / `needsReauth` fields.
   */
  @Post('automations/:automationId/start-live')
  @ApiOperation({
    summary: 'Start automation with live view',
    description:
      'Creates a session and returns live view URL for watching execution',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Session started with live view URL',
  })
  async startAutomationLive(
    @Param('automationId') automationId: string,
    @OrganizationId() organizationId: string,
  ): Promise<{
    runId: string;
    sessionId: string;
    liveViewUrl: string;
    error?: string;
    needsReauth?: boolean;
  }> {
    return await this.browserbaseService.startAutomationWithLiveView(
      automationId,
      organizationId,
    );
  }

  /**
   * Executes an automation on a session that was created beforehand.
   *
   * NOTE(review): the body is typed with an inline object literal rather than
   * a DTO class, so it exposes no class metadata for a validation pipe or for
   * Swagger schema generation. Presumably `runId` / `sessionId` therefore
   * reach the service unvalidated — consider introducing a DTO like the other
   * routes use.
   *
   * @param automationId - Automation ID taken from the route.
   * @param body - Carries the `runId` and `sessionId` to execute on.
   * @param organizationId - Organization ID supplied by `@OrganizationId()`.
   * @returns The service result: `success` plus optional `screenshotUrl`,
   *   `error` and `needsReauth` fields.
   */
  @Post('automations/:automationId/execute')
  @ApiOperation({
    summary: 'Execute automation on existing session',
    description: 'Runs the automation on a pre-created session',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Execution result',
  })
  async executeAutomationOnSession(
    @Param('automationId') automationId: string,
    @Body() body: { runId: string; sessionId: string },
    @OrganizationId() organizationId: string,
  ): Promise<{
    success: boolean;
    screenshotUrl?: string;
    error?: string;
    needsReauth?: boolean;
  }> {
    return await this.browserbaseService.executeAutomationOnSession(
      automationId,
      body.runId,
      body.sessionId,
      organizationId,
    );
  }

  /**
   * Runs an automation and returns its result in the same request.
   *
   * @param automationId - Automation ID taken from the route.
   * @param organizationId - Organization ID supplied by `@OrganizationId()`.
   * @returns Whatever `BrowserbaseService.runBrowserAutomation` resolves to.
   */
  @Post('automations/:automationId/run')
  @ApiOperation({
    summary: 'Run a browser automation',
    description: 'Executes the automation and returns the result',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    // POST without @HttpCode responds with 201 in NestJS.
    status: 201,
    description: 'Run result',
    type: RunAutomationResponseDto,
  })
  async runAutomation(
    @Param('automationId') automationId: string,
    @OrganizationId() organizationId: string,
  ): Promise<RunAutomationResponseDto> {
    return await this.browserbaseService.runBrowserAutomation(
      automationId,
      organizationId,
    );
  }

  // ===== Run History =====

  /**
   * Lists the recorded runs of an automation.
   *
   * @param automationId - Automation ID taken from the route.
   * @returns The result of `BrowserbaseService.getAutomationRuns`, asserted
   *   to be an array of `BrowserAutomationRunResponseDto`.
   */
  @Get('automations/:automationId/runs')
  @ApiOperation({
    summary: 'Get run history for an automation',
  })
  @ApiParam({ name: 'automationId', description: 'Automation ID' })
  @ApiResponse({
    status: 200,
    description: 'List of runs',
    type: [BrowserAutomationRunResponseDto],
  })
  async getAutomationRuns(
    @Param('automationId') automationId: string,
  ): Promise<BrowserAutomationRunResponseDto[]> {
    return (await this.browserbaseService.getAutomationRuns(
      automationId,
    )) as BrowserAutomationRunResponseDto[];
  }

  /**
   * Fetches a single automation run.
   *
   * @param runId - Run ID taken from the route.
   * @returns The result of `BrowserbaseService.getRunWithPresignedUrl`
   *   asserted to the DTO type, or `null`; a `null` is returned to the client
   *   as-is rather than converted to a 404 here.
   */
  @Get('runs/:runId')
  @ApiOperation({
    summary: 'Get a specific run by ID',
  })
  @ApiParam({ name: 'runId', description: 'Run ID' })
  @ApiResponse({
    status: 200,
    description: 'Run details',
    type: BrowserAutomationRunResponseDto,
  })
  async getRunById(
    @Param('runId') runId: string,
  ): Promise<BrowserAutomationRunResponseDto | null> {
    return (await this.browserbaseService.getRunWithPresignedUrl(
      runId,
    )) as BrowserAutomationRunResponseDto | null;
  }
}

0 commit comments

Comments
 (0)