@@ -548,15 +548,15 @@ export class BrowserbaseService {
548548 completedAt : new Date ( ) ,
549549 durationMs : run . startedAt ? Date . now ( ) - run . startedAt . getTime ( ) : 0 ,
550550 screenshotUrl : screenshotKey ,
551- evaluationStatus : 'pass' ,
552- evaluationReason : result . evaluationReason ?? 'Requirement verified ' ,
551+ evaluationStatus : result . evaluationStatus ?? null ,
552+ evaluationReason : result . evaluationReason ?? 'Screenshot captured ' ,
553553 } ,
554554 } ) ;
555555
556556 return {
557557 success : true ,
558558 screenshotUrl : presignedUrl ,
559- evaluationStatus : 'pass' ,
559+ evaluationStatus : result . evaluationStatus ,
560560 evaluationReason : result . evaluationReason ,
561561 } ;
562562 } catch ( err ) {
@@ -687,16 +687,16 @@ export class BrowserbaseService {
687687 completedAt : new Date ( ) ,
688688 durationMs : Date . now ( ) - run . startedAt ! . getTime ( ) ,
689689 screenshotUrl : screenshotKey ,
690- evaluationStatus : 'pass' ,
691- evaluationReason : result . evaluationReason ?? 'Requirement verified ' ,
690+ evaluationStatus : result . evaluationStatus ?? null ,
691+ evaluationReason : result . evaluationReason ?? 'Screenshot captured ' ,
692692 } ,
693693 } ) ;
694694
695695 return {
696696 runId : run . id ,
697697 success : true ,
698698 screenshotUrl : presignedUrl ,
699- evaluationStatus : 'pass' ,
699+ evaluationStatus : result . evaluationStatus ,
700700 evaluationReason : result . evaluationReason ,
701701 } ;
702702 } finally {
@@ -793,73 +793,7 @@ export class BrowserbaseService {
793793 // Wait for final page to settle
794794 await delay ( 2000 ) ;
795795
796- // Evaluate if the automation fulfills the task requirements BEFORE taking screenshot
797- if ( taskContext ) {
798- // Re-acquire page in case the agent closed/replaced it during execution
799- page = await this . ensureActivePage ( stagehand ) ;
800-
801- const evaluationSchema = z . object ( {
802- passes : z
803- . boolean ( )
804- . describe (
805- 'Whether the current page state shows that the requirement is fulfilled' ,
806- ) ,
807- reason : z
808- . string ( )
809- . describe (
810- 'A brief explanation of why it passes or fails the requirement' ,
811- ) ,
812- } ) ;
813-
814- const evaluationPrompt = `You are evaluating whether a compliance requirement is being met.
815-
816- Task/Requirement: "${ taskContext . title } "
817- ${ taskContext . description ? `Description: "${ taskContext . description } "` : '' }
818-
819- Navigation completed: "${ instruction } "
820-
821- Look at the current page and determine if the visible configuration, settings, or state demonstrates that this requirement is fulfilled.
822-
823- For example:
824- - If the task is about "branch protection", check if branch protection rules are visible and enabled
825- - If the task is about "MFA/2FA", check if multi-factor authentication is shown as enabled
826- - If the task is about "access controls", check if appropriate access restrictions are configured
827-
828- Be strict: if the setting is disabled, not configured, or shows a warning/error state, it should FAIL.
829- Only pass if there is clear evidence the requirement is properly configured and active.` ;
830-
831- try {
832- const evaluation = ( await stagehand . extract (
833- evaluationPrompt ,
834- evaluationSchema as any ,
835- ) ) as { passes : boolean ; reason : string } ;
836-
837- this . logger . log (
838- `Automation evaluation: ${ evaluation . passes ? 'PASS' : 'FAIL' } - ${ evaluation . reason } ` ,
839- ) ;
840-
841- // If evaluation fails, abort without taking screenshot
842- if ( ! evaluation . passes ) {
843- return {
844- success : false ,
845- evaluationStatus : 'fail' ,
846- evaluationReason : evaluation . reason ,
847- error : `Requirement not met: ${ evaluation . reason } ` ,
848- } ;
849- }
850- } catch ( evalErr ) {
851- this . logger . warn (
852- `Failed to evaluate automation: ${ evalErr instanceof Error ? evalErr . message : String ( evalErr ) } ` ,
853- ) ;
854- // If evaluation itself errors, fail the automation
855- return {
856- success : false ,
857- error : `Evaluation error: ${ evalErr instanceof Error ? evalErr . message : 'Unknown error' } ` ,
858- } ;
859- }
860- }
861-
862- // Only take screenshot if evaluation passed (or no task context)
796+ // Always take a screenshot at the end (no pass/fail criteria gate)
863797 page = await this . ensureActivePage ( stagehand ) ;
864798 const screenshot = await page . screenshot ( {
865799 type : 'jpeg' ,
@@ -870,8 +804,9 @@ Only pass if there is clear evidence the requirement is properly configured and
870804 return {
871805 success : true ,
872806 screenshot : screenshot . toString ( 'base64' ) ,
873- evaluationStatus : 'pass' ,
874- evaluationReason : 'Requirement verified successfully' ,
807+ evaluationReason : taskContext
808+ ? `Navigation completed for "${ taskContext . title } ". Screenshot captured.`
809+ : 'Navigation completed. Screenshot captured.' ,
875810 } ;
876811 } catch ( err ) {
877812 this . logger . error ( 'Failed to execute automation' , err ) ;
0 commit comments