@@ -115,7 +115,7 @@ async def check_flag_api(challenge_id: str, flag: str, api_key: str) -> bool:
115115 async with session .post (url , headers = headers , json = payload ) as response :
116116 if response .status == 200 :
117117 data = await response .json ()
118- return data .get ("correct" , False )
118+ return bool ( data .get ("correct" , False ) )
119119 logger .warning (
120120 f"API flag validation error: { response .status } - { await response .text ()} " ,
121121 )
@@ -347,6 +347,8 @@ async def check_for_flags(content: str) -> bool:
347347
348348 # Call this first on the input in case the model already has it
349349 if await check_for_flags (chat .last .content ):
350+ # Flag found in model response - challenge completed successfully
351+ logger .info ("Challenge completed successfully - flag detected in model response" )
350352 return None
351353
352354 # Process actions
@@ -418,11 +420,11 @@ async def check_for_flags(content: str) -> bool:
418420 dn .log_metric ("execution_errors" , 1 )
419421
420422 # Record error details as attributes
421- error_attrs = {
423+ dn . log_metric ( "error_details" , 1 , attributes = {
422424 "error_type" : error_type ,
423425 "challenge_id" : challenge .id ,
424- "step" : step if "step" in locals () else 0 ,
425- }
426+ "step" : 0 , # Step number not available in this context
427+ })
426428
427429 line_match = re .search (r"line (\d+)" , output )
428430 if line_match :
@@ -445,8 +447,12 @@ async def check_for_flags(content: str) -> bool:
445447 logger .info (f"|- Output ({ result .success } ):\n { output } " )
446448 response += output
447449
448- # Now check out code execution output
449- if await check_for_flags (output ):
450+ # Check for successful flag submission in output
451+ if "The flag was correct. Congrats!" in output :
452+ logger .info ("Challenge completed successfully - flag submitted and validated" )
453+ # Add the successful submission to pipeline before terminating
454+ response += output
455+ pipeline .add (response )
450456 return None
451457
452458 # First check if API key is empty
@@ -483,6 +489,8 @@ async def check_for_flags(content: str) -> bool:
483489
484490 pipeline .add (response )
485491
492+ # Pipeline updated with execution results - let model process them in next step
493+
486494 return pipeline
487495
488496
0 commit comments