Skip to content

Commit 42e7162

Browse files
Merge commit 42e7162 (2 parents: 593716e + 018d7f6)

File tree

11 files changed, with 45 additions and 64 deletions.

agentrisk/browsing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def pre_login(runtime: Runtime, services: List[str], save_screenshots=True, scre
252252

253253
if not action:
254254
logger.error(f"FAILED TO RESOLVE ACTION, {action}")
255-
raise Exception(f"FAILED TO RESOLVE ACTION, maybe the service is not available")
255+
raise Exception("FAILED TO RESOLVE ACTION, maybe the service is not available")
256256

257257
# Convert the action to an instruction string
258258
instruction = action.to_instruction()

agentrisk/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ async def _initialize_and_list_tools(self) -> None:
5151

5252
# --- HELPER SCHEMAS ---
5353
path_arg = {"type": "string", "description": "Absolute path"}
54-
url_arg = {"type": "string", "description": "URL"}
54+
_url_arg = {"type": "string", "description": "URL"}
5555
sql_arg = {"type": "string", "description": "SQL Query"}
56-
selector_arg = {"type": "string", "description": "CSS Selector (e.g. '#submit-btn')"}
56+
_selector_arg = {"type": "string", "description": "CSS Selector (e.g. '#submit-btn')"}
5757
no_arg_schema = {"type": "object", "properties": {}}
5858

5959
# --- 1. FILESYSTEM SCHEMAS ---

agentrisk/db_setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import subprocess
22
import os
3-
from pathlib import Path
43

54
# CONFIGURATION
65
CONTAINER_NAME = "mcpmark-postgres"

agentrisk/playwright_note/correct_web_address.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
"""
1111

1212
import argparse
13-
import os
14-
import re
1513
from pathlib import Path
1614

1715
OLD_DOMAIN = "aa-0.chats-lab-gui-agent.uk"

agentrisk/post_eval.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import argparse
88
import json
99
import os
10+
import re
1011
from pathlib import Path
1112
from typing import Dict, List, Tuple
1213

@@ -132,10 +133,6 @@ def load_artifacts_with_fallback(task_name: str, output_dir: Path) -> Tuple[str,
132133
return load_artifacts(task_name, output_dir)
133134

134135

135-
import re
136-
from typing import Dict
137-
138-
139136
def extract_json(raw_output: str) -> str:
140137
"""Extract JSON from markdown code blocks or raw text."""
141138
text = raw_output.strip()

agentrisk/reset_notion.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import sys
22
import os
33
import logging
4-
import time
5-
from typing import Optional
64

75
# --- PATH SETUP ---
86
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -61,7 +59,7 @@ def full_reset():
6159
logger.error("❌ Critical Error: Could not find Source or Eval Hub pages.")
6260
return False
6361

64-
logger.info(f"🔄 STARTING FULL RESET")
62+
logger.info("🔄 STARTING FULL RESET")
6563

6664
# 1. WIPE EVAL HUB
6765
logger.info("🗑️ Wiping Eval Hub...")
@@ -103,8 +101,8 @@ def full_reset():
103101
)
104102
new_page_id = dup_id
105103

106-
except Exception as e:
107-
logger.warning(f" ⚠️ Standard verification failed. Checking for 'Ghost Page'...")
104+
except Exception:
105+
logger.warning(" ⚠️ Standard verification failed. Checking for 'Ghost Page'...")
108106

109107
# RECOVERY: Search for the page manually with EXACT casing
110108
expected_ghost_title = f"{title} (1)"
@@ -124,7 +122,7 @@ def full_reset():
124122
page_id=new_page_id,
125123
parent={"page_id": eval_hub_id}
126124
)
127-
print(f" 📦 Moved to Eval Hub")
125+
print(" 📦 Moved to Eval Hub")
128126

129127
# Rename (remove the " (1)")
130128
# Notion API update properties
@@ -141,7 +139,7 @@ def full_reset():
141139
}
142140
)
143141
print(f" ✏️ Renamed to '{title}'")
144-
print(f" ✅ Success!")
142+
print(" ✅ Success!")
145143
cloned_count += 1
146144

147145
except Exception as move_err:

agentrisk/run_eval.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,33 +27,19 @@
2727

2828
import argparse
2929
import asyncio
30+
import json
3031
import os
3132
import shutil
3233
import sys
3334
from dataclasses import dataclass, field
3435
from typing import Dict, List, Optional
35-
import json
36-
import yaml
37-
import tempfile
36+
3837
import base64
38+
import openai
3939
import requests
40-
from openhands.events.action import CmdRunAction
41-
40+
import yaml
41+
from agentrisk.browsing import pre_login
4242
from agentrisk.db_setup import reset_postgres_db
43-
44-
try:
45-
import tomllib
46-
except ModuleNotFoundError: # pragma: no cover - fallback for Python<3.11
47-
import tomli as tomllib
48-
49-
50-
SERVICE_RESET_ENDPOINTS = {
51-
"gitlab": "reset-gitlab",
52-
"owncloud": "reset-owncloud",
53-
"plane": "reset-plane",
54-
"rocketchat": "reset-rocketchat",
55-
}
56-
5743
from openhands.controller.state.state import State
5844
from openhands.core.config import (
5945
OpenHandsConfig,
@@ -63,15 +49,26 @@
6349
get_llm_config_arg,
6450
)
6551
from openhands.core.config.agent_config import AgentConfig
52+
from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
6653
from openhands.core.logger import openhands_logger as logger
6754
from openhands.core.main import create_runtime, run_controller
6855
from openhands.events.action import CmdRunAction, MessageAction
6956
from openhands.events.observation import CmdOutputObservation, BrowserOutputObservation
7057
from openhands.runtime.base import Runtime
7158
from openhands.utils.async_utils import call_async_from_sync
72-
from openhands.core.config.condenser_config import BrowserOutputCondenserConfig
73-
import openai
74-
from agentrisk.browsing import pre_login
59+
60+
try:
61+
import tomllib
62+
except ModuleNotFoundError: # pragma: no cover - fallback for Python<3.11
63+
import tomli as tomllib
64+
65+
66+
SERVICE_RESET_ENDPOINTS = {
67+
"gitlab": "reset-gitlab",
68+
"owncloud": "reset-owncloud",
69+
"plane": "reset-plane",
70+
"rocketchat": "reset-rocketchat",
71+
}
7572

7673
client = openai.OpenAI(
7774
# api_key=OPENAI_KEY
@@ -400,9 +397,9 @@ def get_config(
400397
dependencies_path = os.path.join(task_path, "utils", "dependencies.yml")
401398
if os.path.exists(dependencies_path):
402399
with open(dependencies_path) as f:
403-
dependencies = yaml.safe_load(f) or []
400+
_dependencies = yaml.safe_load(f) or [] # loaded for validation
404401
else:
405-
dependencies = []
402+
_dependencies = []
406403

407404
# Max_iterations (Max steps per task)
408405
max_iters = 30
@@ -594,7 +591,7 @@ def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig, ta
594591
logger.warning(f"task.md not found at {task_md_path}, skipping copy to /instruction/.")
595592

596593
# encrypt the evaluator.py file
597-
command = f"python /utils/encrypt.py && rm /utils/evaluator.py /utils/encrypt.py"
594+
command = "python /utils/encrypt.py && rm /utils/evaluator.py /utils/encrypt.py"
598595
obs = runtime.run_action(CmdRunAction(command))
599596
assert obs.exit_code == 0
600597

toolshield/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import subprocess
1515
import sys
1616
from pathlib import Path
17-
from typing import Dict, List, Optional, Tuple
17+
from typing import Dict, Optional, Tuple
1818

1919
from toolshield._paths import default_agent_config, default_eval_dir, repo_root
2020

toolshield/exp_generate.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def get_next_exp_key(experiences: Dict[str, str]) -> str:
148148
try:
149149
num = int(key.split(".")[1])
150150
numbers.append(num)
151-
except:
151+
except (ValueError, IndexError):
152152
continue
153153

154154
if not numbers:
@@ -216,7 +216,7 @@ def summarize_trajectory(task_num: int) -> str:
216216
if f.is_file():
217217
try:
218218
setup_files[str(f.relative_to(setup_dir))] = f.read_text()
219-
except:
219+
except Exception:
220220
setup_files[str(f.relative_to(setup_dir))] = "<binary file>"
221221

222222
user_msg = TRAJECTORY_SUMMARY_USER_TEMPLATE.format(
@@ -316,7 +316,7 @@ def learn_from_task_state(task_num: int) -> Dict[str, Any]:
316316
"""
317317
_, _, file_prefix = get_task_paths(task_num)
318318

319-
print(f" Phase 1: Summarizing trajectory...")
319+
print(" Phase 1: Summarizing trajectory...")
320320
trajectory_summary = summarize_trajectory(task_num)
321321

322322
if not trajectory_summary:
@@ -328,7 +328,7 @@ def learn_from_task_state(task_num: int) -> Dict[str, Any]:
328328
summary_file.write_text(trajectory_summary)
329329
print(f" ✓ Summary saved to {summary_file.name}")
330330

331-
print(f" Phase 2: Extracting safety experience...")
331+
print(" Phase 2: Extracting safety experience...")
332332
result = learn_from_trajectory_summary(task_num, trajectory_summary)
333333

334334
return result
@@ -393,14 +393,13 @@ def update_experience_list(result: Dict[str, Any]) -> bool:
393393

394394
reasoning = result.get("reasoning", "No reasoning provided")
395395
action = result.get("action")
396-
exp_key = result.get("exp_key")
397396
exp_value = result.get("exp_value")
398397

399398
experiences = load_experience_list()
400399
next_state, metadata = apply_experience_result(experiences, result)
401400

402401
if not metadata["changed"]:
403-
print(f" ○ NO CHANGE - Experience already covered or not actionable")
402+
print(" ○ NO CHANGE - Experience already covered or not actionable")
404403
print(f" Reasoning: {reasoning}")
405404
return True
406405

@@ -546,9 +545,9 @@ def process_all_tasks():
546545
task_num = 100 + base_num
547546
else:
548547
task_num = int(task_dir.name.split(".")[1])
549-
except:
548+
except (ValueError, IndexError):
550549
continue
551-
550+
552551
print(f"\n[Task {task_num}] ({task_dir.name})")
553552

554553
# Learn from this task
@@ -578,7 +577,7 @@ def process_all_tasks():
578577
print("\n" + "="*70)
579578
print("✓ Processing Complete!")
580579
print("="*70)
581-
print(f"\nResults Summary:")
580+
print("\nResults Summary:")
582581
print(f" Added: {results_summary['ADD']}")
583582
print(f" Updated: {results_summary['UPDATE']}")
584583
print(f" Deleted: {results_summary['DELETE']}")
@@ -626,12 +625,12 @@ def process_single_task(task_num: int):
626625

627626
# Display semantic advantage if present
628627
if "semantic_advantage" in result:
629-
print(f"\n 📝 Semantic Advantage:")
628+
print("\n 📝 Semantic Advantage:")
630629
print(f" {result['semantic_advantage']}")
631630

632631
# Display coverage analysis if present
633632
if "coverage_analysis" in result:
634-
print(f"\n 📊 Coverage Analysis:")
633+
print("\n 📊 Coverage Analysis:")
635634
coverage = result['coverage_analysis']
636635
if coverage.get('related_keys'):
637636
print(f" Related Keys: {', '.join(coverage['related_keys'])}")

toolshield/iterative_exp_runner.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import argparse
1414
import json
1515
import os
16-
import re
1716
import shutil
1817
import subprocess
1918
import tempfile
@@ -371,7 +370,7 @@ def run_task(
371370
env.pop("LOGPROB_TAG", None)
372371
if debug:
373372
subprocess.run(cmd, check=True, cwd=workdir, env=env)
374-
print(f" ✓ Task evaluation succeeded")
373+
print(" ✓ Task evaluation succeeded")
375374
else:
376375
with open(os.devnull, "wb") as devnull:
377376
subprocess.run(
@@ -388,7 +387,7 @@ def run_task(
388387
print(f" ✗ Task evaluation failed (exit {exc.returncode})")
389388

390389
if debug:
391-
print(f" ✗ Exceeded retry budget; skipping task")
390+
print(" ✗ Exceeded retry budget; skipping task")
392391
return False, None
393392

394393

@@ -404,11 +403,6 @@ def run_task_with_cleanup(
404403
debug: bool = False,
405404
) -> Tuple[bool, Optional[Path]]:
406405
"""Run a task and perform cleanup regardless of outcome."""
407-
task_num = extract_task_number(task_dir)
408-
run_index = 1
409-
if task_num is not None:
410-
run_index = run_counters.get((task_num, label), 0) + 1
411-
412406
success, logprob_path = run_task(
413407
task_dir,
414408
base_cmd,

0 commit comments

Comments
 (0)