-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathvalidate_retry_necessity.py
More file actions
148 lines (131 loc) · 5.33 KB
/
validate_retry_necessity.py
File metadata and controls
148 lines (131 loc) · 5.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Minimal LLM-based validation for retry necessity.
"""
import json
import time
import re
from pathlib import Path
from llm_server import call_llm_simple
# In-memory cache for dataset lookups: instance_id -> problem_statement.
# An empty string means "looked up, nothing usable found" and is cached too,
# so a failing lookup is not retried for the same id.
_PROBLEM_STATEMENT_CACHE = {}


def _get_problem_statement_from_dataset(instance_id: str) -> str:
    """Try to load problem_statement from the HF dataset; fallback to empty string.

    Results (including the empty-string fallback) are memoized in
    ``_PROBLEM_STATEMENT_CACHE``. On a cache miss the dataset is loaded and
    every row is indexed in one pass, so later lookups of other ids that exist
    in the dataset are served from memory instead of reloading it.

    Args:
        instance_id: Value matched against each row's ``instance_id`` field.

    Returns:
        The row's ``problem_statement``, or ``""`` when the ``datasets``
        package is unavailable, loading fails, the id is not present, or the
        stored statement is empty.
    """
    if instance_id in _PROBLEM_STATEMENT_CACHE:
        return _PROBLEM_STATEMENT_CACHE[instance_id]
    try:
        # Lazy import with ignore to avoid static analysis errors if not installed
        from datasets import load_dataset  # type: ignore[import-not-found]

        ds = load_dataset("princeton-nlp___swe-bench_verified", split="test")
        # Iterate to avoid pandas dependency. setdefault keeps the first row
        # seen for an id and never overwrites an already-cached entry.
        for row in ds:
            row_id = row.get("instance_id")
            if row_id is not None:
                _PROBLEM_STATEMENT_CACHE.setdefault(
                    row_id, row.get("problem_statement") or ""
                )
    except Exception as exc:
        # Deliberately best-effort: callers have their own fallbacks, so a
        # missing package or failed load must not propagate.
        import logging

        logging.getLogger(__name__).debug(
            "Problem statement lookup failed for %s: %s", instance_id, exc
        )
    # Either the pass above cached the id, or we record the empty fallback.
    return _PROBLEM_STATEMENT_CACHE.setdefault(instance_id, "")
def _load_inputs(instance_id: str, output_files_dir: str) -> tuple:
    """Load issue, patch, and test_case with minimal assumptions.

    Sources, all located under ``<output_files_dir>/<instance_id>``:

    * issue: the dataset problem statement if available, otherwise the
      ``problem_statement`` field of ``predictions.json``, otherwise a
      placeholder string naming the instance.
    * patch: the ``model_patch`` field of ``predictions.json`` (the first
      element when the file holds a list), or ``""``.
    * test_case: content of the first readable ``test_cases/*.py`` file in
      sorted order, or ``""``.

    Args:
        instance_id: Name of the instance sub-directory.
        output_files_dir: Directory containing per-instance sub-directories.

    Returns:
        A ``(issue, patch, test_case)`` tuple.

    Raises:
        FileNotFoundError: If the instance directory does not exist.
    """
    instance_dir = Path(output_files_dir) / instance_id
    if not instance_dir.exists():
        raise FileNotFoundError(f"Instance directory not found: {instance_dir}")

    # Prefer dataset problem statement. The placeholder is applied only at the
    # very end; assigning it here would make the predictions.json fallback
    # below unreachable because ``issue`` would never be empty.
    issue = _get_problem_statement_from_dataset(instance_id)
    patch = ""
    test_case = ""

    # predictions.json -> model_patch (+ optional problem_statement)
    predictions_file = instance_dir / "predictions.json"
    if predictions_file.exists():
        try:
            with open(predictions_file, "r", encoding="utf-8") as f:
                predictions_data = json.load(f)
        except (OSError, ValueError):
            # Unreadable or malformed file (JSON and decoding errors are
            # ValueError subclasses): continue with the defaults.
            predictions_data = None

        # The file may hold a single prediction or a list of them.
        if isinstance(predictions_data, list) and predictions_data:
            entry = predictions_data[0]
        else:
            entry = predictions_data
        if isinstance(entry, dict):
            # ``or ""`` also covers an explicit null value in the JSON.
            patch = entry.get("model_patch") or ""
            # Only use predictions problem_statement if dataset is empty
            if not issue:
                issue = entry.get("problem_statement") or ""

    # test_cases/*.py -> first file content
    test_cases_dir = instance_dir / "test_cases"
    if test_cases_dir.exists():
        for test_file in sorted(test_cases_dir.glob("*.py")):
            try:
                with open(test_file, "r", encoding="utf-8") as f:
                    test_case = f.read()
                break
            except (OSError, UnicodeDecodeError):
                # Skip unreadable files and try the next candidate.
                continue

    if not issue:
        issue = f"Problem statement for {instance_id}"
    return issue, patch, test_case
def _call_llm_for_judgment(issue: str, patch: str, test_case: str) -> str:
    """Fill the judgment prompt, send it to the LLM, and return the trimmed reply.

    The reply is returned as raw text (expected to be JSON); an empty or
    missing reply yields ``""``.
    """
    # Imported lazily, at call time, rather than at module import.
    from prompts.notpass_judgement import NOTPASS_JUDGEMENT_PROMPT

    template_fields = {
        "pr_description": issue,
        "patch": patch,
        "test_case": test_case,
    }
    filled_prompt = NOTPASS_JUDGEMENT_PROMPT.format(**template_fields)

    raw_reply = call_llm_simple(
        purpose="patch_generation",
        prompt=filled_prompt,
        max_tokens=400,
        temperature=0.2,
    )
    if not raw_reply:
        return ""
    return raw_reply.strip()
def _parse_response(response: str) -> dict:
    """Parse a JSON object from an LLM response, tolerating surrounding text.

    The whole response is tried first. If that is not a JSON object, each
    ``{`` in the text is tried as the start of an embedded object and the
    first one that decodes is returned. This handles code fences, prose
    around the JSON, and stray brace groups before or after it.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The decoded JSON object. When the response is empty or holds no
        decodable object, a fallback dict with ``root_cause`` ``"PATCH"``,
        ``confidence`` ``0.5`` and an explanatory ``one_sentence``. The result
        is always a dict: valid JSON of another type (list, string, number)
        is treated as unparseable so callers can rely on ``.get``.
    """
    if not response:
        return {"root_cause": "PATCH", "confidence": 0.5, "one_sentence": "Empty response"}

    try:
        parsed = json.loads(response)
    except (ValueError, RecursionError):
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # raw_decode parses one JSON value starting at the given index and ignores
    # whatever follows it, so trailing text cannot spoil the match.
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", response):
        try:
            candidate, _end = decoder.raw_decode(response, brace.start())
        except (ValueError, RecursionError):
            continue
        if isinstance(candidate, dict):
            return candidate

    return {"root_cause": "PATCH", "confidence": 0.5, "one_sentence": "Unparseable response"}
def judge_failure_root_cause(instance_id: str, output_files_dir: str = "output_files") -> dict:
    """Return LLM-based root cause judgment for a given instance and save it.

    The judgment is written to
    ``<output_files_dir>/<instance_id>/judgment_result.json``. Saving is
    best-effort: a write failure is logged and never replaces or discards the
    judgment that is returned.

    Args:
        instance_id: Name of the instance sub-directory to analyze.
        output_files_dir: Directory containing per-instance sub-directories.

    Returns:
        A dict with keys ``instance_id``, ``root_cause``, ``confidence``,
        ``reasoning``, ``timestamp`` and ``raw_response``. If loading inputs,
        calling the LLM, or parsing fails, ``root_cause`` is ``"PATCH"``,
        ``confidence`` is ``0.5``, ``reasoning`` starts with
        ``"Failed to analyze: "`` and ``raw_response`` is ``None``.
    """
    instance_dir = Path(output_files_dir) / instance_id
    judgment_file = instance_dir / "judgment_result.json"
    try:
        issue, patch, test_case = _load_inputs(instance_id, output_files_dir)
        response = _call_llm_for_judgment(issue, patch, test_case)
        data = _parse_response(response)
        if not isinstance(data, dict):
            # Valid JSON that is not an object carries no usable fields; fall
            # back to the defaults but keep the raw response for inspection.
            data = {}
        result = {
            "instance_id": instance_id,
            "root_cause": data.get("root_cause", "PATCH"),
            "confidence": data.get("confidence", 0.5),
            "reasoning": data.get("one_sentence", ""),
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "raw_response": response,
        }
    except Exception as e:
        # Top-level boundary: any analysis failure becomes a default judgment
        # so callers always receive a well-formed result.
        result = {
            "instance_id": instance_id,
            "root_cause": "PATCH",
            "confidence": 0.5,
            "reasoning": f"Failed to analyze: {e}",
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "raw_response": None,
        }
    # Persist outside the try block so a write error cannot turn a successful
    # analysis into the fallback result.
    _write_judgment(judgment_file, result)
    return result


def _write_judgment(judgment_file: Path, result: dict) -> None:
    """Best-effort write of ``result`` as indented JSON, creating parent dirs."""
    try:
        judgment_file.parent.mkdir(parents=True, exist_ok=True)
        with open(judgment_file, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
    except (OSError, TypeError, ValueError) as exc:
        import logging

        logging.getLogger(__name__).warning(
            "Could not save judgment to %s: %s", judgment_file, exc
        )