Skip to content

Commit 5eb0477

Browse files
committed
dogfood: Save skips
1 parent 89df0fa commit 5eb0477

File tree

3 files changed

+285
-1
lines changed

3 files changed

+285
-1
lines changed

build_tools/sharpy_dogfood/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class Config(BaseConfig):
7575
successes_dir: Path = field(
7676
default_factory=lambda: Path("dogfood_output/successes")
7777
)
78+
skips_dir: Path = field(default_factory=lambda: Path("dogfood_output/skips"))
7879

7980
# Execution limits
8081
max_iterations: int = 10
@@ -147,10 +148,12 @@ def ensure_dirs(self) -> None:
147148
self.output_dir = self.project_root / self.output_dir
148149
self.issues_dir = self.project_root / self.issues_dir
149150
self.successes_dir = self.project_root / self.successes_dir
151+
self.skips_dir = self.project_root / self.skips_dir
150152

151153
self.output_dir.mkdir(parents=True, exist_ok=True)
152154
self.issues_dir.mkdir(parents=True, exist_ok=True)
153155
self.successes_dir.mkdir(parents=True, exist_ok=True)
156+
self.skips_dir.mkdir(parents=True, exist_ok=True)
154157

155158
@classmethod
156159
def from_file(cls, path: Path) -> "Config":

build_tools/sharpy_dogfood/orchestrator.py

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
Issue,
4040
IssueType,
4141
IssueReporter,
42+
Skip,
43+
SkipReporter,
4244
SummaryReporter,
4345
Success,
4446
SuccessReporter,
@@ -60,6 +62,7 @@ class IterationResult:
6062
status: IterationStatus
6163
issue_dir: Optional[Path] = None
6264
success_dir: Optional[Path] = None
65+
skip_dir: Optional[Path] = None
6366
skip_reason: Optional[str] = None
6467

6568

@@ -75,6 +78,7 @@ class GenerationResult:
7578
generation_duration: Optional[float] = None
7679
rate_limited: bool = False
7780
attempts: int = 1
81+
validation_output: Optional[str] = None # AI validation output (for debugging)
7882

7983

8084
def _outputs_equivalent(expected: str, actual: str, rel_tol: float = 1e-9) -> bool:
@@ -348,6 +352,7 @@ def __init__(self, config: Config):
348352
self.compiler = SharpyCompiler(config.project_root, config.sharpy_cli_project)
349353
self.issue_reporter = IssueReporter(config.issues_dir)
350354
self.success_reporter = SuccessReporter(config.successes_dir)
355+
self.skip_reporter = SkipReporter(config.skips_dir)
351356
self.summary_reporter = SummaryReporter(config.output_dir)
352357
self.spec_context: Optional[str] = None
353358
self.example_snippets: list[str] = []
@@ -487,14 +492,33 @@ async def run_iteration(
487492
skip_reason=gen_result.skip_reason,
488493
)
489494

490-
# If it was a validation failure after retries, skip
495+
# If it was a validation failure after retries, skip but save for inspection
491496
if gen_result.attempts > 1:
492497
print(
493498
f" Code generation failed after {gen_result.attempts} attempts",
494499
file=sys.stderr,
495500
)
501+
# Save the skip for inspection if we have generated code
502+
skip_dir = None
503+
if gen_result.code:
504+
skip = Skip(
505+
timestamp=timestamp,
506+
skip_reason=gen_result.skip_reason or "Validation failed",
507+
generated_code=gen_result.code,
508+
expected_output=gen_result.expected_output,
509+
feature_focus=feature_focus,
510+
complexity=complexity,
511+
backend_used=gen_result.backend_used,
512+
generation_duration=gen_result.generation_duration,
513+
validation_output=gen_result.validation_output,
514+
)
515+
skip_dir = self.skip_reporter.report(skip)
516+
print(
517+
f" Skip saved for inspection: {skip_dir.name}", file=sys.stderr
518+
)
496519
return IterationResult(
497520
IterationStatus.SKIPPED,
521+
skip_dir=skip_dir,
498522
skip_reason=gen_result.skip_reason,
499523
)
500524

@@ -772,6 +796,8 @@ async def _generate_and_validate_code(
772796
else:
773797
return GenerationResult(
774798
success=False,
799+
code=code,
800+
expected_output=extract_expected_output(code),
775801
skip_reason=f"Pre-validation failed after {attempt} attempts: {prevalidation_error}",
776802
backend_used=backend_used,
777803
generation_duration=total_duration,
@@ -799,6 +825,8 @@ async def _generate_and_validate_code(
799825
else:
800826
return GenerationResult(
801827
success=False,
828+
code=code,
829+
expected_output=expected_output,
802830
skip_reason=f"Invalid expected output after {attempt} attempts (Python says: {python_output})",
803831
backend_used=backend_used,
804832
generation_duration=total_duration,
@@ -827,10 +855,13 @@ async def _generate_and_validate_code(
827855
else:
828856
return GenerationResult(
829857
success=False,
858+
code=code,
859+
expected_output=expected_output,
830860
skip_reason=f"Validation backend error after {attempt} attempts: {val_result.error}",
831861
backend_used=backend_used,
832862
generation_duration=total_duration,
833863
attempts=attempt,
864+
validation_output=val_result.error,
834865
)
835866

836867
validation_output = val_result.output
@@ -847,10 +878,13 @@ async def _generate_and_validate_code(
847878
else:
848879
return GenerationResult(
849880
success=False,
881+
code=code,
882+
expected_output=expected_output,
850883
skip_reason=f"Code invalid per spec after {attempt} attempts",
851884
backend_used=backend_used,
852885
generation_duration=total_duration,
853886
attempts=attempt,
887+
validation_output=validation_output,
854888
)
855889

856890
# Success!
@@ -867,6 +901,8 @@ async def _generate_and_validate_code(
867901
# Should not reach here, but just in case
868902
return GenerationResult(
869903
success=False,
904+
code=last_code,
905+
expected_output=extract_expected_output(last_code) if last_code else None,
870906
skip_reason="Generation failed after all retry attempts",
871907
backend_used=backend_used,
872908
generation_duration=total_duration,
@@ -956,8 +992,21 @@ async def run_multifile_iteration(
956992
files = extract_multifile_code(gen_result.output)
957993
if not files:
958994
print(" Failed to parse multi-file response", file=sys.stderr)
995+
# Save raw output for debugging prompt issues
996+
skip = Skip(
997+
timestamp=timestamp,
998+
skip_reason="Failed to parse multi-file response from AI",
999+
generated_code=gen_result.output, # Raw output for debugging
1000+
feature_focus=feature_focus,
1001+
complexity=complexity,
1002+
backend_used=gen_result.backend,
1003+
generation_duration=gen_result.duration_seconds,
1004+
)
1005+
skip_dir = self.skip_reporter.report(skip)
1006+
print(f" Skip saved for inspection: {skip_dir.name}", file=sys.stderr)
9591007
return IterationResult(
9601008
IterationStatus.SKIPPED,
1009+
skip_dir=skip_dir,
9611010
skip_reason="Failed to parse multi-file response from AI",
9621011
)
9631012

@@ -976,8 +1025,23 @@ async def run_multifile_iteration(
9761025
f" Pre-validation failed for {filename}: {prevalidation_error}",
9771026
file=sys.stderr,
9781027
)
1028+
# Save for inspection
1029+
skip = Skip(
1030+
timestamp=timestamp,
1031+
skip_reason=f"Unsupported feature in {filename}: {prevalidation_error}",
1032+
generated_code=files.get("main.spy", ""),
1033+
expected_output=expected_output,
1034+
feature_focus=feature_focus,
1035+
complexity=complexity,
1036+
backend_used=gen_result.backend,
1037+
generation_duration=gen_result.duration_seconds,
1038+
source_files=files,
1039+
)
1040+
skip_dir = self.skip_reporter.report(skip)
1041+
print(f" Skip saved for inspection: {skip_dir.name}", file=sys.stderr)
9791042
return IterationResult(
9801043
IterationStatus.SKIPPED,
1044+
skip_dir=skip_dir,
9811045
skip_reason=f"Unsupported feature in {filename}: {prevalidation_error}",
9821046
)
9831047

@@ -990,15 +1054,47 @@ async def run_multifile_iteration(
9901054
f" Validation failed for {filename}: {val_result.error}",
9911055
file=sys.stderr,
9921056
)
1057+
# Save for inspection
1058+
skip = Skip(
1059+
timestamp=timestamp,
1060+
skip_reason=f"Validation backend error for {filename}",
1061+
generated_code=files.get("main.spy", ""),
1062+
expected_output=expected_output,
1063+
feature_focus=feature_focus,
1064+
complexity=complexity,
1065+
backend_used=gen_result.backend,
1066+
generation_duration=gen_result.duration_seconds,
1067+
source_files=files,
1068+
validation_output=val_result.error,
1069+
)
1070+
skip_dir = self.skip_reporter.report(skip)
1071+
print(f" Skip saved for inspection: {skip_dir.name}", file=sys.stderr)
9931072
return IterationResult(
9941073
IterationStatus.SKIPPED,
1074+
skip_dir=skip_dir,
9951075
skip_reason=f"Validation backend error for {filename}",
9961076
)
9971077

9981078
if "INVALID" in val_result.output.upper():
9991079
print(f" {filename} is invalid per spec, skipping", file=sys.stderr)
1080+
# Save for inspection
1081+
skip = Skip(
1082+
timestamp=timestamp,
1083+
skip_reason=f"{filename} invalid per spec",
1084+
generated_code=files.get("main.spy", ""),
1085+
expected_output=expected_output,
1086+
feature_focus=feature_focus,
1087+
complexity=complexity,
1088+
backend_used=gen_result.backend,
1089+
generation_duration=gen_result.duration_seconds,
1090+
source_files=files,
1091+
validation_output=val_result.output,
1092+
)
1093+
skip_dir = self.skip_reporter.report(skip)
1094+
print(f" Skip saved for inspection: {skip_dir.name}", file=sys.stderr)
10001095
return IterationResult(
10011096
IterationStatus.SKIPPED,
1097+
skip_dir=skip_dir,
10021098
skip_reason=f"{filename} invalid per spec",
10031099
)
10041100

@@ -1264,6 +1360,7 @@ async def run(self, iterations: Optional[int] = None) -> int:
12641360
complexity,
12651361
success=False,
12661362
issue_type=IssueType.SKIPPED,
1363+
skip_dir=result.skip_dir,
12671364
duration=duration,
12681365
skip_reason=result.skip_reason,
12691366
)

0 commit comments

Comments
 (0)