3939 Issue ,
4040 IssueType ,
4141 IssueReporter ,
42+ Skip ,
43+ SkipReporter ,
4244 SummaryReporter ,
4345 Success ,
4446 SuccessReporter ,
@@ -60,6 +62,7 @@ class IterationResult:
6062 status : IterationStatus
6163 issue_dir : Optional [Path ] = None
6264 success_dir : Optional [Path ] = None
65+ skip_dir : Optional [Path ] = None
6366 skip_reason : Optional [str ] = None
6467
6568
@@ -75,6 +78,7 @@ class GenerationResult:
7578 generation_duration : Optional [float ] = None
7679 rate_limited : bool = False
7780 attempts : int = 1
81+ validation_output : Optional [str ] = None # AI validation output (for debugging)
7882
7983
8084def _outputs_equivalent (expected : str , actual : str , rel_tol : float = 1e-9 ) -> bool :
@@ -348,6 +352,7 @@ def __init__(self, config: Config):
348352 self .compiler = SharpyCompiler (config .project_root , config .sharpy_cli_project )
349353 self .issue_reporter = IssueReporter (config .issues_dir )
350354 self .success_reporter = SuccessReporter (config .successes_dir )
355+ self .skip_reporter = SkipReporter (config .skips_dir )
351356 self .summary_reporter = SummaryReporter (config .output_dir )
352357 self .spec_context : Optional [str ] = None
353358 self .example_snippets : list [str ] = []
@@ -487,14 +492,33 @@ async def run_iteration(
487492 skip_reason = gen_result .skip_reason ,
488493 )
489494
490- # If it was a validation failure after retries, skip
495+ # If it was a validation failure after retries, skip but save for inspection
491496 if gen_result .attempts > 1 :
492497 print (
493498 f" Code generation failed after { gen_result .attempts } attempts" ,
494499 file = sys .stderr ,
495500 )
501+ # Save the skip for inspection if we have generated code
502+ skip_dir = None
503+ if gen_result .code :
504+ skip = Skip (
505+ timestamp = timestamp ,
506+ skip_reason = gen_result .skip_reason or "Validation failed" ,
507+ generated_code = gen_result .code ,
508+ expected_output = gen_result .expected_output ,
509+ feature_focus = feature_focus ,
510+ complexity = complexity ,
511+ backend_used = gen_result .backend_used ,
512+ generation_duration = gen_result .generation_duration ,
513+ validation_output = gen_result .validation_output ,
514+ )
515+ skip_dir = self .skip_reporter .report (skip )
516+ print (
517+ f" Skip saved for inspection: { skip_dir .name } " , file = sys .stderr
518+ )
496519 return IterationResult (
497520 IterationStatus .SKIPPED ,
521+ skip_dir = skip_dir ,
498522 skip_reason = gen_result .skip_reason ,
499523 )
500524
@@ -772,6 +796,8 @@ async def _generate_and_validate_code(
772796 else :
773797 return GenerationResult (
774798 success = False ,
799+ code = code ,
800+ expected_output = extract_expected_output (code ),
775801 skip_reason = f"Pre-validation failed after { attempt } attempts: { prevalidation_error } " ,
776802 backend_used = backend_used ,
777803 generation_duration = total_duration ,
@@ -799,6 +825,8 @@ async def _generate_and_validate_code(
799825 else :
800826 return GenerationResult (
801827 success = False ,
828+ code = code ,
829+ expected_output = expected_output ,
802830 skip_reason = f"Invalid expected output after { attempt } attempts (Python says: { python_output } )" ,
803831 backend_used = backend_used ,
804832 generation_duration = total_duration ,
@@ -827,10 +855,13 @@ async def _generate_and_validate_code(
827855 else :
828856 return GenerationResult (
829857 success = False ,
858+ code = code ,
859+ expected_output = expected_output ,
830860 skip_reason = f"Validation backend error after { attempt } attempts: { val_result .error } " ,
831861 backend_used = backend_used ,
832862 generation_duration = total_duration ,
833863 attempts = attempt ,
864+ validation_output = val_result .error ,
834865 )
835866
836867 validation_output = val_result .output
@@ -847,10 +878,13 @@ async def _generate_and_validate_code(
847878 else :
848879 return GenerationResult (
849880 success = False ,
881+ code = code ,
882+ expected_output = expected_output ,
850883 skip_reason = f"Code invalid per spec after { attempt } attempts" ,
851884 backend_used = backend_used ,
852885 generation_duration = total_duration ,
853886 attempts = attempt ,
887+ validation_output = validation_output ,
854888 )
855889
856890 # Success!
@@ -867,6 +901,8 @@ async def _generate_and_validate_code(
867901 # Should not reach here, but just in case
868902 return GenerationResult (
869903 success = False ,
904+ code = last_code ,
905+ expected_output = extract_expected_output (last_code ) if last_code else None ,
870906 skip_reason = "Generation failed after all retry attempts" ,
871907 backend_used = backend_used ,
872908 generation_duration = total_duration ,
@@ -956,8 +992,21 @@ async def run_multifile_iteration(
956992 files = extract_multifile_code (gen_result .output )
957993 if not files :
958994 print (" Failed to parse multi-file response" , file = sys .stderr )
995+ # Save raw output for debugging prompt issues
996+ skip = Skip (
997+ timestamp = timestamp ,
998+ skip_reason = "Failed to parse multi-file response from AI" ,
999+ generated_code = gen_result .output , # Raw output for debugging
1000+ feature_focus = feature_focus ,
1001+ complexity = complexity ,
1002+ backend_used = gen_result .backend ,
1003+ generation_duration = gen_result .duration_seconds ,
1004+ )
1005+ skip_dir = self .skip_reporter .report (skip )
1006+ print (f" Skip saved for inspection: { skip_dir .name } " , file = sys .stderr )
9591007 return IterationResult (
9601008 IterationStatus .SKIPPED ,
1009+ skip_dir = skip_dir ,
9611010 skip_reason = "Failed to parse multi-file response from AI" ,
9621011 )
9631012
@@ -976,8 +1025,23 @@ async def run_multifile_iteration(
9761025 f" Pre-validation failed for { filename } : { prevalidation_error } " ,
9771026 file = sys .stderr ,
9781027 )
1028+ # Save for inspection
1029+ skip = Skip (
1030+ timestamp = timestamp ,
1031+ skip_reason = f"Unsupported feature in { filename } : { prevalidation_error } " ,
1032+ generated_code = files .get ("main.spy" , "" ),
1033+ expected_output = expected_output ,
1034+ feature_focus = feature_focus ,
1035+ complexity = complexity ,
1036+ backend_used = gen_result .backend ,
1037+ generation_duration = gen_result .duration_seconds ,
1038+ source_files = files ,
1039+ )
1040+ skip_dir = self .skip_reporter .report (skip )
1041+ print (f" Skip saved for inspection: { skip_dir .name } " , file = sys .stderr )
9791042 return IterationResult (
9801043 IterationStatus .SKIPPED ,
1044+ skip_dir = skip_dir ,
9811045 skip_reason = f"Unsupported feature in { filename } : { prevalidation_error } " ,
9821046 )
9831047
@@ -990,15 +1054,47 @@ async def run_multifile_iteration(
9901054 f" Validation failed for { filename } : { val_result .error } " ,
9911055 file = sys .stderr ,
9921056 )
1057+ # Save for inspection
1058+ skip = Skip (
1059+ timestamp = timestamp ,
1060+ skip_reason = f"Validation backend error for { filename } " ,
1061+ generated_code = files .get ("main.spy" , "" ),
1062+ expected_output = expected_output ,
1063+ feature_focus = feature_focus ,
1064+ complexity = complexity ,
1065+ backend_used = gen_result .backend ,
1066+ generation_duration = gen_result .duration_seconds ,
1067+ source_files = files ,
1068+ validation_output = val_result .error ,
1069+ )
1070+ skip_dir = self .skip_reporter .report (skip )
1071+ print (f" Skip saved for inspection: { skip_dir .name } " , file = sys .stderr )
9931072 return IterationResult (
9941073 IterationStatus .SKIPPED ,
1074+ skip_dir = skip_dir ,
9951075 skip_reason = f"Validation backend error for { filename } " ,
9961076 )
9971077
9981078 if "INVALID" in val_result .output .upper ():
9991079 print (f" { filename } is invalid per spec, skipping" , file = sys .stderr )
1080+ # Save for inspection
1081+ skip = Skip (
1082+ timestamp = timestamp ,
1083+ skip_reason = f"{ filename } invalid per spec" ,
1084+ generated_code = files .get ("main.spy" , "" ),
1085+ expected_output = expected_output ,
1086+ feature_focus = feature_focus ,
1087+ complexity = complexity ,
1088+ backend_used = gen_result .backend ,
1089+ generation_duration = gen_result .duration_seconds ,
1090+ source_files = files ,
1091+ validation_output = val_result .output ,
1092+ )
1093+ skip_dir = self .skip_reporter .report (skip )
1094+ print (f" Skip saved for inspection: { skip_dir .name } " , file = sys .stderr )
10001095 return IterationResult (
10011096 IterationStatus .SKIPPED ,
1097+ skip_dir = skip_dir ,
10021098 skip_reason = f"{ filename } invalid per spec" ,
10031099 )
10041100
@@ -1264,6 +1360,7 @@ async def run(self, iterations: Optional[int] = None) -> int:
12641360 complexity ,
12651361 success = False ,
12661362 issue_type = IssueType .SKIPPED ,
1363+ skip_dir = result .skip_dir ,
12671364 duration = duration ,
12681365 skip_reason = result .skip_reason ,
12691366 )
0 commit comments