Skip to content

Commit 4b12551

Browse files
darrenangle and claude committed
Apply steep penalty scoring across 140 poetic forms
Mass audit of all form verify() methods to fix lenient scoring that allowed models to get partial credit with many violations.

Pattern applied (same as Lipogram fix):
- 0 violations: 1.0 (perfect)
- 1 violation: 0.5
- 2 violations: 0.25
- 3+ violations: 0.05

Files modified:
- constrained.py: Abecedarian, Mesostic, Anaphora, PalindromePoem
- novel.py: VowelPilgrimage, QuestionQuest, WhisperPoem, etc.
- mathematical.py: FibonacciVerse, GoldenRatioVerse, etc.
- japanese.py, haiku.py: Haiku, Senryu, Tanka, Katauta, Sedoka
- rondeau.py, rondel.py, ballade.py, medieval.py: French forms
- terza_rima.py, ghazal.py: TerzaRima, Ghazal
- pantoum.py: Pantoum repetition scoring
- clerihew.py, epigram.py, blues.py: Short/folk forms
- world.py: Naani, Tanaga, regional forms
- constraints/relational.py: EndWordPattern, Refrain

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 9c97301 commit 4b12551

File tree

18 files changed

+1140
-197
lines changed

18 files changed

+1140
-197
lines changed

.beads/issues.jsonl

Lines changed: 142 additions & 0 deletions
Large diffs are not rendered by default.

src/abide/constraints/relational.py

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,18 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
260260
)
261261
scores.append(score)
262262

263-
overall_score = sum(scores) / len(scores) if scores else 0.0
263+
# Calculate overall score with steep exponential penalty
264+
# 0 violations: 1.0, 1 violation: 0.5, 2 violations: 0.25, 3+ violations: 0.05
265+
num_violations = sum(1 for r in rubric if not r.passed)
266+
if num_violations == 0:
267+
overall_score = 1.0
268+
elif num_violations == 1:
269+
overall_score = 0.5
270+
elif num_violations == 2:
271+
overall_score = 0.25
272+
else:
273+
overall_score = 0.05
274+
264275
overall_passed = all(r.passed for r in rubric)
265276

266277
return VerificationResult(
@@ -415,7 +426,18 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
415426
if stanza_scores:
416427
scores.append(sum(stanza_scores) / len(stanza_scores))
417428

418-
overall_score = sum(scores) / len(scores) if scores else 0.0
429+
# Calculate overall score with steep exponential penalty
430+
# 0 violations: 1.0, 1 violation: 0.5, 2 violations: 0.25, 3+ violations: 0.05
431+
num_violations = sum(1 for r in rubric if not r.passed)
432+
if num_violations == 0:
433+
overall_score = 1.0
434+
elif num_violations == 1:
435+
overall_score = 0.5
436+
elif num_violations == 2:
437+
overall_score = 0.25
438+
else:
439+
overall_score = 0.05
440+
419441
overall_passed = all(r.passed for r in rubric)
420442

421443
return VerificationResult(

src/abide/forms/ballade.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -234,7 +234,19 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
234234
)
235235
scores.append(avg_c)
236236

237-
overall_score = sum(scores) / len(scores) if scores else 0.0
237+
# Count violations (rubric items that failed)
238+
violations = sum(1 for r in rubric if not r.passed)
239+
240+
# Steep penalty scoring: 0 violations = 1.0, 1 = 0.5, 2 = 0.25, 3+ = 0.05
241+
if violations == 0:
242+
overall_score = 1.0
243+
elif violations == 1:
244+
overall_score = 0.5
245+
elif violations == 2:
246+
overall_score = 0.25
247+
else:
248+
overall_score = 0.05
249+
238250
overall_passed = all(r.passed for r in rubric) if self.strict else overall_score >= 0.6
239251

240252
return VerificationResult(

src/abide/forms/blues.py

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -164,10 +164,27 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
164164
)
165165
rhyme_scores.append(avg_rhyme)
166166

167-
if repetition_scores:
168-
scores.append(sum(repetition_scores) / len(repetition_scores))
169-
if rhyme_scores:
170-
scores.append(sum(rhyme_scores) / len(rhyme_scores))
167+
# Apply steep penalty based on violation count
168+
# Count violations in repetition and rhyme
169+
repetition_violations = sum(
170+
1 for score in repetition_scores if score < self.repetition_threshold
171+
)
172+
rhyme_violations = sum(1 for score in rhyme_scores if score < self.rhyme_threshold)
173+
total_violations = repetition_violations + rhyme_violations
174+
175+
# Steep penalty: 0=1.0, 1=0.5, 2=0.25, 3+=0.05
176+
if total_violations == 0:
177+
pattern_score = 1.0
178+
elif total_violations == 1:
179+
pattern_score = 0.5
180+
elif total_violations == 2:
181+
pattern_score = 0.25
182+
else:
183+
pattern_score = 0.05
184+
185+
# Add pattern score to overall scores
186+
if repetition_scores or rhyme_scores:
187+
scores.append(pattern_score)
171188

172189
overall_score = sum(scores) / len(scores) if scores else 0.0
173190
overall_passed = all(r.passed for r in rubric) if self.strict else overall_score >= 0.5

src/abide/forms/clerihew.py

Lines changed: 23 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
6666
structure = self._ensure_structure(poem)
6767

6868
rubric: list[RubricItem] = []
69-
scores: list[float] = []
69+
violations = 0
7070

7171
# Check line count (exactly 4)
7272
if structure.line_count == 4:
@@ -79,22 +79,22 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
7979
passed=True,
8080
)
8181
)
82-
scores.append(1.0)
8382
else:
8483
rubric.append(
8584
RubricItem(
8685
criterion="Line count",
8786
expected="4",
8887
actual=str(structure.line_count),
89-
score=0.0 if structure.line_count > 6 else 0.5,
88+
score=0.0,
9089
passed=False,
9190
)
9291
)
93-
scores.append(0.0 if structure.line_count > 6 else 0.5)
92+
violations += 1
9493

9594
if structure.line_count < 4:
95+
# Can't verify further constraints without enough lines
9696
return VerificationResult(
97-
score=0.0,
97+
score=0.05,
9898
passed=False,
9999
rubric=rubric,
100100
constraint_name=self.name,
@@ -115,18 +115,17 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
115115
passed=True,
116116
)
117117
)
118-
scores.append(1.0)
119118
else:
120119
rubric.append(
121120
RubricItem(
122121
criterion="First line contains name",
123122
expected="capitalized proper noun",
124123
actual=first_line,
125-
score=0.3,
124+
score=0.0,
126125
passed=False,
127126
)
128127
)
129-
scores.append(0.3)
128+
violations += 1
130129

131130
# Check AABB rhyme scheme
132131
end_words = [self._get_end_word(line) for line in structure.lines[:4]]
@@ -145,7 +144,8 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
145144
passed=passed_12,
146145
)
147146
)
148-
scores.append(rhyme_12)
147+
if not passed_12:
148+
violations += 1
149149

150150
# Second couplet (lines 3-4)
151151
if len(end_words) >= 4:
@@ -161,10 +161,20 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
161161
passed=passed_34,
162162
)
163163
)
164-
scores.append(rhyme_34)
165-
166-
overall_score = sum(scores) / len(scores) if scores else 0.0
167-
overall_passed = all(r.passed for r in rubric) if self.strict else overall_score >= 0.5
164+
if not passed_34:
165+
violations += 1
166+
167+
# Steep penalty scoring: 0 violations = 1.0, 1 = 0.5, 2 = 0.25, 3+ = 0.05
168+
if violations == 0:
169+
overall_score = 1.0
170+
elif violations == 1:
171+
overall_score = 0.5
172+
elif violations == 2:
173+
overall_score = 0.25
174+
else: # 3 or more violations
175+
overall_score = 0.05
176+
177+
overall_passed = violations == 0 if self.strict else overall_score >= 0.5
168178

169179
return VerificationResult(
170180
score=overall_score,

src/abide/forms/constrained.py

Lines changed: 50 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -83,9 +83,17 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
8383
else:
8484
details.append(f"Line {i + 1}: ✗ empty line")
8585

86-
# Quadratic penalty for stricter GRPO training
87-
linear_letter = matches / max(1, min(len(self.letters), len(structure.lines)))
88-
letter_score = linear_letter**2
86+
# Steep penalties for GRPO training: 0 violations = 1.0, 1-2 = partial, 3+ = near zero
87+
expected_matches = min(len(self.letters), len(structure.lines))
88+
violations = expected_matches - matches
89+
if violations == 0:
90+
letter_score = 1.0
91+
elif violations == 1:
92+
letter_score = 0.5
93+
elif violations == 2:
94+
letter_score = 0.25
95+
else:
96+
letter_score = 0.05
8997

9098
# Combine scores
9199
score = line_result.score * 0.1 + letter_score * 0.9
@@ -335,9 +343,17 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
335343
else:
336344
details.append(f"Line {i + 1}: ✗ missing '{target_letter}' in middle")
337345

338-
# Quadratic penalty for stricter GRPO training
339-
linear_mesostic = matches / max(1, min(len(self.target_word), len(structure.lines)))
340-
mesostic_score = linear_mesostic**2
346+
# Steep penalties for GRPO training: 0 violations = 1.0, 1-2 = partial, 3+ = near zero
347+
expected_matches = min(len(self.target_word), len(structure.lines))
348+
violations = expected_matches - matches
349+
if violations == 0:
350+
mesostic_score = 1.0
351+
elif violations == 1:
352+
mesostic_score = 0.5
353+
elif violations == 2:
354+
mesostic_score = 0.25
355+
else:
356+
mesostic_score = 0.05
341357

342358
# Combine scores
343359
score = line_result.score * 0.1 + mesostic_score * 0.9
@@ -440,12 +456,17 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
440456
opening_counts = Counter(openings)
441457
detected_phrase, repeats = opening_counts.most_common(1)[0]
442458

443-
# Score based on repeats - quadratic penalty for stricter GRPO training
459+
# Score based on repeats - steep penalties for GRPO training
444460
if repeats >= self.min_repeats:
445461
anaphora_score = 1.0
446462
else:
447-
linear_anaphora = repeats / self.min_repeats
448-
anaphora_score = linear_anaphora**2
463+
violations = self.min_repeats - repeats
464+
if violations == 1:
465+
anaphora_score = 0.5
466+
elif violations == 2:
467+
anaphora_score = 0.25
468+
else:
469+
anaphora_score = 0.05
449470

450471
# Combine scores
451472
score = line_result.score * 0.1 + anaphora_score * 0.9
@@ -522,9 +543,16 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
522543
palindrome_lines = sum(
523544
1 for line in structure.lines if self._is_letter_palindrome(line)
524545
)
525-
# Quadratic penalty for stricter GRPO training
526-
linear_palindrome = palindrome_lines / max(1, len(structure.lines))
527-
palindrome_score = linear_palindrome**2
546+
# Steep penalties for GRPO training: 0 violations = 1.0, 1-2 = partial, 3+ = near zero
547+
violations = len(structure.lines) - palindrome_lines
548+
if violations == 0:
549+
palindrome_score = 1.0
550+
elif violations == 1:
551+
palindrome_score = 0.5
552+
elif violations == 2:
553+
palindrome_score = 0.25
554+
else:
555+
palindrome_score = 0.05
528556
else:
529557
# Word level: first half of lines mirror second half
530558
lines = [line.strip().lower() for line in structure.lines]
@@ -541,9 +569,16 @@ def verify(self, poem: str | PoemStructure) -> VerificationResult:
541569
if lines[i] == lines[n - 1 - i]:
542570
matches += 1
543571

544-
# Quadratic penalty for stricter GRPO training
545-
linear_palindrome = matches / max(1, comparisons)
546-
palindrome_score = linear_palindrome**2
572+
# Steep penalties for GRPO training: 0 violations = 1.0, 1-2 = partial, 3+ = near zero
573+
violations = comparisons - matches
574+
if violations == 0:
575+
palindrome_score = 1.0
576+
elif violations == 1:
577+
palindrome_score = 0.5
578+
elif violations == 2:
579+
palindrome_score = 0.25
580+
else:
581+
palindrome_score = 0.05
547582

548583
# Combine scores
549584
score = line_result.score * 0.1 + palindrome_score * 0.9

src/abide/forms/epigram.py

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -70,26 +70,43 @@ def __init__(
7070
def verify(self, poem: str | PoemStructure) -> VerificationResult:
7171
structure = self._ensure_structure(poem)
7272

73-
# Check line count - quadratic penalty for stricter GRPO training
73+
# Check line count - steep penalties for strict GRPO training
74+
# Perfect = 1.0, 1 violation = 0.5, 2 violations = 0.25, 3+ = 0.05
7475
if structure.line_count < self.min_lines:
75-
linear_score = structure.line_count / self.min_lines
76+
violations = self.min_lines - structure.line_count
77+
if violations == 1:
78+
score = 0.5
79+
elif violations == 2:
80+
score = 0.25
81+
else:
82+
score = 0.05
7683
return VerificationResult(
77-
score=linear_score**2,
84+
score=score,
7885
passed=False,
7986
rubric=[],
8087
constraint_name=self.name,
8188
constraint_type=self.constraint_type,
82-
details={"error": f"Too few lines (minimum {self.min_lines})"},
89+
details={
90+
"error": f"Too few lines (minimum {self.min_lines}, got {structure.line_count})"
91+
},
8392
)
8493
if structure.line_count > self.max_lines:
85-
linear_score = self.max_lines / structure.line_count
94+
violations = structure.line_count - self.max_lines
95+
if violations == 1:
96+
score = 0.5
97+
elif violations == 2:
98+
score = 0.25
99+
else:
100+
score = 0.05
86101
return VerificationResult(
87-
score=linear_score**2,
102+
score=score,
88103
passed=False,
89104
rubric=[],
90105
constraint_name=self.name,
91106
constraint_type=self.constraint_type,
92-
details={"error": f"Too many lines (maximum {self.max_lines})"},
107+
details={
108+
"error": f"Too many lines (maximum {self.max_lines}, got {structure.line_count})"
109+
},
93110
)
94111

95112
# For 2-line epigrams, check couplet rhyme

0 commit comments

Comments
 (0)