Skip to content

Commit 1a4d5fb

Browse files
committed
Remove old debug code from main.py
The old debug code was checking for <think> tags instead of <思考> tags, which was misleading. The LanguageReward class now has its own debug mode that correctly checks for the configured tag.
1 parent 2625b28 commit 1a4d5fb

File tree

1 file changed

+0
-25
lines changed

1 file changed

+0
-25
lines changed

sandbox/grpo_language/main.py

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -155,31 +155,6 @@ async def evaluate_response(self, prompt: str, response: str, target: str) -> fl
155 155
reward_fn, "__name__", reward_fn.__class__.__name__
156 156
)
157 157

158-
# Debug logging for LanguageReward to see what's happening
159-
if reward_fn_name == "LanguageReward":
160-
import re
161-
162-
import langid
163-
164-
think_matches = re.findall(
165-
r"<\s*think\s*>(.*?)<\s*/\s*think\s*>",
166-
response,
167-
re.IGNORECASE | re.DOTALL,
168-
)
169-
if think_matches:
170-
content = " ".join(think_matches)
171-
detected_lang, confidence = langid.classify(content)
172-
print(
173-
f"[LanguageReward Debug] Reward={reward:.2f} | "
174-
f"Blocks={len(think_matches)} | Lang={detected_lang} | "
175-
f"Sample: {response[:80]}..."
176-
)
177-
else:
178-
print(
179-
f"[LanguageReward Debug] Reward={reward:.2f} | "
180-
f"Blocks=0 | Sample: {response[:80]}..."
181-
)
182-
183 158
# per function reward
184 159
record_metric(
185 160
f"reward/evaluate_response/sum_{reward_fn_name}_reward",

0 commit comments

Comments
 (0)