Remove old debug code from main.py

casteryh · casteryh · commit 1a4d5fb0c266 · 2025-10-31T15:37:56.000-07:00
The old debug code checked for `<think>` tags instead of `<思考>` tags,
which made its output misleading. The LanguageReward class now has its own
debug mode that correctly checks for the configured tag.
diff --git a/sandbox/grpo_language/main.py b/sandbox/grpo_language/main.py
@@ -155,31 +155,6 @@ async def evaluate_response(self, prompt: str, response: str, target: str) -> fl
                 reward_fn, "__name__", reward_fn.__class__.__name__
             )
 
-            # Debug logging for LanguageReward to see what's happening
-            if reward_fn_name == "LanguageReward":
-                import re
-
-                import langid
-
-                think_matches = re.findall(
-                    r"<\s*think\s*>(.*?)<\s*/\s*think\s*>",
-                    response,
-                    re.IGNORECASE | re.DOTALL,
-                )
-                if think_matches:
-                    content = " ".join(think_matches)
-                    detected_lang, confidence = langid.classify(content)
-                    print(
-                        f"[LanguageReward Debug] Reward={reward:.2f} | "
-                        f"Blocks={len(think_matches)} | Lang={detected_lang} | "
-                        f"Sample: {response[:80]}..."
-                    )
-                else:
-                    print(
-                        f"[LanguageReward Debug] Reward={reward:.2f} | "
-                        f"Blocks=0 | Sample: {response[:80]}..."
-                    )
-
             # per function reward
             record_metric(
                 f"reward/evaluate_response/sum_{reward_fn_name}_reward",