simplify sampling, add todo

casteryh · casteryh · commit bf3dbf097c0a · 2025-11-21T12:24:51.000-08:00
diff --git a/src/forge/data/rewards.py b/src/forge/data/rewards.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import random
 import re
 
 
@@ -111,7 +112,7 @@ class LanguageReward:
         match_reward: Reward when detected language matches target (default: 1.0)
         no_match_reward: Reward when language doesn't match (default: 0.0)
         tag: Tag name to use (default "思考" for multilingual, can use "think", etc.)
-        debug: If True, print debug samples showing model outputs and detected language
+        debug: If True, print debug output (model output and detected language) for every call; otherwise calls are sampled at debug_sample_rate
         debug_sample_rate: Fraction of calls to debug (e.g., 0.1 = 10% of calls)
 
     Note: Requires langid to be installed. Install with: pip install langid
@@ -164,13 +165,9 @@ def __call__(self, prompt: str, response: str, target: str | None = None) -> flo
         Returns:
             match_reward if detected language matches target, no_match_reward otherwise
         """
-        # Increment counter for sampling
-        self._debug_counter += 1
-        should_debug = (
-            self.debug
-            and self.debug_sample_rate > 0
-            and (self._debug_counter % int(1 / self.debug_sample_rate)) == 0
-        )
+
+        # TODO: refactor pending https://github.com/meta-pytorch/torchforge/issues/187
+        should_debug = self.debug or (random.random() < self.debug_sample_rate)
 
         if not response:
             if should_debug: