
Commit ab5f6e7

style(nyz): polish flake8 style
1 parent 04a586b commit ab5f6e7

3 files changed: 4 additions & 3 deletions

ding/reward_model/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -15,4 +15,4 @@
 from .icm_reward_model import ICMRewardModel
 # LLM/VLM reward model and verifier
 from .math_reward_model import MathRewardModel
-from .math_rule_reward_model import MathRuleRewardModel
+from .math_rule_reward_model import MathRuleRewardModel

ding/reward_model/math_reward_model.py

Lines changed: 1 addition & 1 deletion
@@ -42,4 +42,4 @@ def collect_data(self, data: list) -> None:
         pass

     def clear_data(self) -> None:
-        pass
+        pass

ding/reward_model/math_rule_reward_model.py

Lines changed: 2 additions & 1 deletion
@@ -41,7 +41,8 @@ def estimate(self, data: List[str]) -> List[Dict]:
         """
         # 1. parse the query to get question and predicted answer
         # 2. get the ground truth answer according to the question
-        # 3. calculate the reward based on the predicted answer and the ground truth answer (format error -2, answer error -1, correct 1)
+        # 3. calculate the reward based on the predicted answer and the ground truth answer
+        #    (format error -2, answer error -1, correct 1)
         pass

     # rule-based reward model does not need training, thus the following methods are empty
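The numbered comments in this diff outline the planned rule-based reward: parse the query into a question and a predicted answer, look up the ground-truth answer, then score -2 for a format error, -1 for a wrong answer, and 1 for a correct one. A minimal sketch of that logic, assuming a hypothetical "<question> Answer: <answer>" query format and an illustrative ground-truth dict; `gt_table` and `_parse_query` are invented for this example and are not part of the repository's API:

from typing import Dict, List, Optional, Tuple

# Hypothetical ground-truth lookup; the real model would presumably load this from a dataset.
gt_table: Dict[str, str] = {"1+1=?": "2"}


def _parse_query(query: str) -> Optional[Tuple[str, str]]:
    # Assumed query format: "<question> Answer: <answer>"; return None on a format error.
    if " Answer: " not in query:
        return None
    question, _, answer = query.partition(" Answer: ")
    return question.strip(), answer.strip()


def estimate(data: List[str]) -> List[Dict]:
    rewards = []
    for query in data:
        parsed = _parse_query(query)
        if parsed is None:
            rewards.append({"reward": -2})  # format error
            continue
        question, pred = parsed
        gt = gt_table.get(question)
        if gt is not None and pred == gt:
            rewards.append({"reward": 1})  # correct answer
        else:
            rewards.append({"reward": -1})  # answer error
    return rewards

For example, estimate(["1+1=? Answer: 2", "1+1=? Answer: 3", "malformed"]) would return rewards of 1, -1, and -2 respectively under these assumptions.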
