Skip to content

Commit 08499b4

Browse files
committed
add logging
1 parent 4f50072 commit 08499b4

File tree

4 files changed

+21
-0
lines changed

4 files changed

+21
-0
lines changed

slime/code_golf_harbor_modal/configs/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class RLConfig:
2727
harbor_rm_timeout_sec: int = 120
2828
harbor_length_bonus_weight: float = 0.2
2929
harbor_rm_profile: bool = False
30+
harbor_rm_log_samples: bool = False
3031

3132
@property
3233
def train_script(self) -> str:

slime/code_golf_harbor_modal/configs/qwen_8b_multi.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def get_config() -> RLConfig:
2020
wandb_project="slime-code-golf",
2121
wandb_run_name_prefix="qwen8b-mbpp-harbor",
2222
harbor_rm_profile=True,
23+
harbor_rm_log_samples=True,
2324
slime_args=f"""
2425
# Model
2526
{QWEN3_8B_MODEL_ARGS}

slime/code_golf_harbor_modal/custom_rm.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def _compose_reward(
112112

113113
async def _score_sample(sample: Any) -> float:
114114
profile_enabled = _get_env_bool("HARBOR_RM_PROFILE", default=False)
115+
log_samples = _get_env_bool("HARBOR_RM_LOG_SAMPLES", default=False)
115116
t0 = time.perf_counter()
116117
timings_s: dict[str, float] = {}
117118

@@ -175,6 +176,15 @@ def _mark(name: str) -> None:
175176
"candidate_bytes": candidate_size,
176177
"reward": reward,
177178
}
179+
if log_samples:
180+
print(
181+
"harbor_rm_sample: "
182+
f"task_id={label_payload.get('task_id')} "
183+
f"pass_rate={pass_rate:.6f} "
184+
f"reward={reward:.6f} "
185+
f"candidate_bytes={candidate_size} "
186+
f"reference_bytes={reference_size}"
187+
)
178188
if profile_enabled:
179189
timestamps = {**timings_s, "done": time.perf_counter()}
180190
timing_ms = {
@@ -219,6 +229,14 @@ def _mark(name: str) -> None:
219229
sample.metadata = {}
220230
sample.metadata["harbor_rm_error"] = repr(exc)
221231
print(f"harbor_rm_error: {exc!r}")
232+
if log_samples:
233+
print(
234+
"harbor_rm_sample_error: "
235+
f"task_id={label_payload.get('task_id')} "
236+
f"error={exc!r} "
237+
f"candidate_bytes={candidate_size} "
238+
f"reference_bytes={reference_size}"
239+
)
222240
if profile_enabled:
223241
timing_ms = {
224242
"total_until_error": round((time.perf_counter() - t0) * 1000, 2)

slime/code_golf_harbor_modal/modal_train.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def _build_runtime_env(config: RLConfig, master_addr: str) -> dict:
115115
"HARBOR_RM_TIMEOUT_SEC": str(config.harbor_rm_timeout_sec),
116116
"HARBOR_LENGTH_BONUS_WEIGHT": str(config.harbor_length_bonus_weight),
117117
"HARBOR_RM_PROFILE": "1" if config.harbor_rm_profile else "0",
118+
"HARBOR_RM_LOG_SAMPLES": "1" if config.harbor_rm_log_samples else "0",
118119
}
119120
}
120121

0 commit comments

Comments
 (0)