
Commit de40c73

fix bug, tested
1 parent 1771447 commit de40c73

3 files changed: 6 additions, 4 deletions

.gitignore

Lines changed: 4 additions & 0 deletions

@@ -167,3 +167,7 @@ applications/ColossalChat/wandb
 applications/ColossalChat/model
 applications/ColossalChat/eval
 applications/ColossalChat/rollouts
+applications/ColossalChat/*.txt
+applications/ColossalChat/*.db
+applications/ColossalChat/stdin
+applications/ColossalChat/*.zip

applications/ColossalChat/coati/distributed/consumer.py

Lines changed: 1 addition & 3 deletions

@@ -132,9 +132,7 @@ def loop(self) -> None:
 format_acc = raw_batch["format_acc"][:, :, 0]
 ans_acc = raw_batch["ans_acc"][:, :, 0]
 response_len = (
-    raw_batch["response_idx"][:, :, 1]
-    - raw_batch["response_idx"][:, :, 0]
-    + 1
+    raw_batch["response_idx"][:, :, 1] - raw_batch["response_idx"][:, :, 0] + 1
 ).type(torch.float32)
 effective_group_mask = None
 if self.filter_range is not None and self.grpo_config.get("dynamic_batching", True):
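The change in this file is cosmetic: the three-line expression is collapsed onto one line. In both forms, response_len is the inclusive span between the start and end token indices recorded in response_idx. A minimal sketch of that computation, with made-up shapes and values used only for illustration:

import torch

# Assumed layout (not taken from the repo): response_idx[..., 0] is the first
# response token index and response_idx[..., 1] the last, inclusive.
response_idx = torch.tensor([[[3, 7], [3, 5]]])  # [batch=1, generations=2, 2]

# end - start + 1 counts the tokens in each response, inclusive of both ends.
response_len = (response_idx[:, :, 1] - response_idx[:, :, 0] + 1).type(torch.float32)
print(response_len)  # tensor([[5., 3.]])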

applications/ColossalChat/coati/distributed/producer.py

Lines changed: 1 addition & 1 deletion

@@ -291,7 +291,7 @@ def loop(self) -> None:
 reward_model_output = self.reward_model(
     outputs["input_ids"].view((-1, outputs["input_ids"].size(-1))),
     gt_answer=gt_answer,
-    response_idx=outputs["response_idx"],
+    response_idx=outputs["response_idx"].view((-1, 2)),
 )
 outputs["reward"] = (
     torch.tensor([value[0] for value in reward_model_output])
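This one-argument change is the bug fix named in the commit message: input_ids is flattened to one row per sampled response before the reward model call, so response_idx has to be flattened the same way or its rows stop lining up with the flattened sequences. A minimal sketch of the shape alignment, using assumed dimensions (B prompts, G generations per prompt, S tokens) that are not taken from the repo:

import torch

B, G, S = 2, 4, 16  # illustrative sizes only

input_ids = torch.randint(0, 100, (B, G, S))
response_idx = torch.randint(0, S, (B, G, 2))  # assumed [start, end] per response

# The reward model receives one row per (prompt, generation) pair.
flat_input_ids = input_ids.view((-1, input_ids.size(-1)))  # [B*G, S]

# Before the fix, response_idx stayed [B, G, 2] and no longer matched row-for-row.
# Flattening it to [B*G, 2] restores the correspondence.
flat_response_idx = response_idx.view((-1, 2))

assert flat_input_ids.size(0) == flat_response_idx.size(0)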
