We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4e7bba7 commit a02a9d4 · Copy full SHA for a02a9d4
d3rlpy/algos/qlearning/torch/cal_ql_impl.py
@@ -18,4 +18,4 @@ def _compute_policy_is_values(
18
value_obs=value_obs,
19
returns_to_go=returns_to_go,
20
)
21
- return torch.maximum(values, returns_to_go), log_probs
+ return torch.maximum(values, returns_to_go.view(1, -1, 1)), log_probs
0 commit comments