@@ -100,7 +100,7 @@ class PPOLoss(LossModule):
100
100
``samples_mc_entropy`` will control how many
101
101
samples will be used to compute this estimate.
102
102
Defaults to ``1``.
103
- entropy_coeff: scalar | Mapping[str , scalar], optional): entropy multiplier when computing the total loss.
103
+ entropy_coeff (scalar | Mapping[NestedKey , scalar], optional): entropy multiplier when computing the total loss.
104
104
* **Scalar**: one value applied to the summed entropy of every action head.
105
105
* **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
106
106
Defaults to ``0.01``.
@@ -351,7 +351,7 @@ def __init__(
351
351
* ,
352
352
entropy_bonus : bool = True ,
353
353
samples_mc_entropy : int = 1 ,
354
- entropy_coeff : float | NestedKey | None = None ,
354
+ entropy_coeff : float | Mapping [ NestedKey , float ] | None = None ,
355
355
log_explained_variance : bool = True ,
356
356
critic_coeff : float | None = None ,
357
357
loss_critic_type : str = "smooth_l1" ,
@@ -459,9 +459,7 @@ def __init__(
459
459
460
460
if isinstance (entropy_coeff , Mapping ):
461
461
# Store the mapping for per-head coefficients
462
- self ._entropy_coeff_map = {
463
- str (k ): float (v ) for k , v in entropy_coeff .items ()
464
- }
462
+ self ._entropy_coeff_map = {k : float (v ) for k , v in entropy_coeff .items ()}
465
463
# Register an empty buffer for compatibility
466
464
self .register_buffer ("entropy_coeff" , torch .tensor (0.0 ))
467
465
elif isinstance (entropy_coeff , (float , int , torch .Tensor )):
@@ -974,7 +972,7 @@ class ClipPPOLoss(PPOLoss):
974
972
``samples_mc_entropy`` will control how many
975
973
samples will be used to compute this estimate.
976
974
Defaults to ``1``.
977
- entropy_coeff: (scalar | Mapping[str , scalar], optional): entropy multiplier when computing the total loss.
975
+ entropy_coeff: (scalar | Mapping[NestedKey , scalar], optional): entropy multiplier when computing the total loss.
978
976
* **Scalar**: one value applied to the summed entropy of every action head.
979
977
* **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
980
978
Defaults to ``0.01``.
@@ -1079,7 +1077,7 @@ def __init__(
1079
1077
clip_epsilon : float = 0.2 ,
1080
1078
entropy_bonus : bool = True ,
1081
1079
samples_mc_entropy : int = 1 ,
1082
- entropy_coeff : float | Mapping [str | tuple | list , float ] | None = None ,
1080
+ entropy_coeff : float | Mapping [NestedKey , float ] | None = None ,
1083
1081
critic_coeff : float | None = None ,
1084
1082
loss_critic_type : str = "smooth_l1" ,
1085
1083
normalize_advantage : bool = False ,
@@ -1267,7 +1265,7 @@ class KLPENPPOLoss(PPOLoss):
1267
1265
``samples_mc_entropy`` will control how many
1268
1266
samples will be used to compute this estimate.
1269
1267
Defaults to ``1``.
1270
- entropy_coeff: scalar | Mapping[str , scalar], optional): entropy multiplier when computing the total loss.
1268
+ entropy_coeff (scalar | Mapping[NestedKey , scalar], optional): entropy multiplier when computing the total loss.
1271
1269
* **Scalar**: one value applied to the summed entropy of every action head.
1272
1270
* **Mapping** ``{head_name: coeff}`` gives an individual coefficient for each action-head's entropy.
1273
1271
Defaults to ``0.01``.
@@ -1373,7 +1371,7 @@ def __init__(
1373
1371
samples_mc_kl : int = 1 ,
1374
1372
entropy_bonus : bool = True ,
1375
1373
samples_mc_entropy : int = 1 ,
1376
- entropy_coeff : float | Mapping [str | tuple | list , float ] | None = None ,
1374
+ entropy_coeff : float | Mapping [NestedKey , float ] | None = None ,
1377
1375
critic_coeff : float | None = None ,
1378
1376
loss_critic_type : str = "smooth_l1" ,
1379
1377
normalize_advantage : bool = False ,
0 commit comments