ray-project
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 0 additions & 16 deletions
diff --git a/rllib/algorithms/__init__.py b/rllib/algorithms/__init__.py
Lines changed: 1 addition & 2 deletions
diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py
Lines changed: 12 additions & 13 deletions
diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
Lines changed: 9 additions & 10 deletions
diff --git a/rllib/algorithms/appo/appo.py b/rllib/algorithms/appo/appo.py
Lines changed: 4 additions & 3 deletions
diff --git a/rllib/algorithms/appo/appo_tf_policy.py b/rllib/algorithms/appo/appo_tf_policy.py
Lines changed: 11 additions & 11 deletions
diff --git a/rllib/algorithms/appo/appo_torch_policy.py b/rllib/algorithms/appo/appo_torch_policy.py
Lines changed: 9 additions & 8 deletions
diff --git a/rllib/algorithms/appo/default_appo_rl_module.py b/rllib/algorithms/appo/default_appo_rl_module.py
Lines changed: 2 additions & 3 deletions
diff --git a/rllib/algorithms/appo/tests/test_appo.py b/rllib/algorithms/appo/tests/test_appo.py
Lines changed: 1 addition & 1 deletion
diff --git a/rllib/algorithms/appo/tests/test_appo_learner.py b/rllib/algorithms/appo/tests/test_appo_learner.py
Lines changed: 3 additions & 3 deletions
@@ -65,22 +65,6 @@ afterray = ["psutil", "setproctitle"]
 "python/ray/__init__.py" = ["I"]
 "python/ray/dag/__init__.py" = ["I"]
 "python/ray/air/__init__.py" = ["I"]
-# "rllib/__init__.py" = ["I"]
-# "rllib/benchmarks/*" = ["I"]
-# "rllib/connectors/*" = ["I"]
-# "rllib/evaluation/*" = ["I"]
-# "rllib/models/*" = ["I"]
-"rllib/utils/*" = ["I"]
-"rllib/algorithms/*" = ["I"]
-# "rllib/core/*" = ["I"]
-# "rllib/examples/*" = ["I"]
-# "rllib/offline/*" = ["I"]
-# "rllib/tests/*" = ["I"]
-# "rllib/callbacks/*" = ["I"]
-# "rllib/env/*" = ["I"]
-# "rllib/execution/*" = ["I"]
-# "rllib/policy/*" = ["I"]
-# "rllib/tuned_examples/*" = ["I"]
 "release/*" = ["I"]
 
 # TODO(matthewdeng): Remove this line
 
@@ -6,15 +6,14 @@
 from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig
 from ray.rllib.algorithms.impala.impala import (
     IMPALA,
-    IMPALAConfig,
     Impala,
+    IMPALAConfig,
     ImpalaConfig,
 )
 from ray.rllib.algorithms.marwil.marwil import MARWIL, MARWILConfig
 from ray.rllib.algorithms.ppo.ppo import PPO, PPOConfig
 from ray.rllib.algorithms.sac.sac import SAC, SACConfig
 
-
 __all__ = [
     "Algorithm",
     "AlgorithmConfig",
 
@@ -13,6 +13,7 @@
 from collections import defaultdict
 from datetime import datetime
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Collection,
@@ -23,7 +24,6 @@
     Set,
     Tuple,
     Type,
-    TYPE_CHECKING,
     Union,
 )
 
@@ -47,9 +47,9 @@
 from ray.rllib.algorithms.utils import (
     AggregatorActor,
     _get_env_runner_bundles,
-    _get_offline_eval_runner_bundles,
     _get_learner_bundles,
     _get_main_process_bundle,
+    _get_offline_eval_runner_bundles,
 )
 from ray.rllib.callbacks.utils import make_callback
 from ray.rllib.connectors.agent.obs_preproc import ObsPreprocessorConnector
@@ -84,30 +84,30 @@
 from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
 from ray.rllib.offline import get_dataset_and_shards
 from ray.rllib.offline.estimators import (
-    OffPolicyEstimator,
-    ImportanceSampling,
-    WeightedImportanceSampling,
     DirectMethod,
     DoublyRobust,
+    ImportanceSampling,
+    OffPolicyEstimator,
+    WeightedImportanceSampling,
 )
 from ray.rllib.offline.offline_evaluator import OfflineEvaluator
 from ray.rllib.policy.policy import Policy, PolicySpec
 from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
-from ray.rllib.utils import deep_update, FilterManager, force_list
+from ray.rllib.utils import FilterManager, deep_update, force_list
 from ray.rllib.utils.actor_manager import FaultTolerantActorManager
 from ray.rllib.utils.annotations import (
     DeveloperAPI,
     ExperimentalAPI,
     OldAPIStack,
-    override,
     OverrideToImplementCustomLogic,
     OverrideToImplementCustomLogic_CallToSuperRecommended,
     PublicAPI,
+    override,
 )
 from ray.rllib.utils.checkpoints import (
-    Checkpointable,
     CHECKPOINT_VERSION,
     CHECKPOINT_VERSION_LEARNER_AND_ENV_RUNNER,
+    Checkpointable,
     get_checkpoint_info,
     try_import_msgpack,
 )
@@ -134,9 +134,9 @@
     NUM_AGENT_STEPS_TRAINED,
     NUM_AGENT_STEPS_TRAINED_LIFETIME,
     NUM_ENV_STEPS_SAMPLED,
+    NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER,
     NUM_ENV_STEPS_SAMPLED_LIFETIME,
     NUM_ENV_STEPS_SAMPLED_THIS_ITER,
-    NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER,
     NUM_ENV_STEPS_TRAINED,
     NUM_ENV_STEPS_TRAINED_LIFETIME,
     NUM_EPISODES,
@@ -147,13 +147,13 @@
     RESTORE_ENV_RUNNERS_TIMER,
     RESTORE_EVAL_ENV_RUNNERS_TIMER,
     RESTORE_OFFLINE_EVAL_RUNNERS_TIMER,
+    STEPS_TRAINED_THIS_ITER_COUNTER,
     SYNCH_ENV_CONNECTOR_STATES_TIMER,
     SYNCH_EVAL_ENV_CONNECTOR_STATES_TIMER,
     SYNCH_WORKER_WEIGHTS_TIMER,
     TIMERS,
     TRAINING_ITERATION_TIMER,
     TRAINING_STEP_TIMER,
-    STEPS_TRAINED_THIS_ITER_COUNTER,
 )
 from ray.rllib.utils.metrics.learner_info import LEARNER_INFO
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
@@ -164,7 +164,7 @@
 )
 from ray.rllib.utils.replay_buffers import MultiAgentReplayBuffer, ReplayBuffer
 from ray.rllib.utils.runners.runner_group import RunnerGroup
-from ray.rllib.utils.serialization import deserialize_type, NOT_SERIALIZABLE
+from ray.rllib.utils.serialization import NOT_SERIALIZABLE, deserialize_type
 from ray.rllib.utils.spaces import space_utils
 from ray.rllib.utils.typing import (
     AgentConnectorDataType,
@@ -191,8 +191,7 @@
 from ray.tune.execution.placement_groups import PlacementGroupFactory
 from ray.tune.experiment.trial import ExportFormat
 from ray.tune.logger import Logger, UnifiedLogger
-from ray.tune.registry import ENV_CREATOR, _global_registry
-from ray.tune.registry import get_trainable_cls
+from ray.tune.registry import ENV_CREATOR, _global_registry, get_trainable_cls
 from ray.tune.resources import Resources
 from ray.tune.result import TRAINING_ITERATION
 from ray.tune.trainable import Trainable
 
@@ -1,10 +1,11 @@
 import copy
 import dataclasses
-from enum import Enum
 import logging
 import math
 import sys
+from enum import Enum
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Collection,
@@ -13,16 +14,20 @@
     Optional,
     Tuple,
     Type,
-    TYPE_CHECKING,
     Union,
 )
-from typing_extensions import Self
 
 import gymnasium as gym
 import tree
 from packaging import version
+from typing_extensions import Self
 
 import ray
+from ray._common.deprecation import (
+    DEPRECATED_VALUE,
+    Deprecated,
+    deprecation_warning,
+)
 from ray.rllib.callbacks.callbacks import RLlibCallback
 from ray.rllib.connectors.connector_v2 import ConnectorV2
 from ray.rllib.core import DEFAULT_MODULE_ID
@@ -34,7 +39,7 @@
 from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
 from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
 from ray.rllib.core.rl_module.rl_module import RLModuleSpec
-from ray.rllib.env import INPUT_ENV_SPACES, INPUT_ENV_SINGLE_SPACES
+from ray.rllib.env import INPUT_ENV_SINGLE_SPACES, INPUT_ENV_SPACES
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from ray.rllib.env.wrappers.atari_wrappers import is_atari
 from ray.rllib.evaluation.collectors.sample_collector import SampleCollector
@@ -49,11 +54,6 @@
     OldAPIStack,
     OverrideToImplementCustomLogic_CallToSuperRecommended,
 )
-from ray._common.deprecation import (
-    DEPRECATED_VALUE,
-    Deprecated,
-    deprecation_warning,
-)
 from ray.rllib.utils.framework import try_import_tf, try_import_torch
 from ray.rllib.utils.from_config import NotProvided, from_config
 from ray.rllib.utils.schedules.scheduler import Scheduler
@@ -84,7 +84,6 @@
 from ray.util import log_once
 from ray.util.placement_group import PlacementGroup
 
-
 if TYPE_CHECKING:
     from ray.rllib.algorithms.algorithm import Algorithm
     from ray.rllib.core.learner import Learner
 
@@ -10,23 +10,24 @@
 https://arxiv.org/pdf/1912.00167
 """
 
+import logging
 from typing import Optional, Type
+
 from typing_extensions import Self
-import logging
 
+from ray._common.deprecation import DEPRECATED_VALUE, deprecation_warning
 from ray.rllib.algorithms.algorithm_config import AlgorithmConfig, NotProvided
 from ray.rllib.algorithms.impala.impala import IMPALA, IMPALAConfig
 from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.policy.policy import Policy
 from ray.rllib.utils.annotations import override
-from ray._common.deprecation import DEPRECATED_VALUE, deprecation_warning
 from ray.rllib.utils.metrics import (
     LAST_TARGET_UPDATE_TS,
+    LEARNER_STATS_KEY,
     NUM_AGENT_STEPS_SAMPLED,
     NUM_ENV_STEPS_SAMPLED,
     NUM_TARGET_UPDATES,
 )
-from ray.rllib.utils.metrics import LEARNER_STATS_KEY
 
 logger = logging.getLogger(__name__)
 
 
@@ -5,37 +5,37 @@
 Keep in sync with changes to VTraceTFPolicy.
 """
 
-import numpy as np
 import logging
-import gymnasium as gym
 from typing import Dict, List, Optional, Type, Union
 
+import gymnasium as gym
+import numpy as np
+
 from ray.rllib.algorithms.appo.utils import make_appo_models
 from ray.rllib.algorithms.impala import vtrace_tf as vtrace
 from ray.rllib.algorithms.impala.impala_tf_policy import (
-    _make_time_major,
     VTraceClipGradients,
     VTraceOptimizer,
+    _make_time_major,
 )
 from ray.rllib.evaluation.postprocessing import (
+    Postprocessing,
     compute_bootstrap_value,
     compute_gae_for_sample_batch,
-    Postprocessing,
 )
-from ray.rllib.models.tf.tf_action_dist import Categorical
-from ray.rllib.policy.sample_batch import SampleBatch
+from ray.rllib.models.modelv2 import ModelV2
+from ray.rllib.models.tf.tf_action_dist import Categorical, TFActionDistribution
 from ray.rllib.policy.dynamic_tf_policy_v2 import DynamicTFPolicyV2
 from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
+from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.tf_mixins import (
     EntropyCoeffSchedule,
-    LearningRateSchedule,
-    KLCoeffMixin,
-    ValueNetworkMixin,
     GradStatsMixin,
+    KLCoeffMixin,
+    LearningRateSchedule,
     TargetNetworkMixin,
+    ValueNetworkMixin,
 )
-from ray.rllib.models.modelv2 import ModelV2
-from ray.rllib.models.tf.tf_action_dist import TFActionDistribution
 from ray.rllib.utils.annotations import (
     override,
 )
 
@@ -5,37 +5,38 @@
 Keep in sync with changes to VTraceTFPolicy.
 """
 
-import gymnasium as gym
-import numpy as np
 import logging
 from typing import Any, Dict, List, Optional, Type, Union
 
+import gymnasium as gym
+import numpy as np
+
 import ray
-from ray.rllib.algorithms.appo.utils import make_appo_models
 import ray.rllib.algorithms.impala.vtrace_torch as vtrace
+from ray.rllib.algorithms.appo.utils import make_appo_models
 from ray.rllib.algorithms.impala.impala_torch_policy import (
-    make_time_major,
     VTraceOptimizer,
+    make_time_major,
 )
 from ray.rllib.evaluation.postprocessing import (
+    Postprocessing,
     compute_bootstrap_value,
     compute_gae_for_sample_batch,
-    Postprocessing,
 )
 from ray.rllib.models.action_dist import ActionDistribution
 from ray.rllib.models.modelv2 import ModelV2
 from ray.rllib.models.torch.torch_action_dist import (
-    TorchDistributionWrapper,
     TorchCategorical,
+    TorchDistributionWrapper,
 )
 from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
 from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.torch_mixins import (
     EntropyCoeffSchedule,
-    LearningRateSchedule,
     KLCoeffMixin,
-    ValueNetworkMixin,
+    LearningRateSchedule,
     TargetNetworkMixin,
+    ValueNetworkMixin,
 )
 from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
 from ray.rllib.utils.annotations import override
 
@@ -8,12 +8,11 @@
     TARGET_NETWORK_ACTION_DIST_INPUTS,
     TargetNetworkAPI,
 )
-from ray.rllib.utils.typing import NetworkType
-
 from ray.rllib.utils.annotations import (
-    override,
     OverrideToImplementCustomLogic_CallToSuperRecommended,
+    override,
 )
+from ray.rllib.utils.typing import NetworkType
 from ray.util.annotations import DeveloperAPI
 
 
 
@@ -11,9 +11,9 @@
     NUM_ENV_STEPS_SAMPLED_LIFETIME,
 )
 from ray.rllib.utils.test_utils import (
+    check_compute_single_action,
     check_train_results,
     check_train_results_new_api_stack,
-    check_compute_single_action,
 )
 
 
 
@@ -1,6 +1,6 @@
 import unittest
-import numpy as np
 
+import numpy as np
 import tree  # pip install dm_tree
 
 import ray
@@ -13,7 +13,6 @@
 from ray.rllib.utils.metrics import LEARNER_RESULTS
 from ray.rllib.utils.torch_utils import convert_to_torch_tensor
 
-
 frag_length = 50
 
 FAKE_BATCH = {
@@ -119,7 +118,8 @@ def test_kl_coeff_changes(self):
 
 
 if __name__ == "__main__":
-    import pytest
     import sys
 
+    import pytest
+
     sys.exit(pytest.main(["-v", __file__]))
Original file line number	Diff line number	Diff line change
`@@ -8,12 +8,11 @@`
`8`	`8`	`TARGET_NETWORK_ACTION_DIST_INPUTS,`
`9`	`9`	`TargetNetworkAPI,`
`10`	`10`	`)`
`11`		`-from ray.rllib.utils.typing import NetworkType`
`12`		`-`
`13`	`11`	`from ray.rllib.utils.annotations import (`
`14`		`- override,`
`15`	`12`	`OverrideToImplementCustomLogic_CallToSuperRecommended,`
	`13`	`+ override,`
`16`	`14`	`)`
	`15`	`+from ray.rllib.utils.typing import NetworkType`
`17`	`16`	`from ray.util.annotations import DeveloperAPI`
`18`	`17`
`19`	`18`
Original file line number	Diff line number	Diff line change
`@@ -11,9 +11,9 @@`
`11`	`11`	`NUM_ENV_STEPS_SAMPLED_LIFETIME,`
`12`	`12`	`)`
`13`	`13`	`from ray.rllib.utils.test_utils import (`
	`14`	`+ check_compute_single_action,`
`14`	`15`	`check_train_results,`
`15`	`16`	`check_train_results_new_api_stack,`
`16`		`- check_compute_single_action,`
`17`	`17`	`)`
`18`	`18`
`19`	`19`