|
13 | 13 | from collections import defaultdict |
14 | 14 | from datetime import datetime |
15 | 15 | from typing import ( |
| 16 | + TYPE_CHECKING, |
16 | 17 | Any, |
17 | 18 | Callable, |
18 | 19 | Collection, |
|
23 | 24 | Set, |
24 | 25 | Tuple, |
25 | 26 | Type, |
26 | | - TYPE_CHECKING, |
27 | 27 | Union, |
28 | 28 | ) |
29 | 29 |
|
|
47 | 47 | from ray.rllib.algorithms.utils import ( |
48 | 48 | AggregatorActor, |
49 | 49 | _get_env_runner_bundles, |
50 | | - _get_offline_eval_runner_bundles, |
51 | 50 | _get_learner_bundles, |
52 | 51 | _get_main_process_bundle, |
| 52 | + _get_offline_eval_runner_bundles, |
53 | 53 | ) |
54 | 54 | from ray.rllib.callbacks.utils import make_callback |
55 | 55 | from ray.rllib.connectors.agent.obs_preproc import ObsPreprocessorConnector |
|
84 | 84 | from ray.rllib.execution.rollout_ops import synchronous_parallel_sample |
85 | 85 | from ray.rllib.offline import get_dataset_and_shards |
86 | 86 | from ray.rllib.offline.estimators import ( |
87 | | - OffPolicyEstimator, |
88 | | - ImportanceSampling, |
89 | | - WeightedImportanceSampling, |
90 | 87 | DirectMethod, |
91 | 88 | DoublyRobust, |
| 89 | + ImportanceSampling, |
| 90 | + OffPolicyEstimator, |
| 91 | + WeightedImportanceSampling, |
92 | 92 | ) |
93 | 93 | from ray.rllib.offline.offline_evaluator import OfflineEvaluator |
94 | 94 | from ray.rllib.policy.policy import Policy, PolicySpec |
95 | 95 | from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch |
96 | | -from ray.rllib.utils import deep_update, FilterManager, force_list |
| 96 | +from ray.rllib.utils import FilterManager, deep_update, force_list |
97 | 97 | from ray.rllib.utils.actor_manager import FaultTolerantActorManager |
98 | 98 | from ray.rllib.utils.annotations import ( |
99 | 99 | DeveloperAPI, |
100 | 100 | ExperimentalAPI, |
101 | 101 | OldAPIStack, |
102 | | - override, |
103 | 102 | OverrideToImplementCustomLogic, |
104 | 103 | OverrideToImplementCustomLogic_CallToSuperRecommended, |
105 | 104 | PublicAPI, |
| 105 | + override, |
106 | 106 | ) |
107 | 107 | from ray.rllib.utils.checkpoints import ( |
108 | | - Checkpointable, |
109 | 108 | CHECKPOINT_VERSION, |
110 | 109 | CHECKPOINT_VERSION_LEARNER_AND_ENV_RUNNER, |
| 110 | + Checkpointable, |
111 | 111 | get_checkpoint_info, |
112 | 112 | try_import_msgpack, |
113 | 113 | ) |
|
134 | 134 | NUM_AGENT_STEPS_TRAINED, |
135 | 135 | NUM_AGENT_STEPS_TRAINED_LIFETIME, |
136 | 136 | NUM_ENV_STEPS_SAMPLED, |
| 137 | + NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER, |
137 | 138 | NUM_ENV_STEPS_SAMPLED_LIFETIME, |
138 | 139 | NUM_ENV_STEPS_SAMPLED_THIS_ITER, |
139 | | - NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER, |
140 | 140 | NUM_ENV_STEPS_TRAINED, |
141 | 141 | NUM_ENV_STEPS_TRAINED_LIFETIME, |
142 | 142 | NUM_EPISODES, |
|
147 | 147 | RESTORE_ENV_RUNNERS_TIMER, |
148 | 148 | RESTORE_EVAL_ENV_RUNNERS_TIMER, |
149 | 149 | RESTORE_OFFLINE_EVAL_RUNNERS_TIMER, |
| 150 | + STEPS_TRAINED_THIS_ITER_COUNTER, |
150 | 151 | SYNCH_ENV_CONNECTOR_STATES_TIMER, |
151 | 152 | SYNCH_EVAL_ENV_CONNECTOR_STATES_TIMER, |
152 | 153 | SYNCH_WORKER_WEIGHTS_TIMER, |
153 | 154 | TIMERS, |
154 | 155 | TRAINING_ITERATION_TIMER, |
155 | 156 | TRAINING_STEP_TIMER, |
156 | | - STEPS_TRAINED_THIS_ITER_COUNTER, |
157 | 157 | ) |
158 | 158 | from ray.rllib.utils.metrics.learner_info import LEARNER_INFO |
159 | 159 | from ray.rllib.utils.metrics.metrics_logger import MetricsLogger |
|
164 | 164 | ) |
165 | 165 | from ray.rllib.utils.replay_buffers import MultiAgentReplayBuffer, ReplayBuffer |
166 | 166 | from ray.rllib.utils.runners.runner_group import RunnerGroup |
167 | | -from ray.rllib.utils.serialization import deserialize_type, NOT_SERIALIZABLE |
| 167 | +from ray.rllib.utils.serialization import NOT_SERIALIZABLE, deserialize_type |
168 | 168 | from ray.rllib.utils.spaces import space_utils |
169 | 169 | from ray.rllib.utils.typing import ( |
170 | 170 | AgentConnectorDataType, |
|
191 | 191 | from ray.tune.execution.placement_groups import PlacementGroupFactory |
192 | 192 | from ray.tune.experiment.trial import ExportFormat |
193 | 193 | from ray.tune.logger import Logger, UnifiedLogger |
194 | | -from ray.tune.registry import ENV_CREATOR, _global_registry |
195 | | -from ray.tune.registry import get_trainable_cls |
| 194 | +from ray.tune.registry import ENV_CREATOR, _global_registry, get_trainable_cls |
196 | 195 | from ray.tune.resources import Resources |
197 | 196 | from ray.tune.result import TRAINING_ITERATION |
198 | 197 | from ray.tune.trainable import Trainable |
|
0 commit comments