|
17 | 17 | --help Show this message. |
18 | 18 | --max-step=<n> Maximum number of steps to run environment [default: 5e6]. |
19 | 19 | --run-path=<path> The sub-directory name for model and summary statistics [default: ppo]. |
20 | | - --load-model Whether to load the model or randomly initialize [default: False]. |
21 | | - --train-model Whether to train model, or only run inference [default: True]. |
22 | | - --summary-freq=<n> Frequency at which to save training statistics [default: 5000]. |
23 | | - --save-freq=<n> Frequency at which to save model [default: 20000]. |
| 20 | + --load Whether to load the model or randomly initialize [default: False]. |
| 21 | + --train Whether to train model, or only run inference [default: True]. |
| 22 | + --summary-freq=<n> Frequency at which to save training statistics [default: 10000]. |
| 23 | + --save-freq=<n> Frequency at which to save model [default: 50000]. |
24 | 24 | --gamma=<n> Reward discount rate [default: 0.99]. |
25 | 25 | --lambd=<n> Lambda parameter for GAE [default: 0.95]. |
26 | 26 | --time-horizon=<n> How many steps to collect per agent before adding to buffer [default: 2048]. |
|
40 | 40 | max_steps = float(options['--max-step']) |
41 | 41 | model_path = './models/{}'.format(str(options['--run-path'])) |
42 | 42 | summary_path = './summaries/{}'.format(str(options['--run-path'])) |
43 | | -load_model = options['--load-model'] |
44 | | -train_model = options['--train-model'] |
| 43 | +load_model = options['--load'] |
| 44 | +train_model = options['--train'] |
45 | 45 | summary_freq = int(options['--summary-freq']) |
46 | 46 | save_freq = int(options['--save-freq']) |
47 | 47 | env_name = options['<env>'] |
|
93 | 93 | summary_writer = tf.summary.FileWriter(summary_path) |
94 | 94 | info = env.reset(train_mode=train_model)[brain_name] |
95 | 95 | trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations) |
96 | | - while steps <= max_steps: |
| 96 | + while steps <= max_steps or not train_model: |
97 | 97 | if env.global_done: |
98 | 98 | info = env.reset(train_mode=train_model)[brain_name] |
99 | 99 | # Decide and take an action |
|
0 commit comments