dataset:
  data_path: '$ALFWORLD_DATA/json_2.1.1/train'
  eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen' # null/None to disable
  eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable
  num_train_games: -1 # max training games (<=0 indicates full dataset)
  num_eval_games: -1 # max evaluation games (<=0 indicates full dataset)
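  # Note: $ALFWORLD_DATA is assumed to be the environment variable pointing at the
  # downloaded ALFWorld data directory (typically populated by the alfworld-download script).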

logic:
  domain: '$ALFWORLD_DATA/logic/alfred.pddl' # PDDL domain file that defines the world dynamics
  grammar: '$ALFWORLD_DATA/logic/alfred.twl2' # Grammar file that defines the text feedback

env:
  type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid'
  regen_game_files: False # check if the game is solvable by the expert and save the result to a game.tw-pddl file
  domain_randomization: False # shuffle TextWorld print order and object id numbers
  task_types: [1, 2, 3, 4, 5, 6] # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place
  expert_timeout_steps: 150 # max steps before the expert times out on a task
  expert_type: "handcoded" # 'handcoded' or 'downward'. Note: the downward planner is very slow for real-time use
  goal_desc_human_anns_prob: 0.0 # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED)

  hybrid:
    start_eps: 100000 # starting episode of hybrid training; TextWorld-only training up to this point
    thor_prob: 0.5 # prob of using AlfredThorEnv during hybrid training
    eval_mode: "tw" # 'tw' or 'thor' - env used for evaluation during hybrid training

  thor:
    screen_width: 300 # width of THOR window
    screen_height: 300 # height of THOR window
    smooth_nav: False # smooth rotations, looks, and translations during navigation (very slow)
    save_frames_to_disk: False # save frame PNGs to disk (useful for making videos)
    save_frames_path: './videos/' # path to save frame PNGs

controller:
  type: 'oracle' # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER)
  debug: False
  load_receps: True # load receptacle locations from precomputed dict (if available)

mask_rcnn:
  pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth'

general:
  random_seed: 42
  use_cuda: True # disable this when running on a machine without CUDA
  visdom: False # plot training/eval curves; requires a running Visdom server
  task: 'alfred'
  training_method: 'dagger' # 'dqn' or 'dagger'
  save_path: './training/' # path to save PyTorch models
  observation_pool_capacity: 3 # k-size queue, 0 indicates no observation
  hide_init_receptacles: False # remove initial observation containing navigable receptacles

  training:
    batch_size: 10
    max_episode: 50000
    smoothing_eps: 0.1
    optimizer:
      learning_rate: 0.001
      clip_grad_norm: 5

  evaluate:
    run_eval: True
    batch_size: 10
    env:
      type: "AlfredTWEnv"

  checkpoint:
    report_frequency: 1000 # report every N episodes
    experiment_tag: 'test' # name of experiment
    load_pretrained: False # during testing, enable this so that the agent loads your pretrained model
    load_from_tag: 'not loading anything' # name of the pretrained model to load from save_path

  model:
    encoder_layers: 1
    decoder_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    n_heads: 1
    dropout: 0.1
    block_dropout: 0.1
    recurrent: True

rl:
  action_space: "admissible" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working)
  max_target_length: 20 # max token length for seq2seq generation
  beam_width: 10 # 1 means greedy
  generate_top_k: 3

  training:
    max_nb_steps_per_episode: 50 # terminate after this many steps
    learn_start_from_this_episode: 0 # delay updates until this episode
    target_net_update_frequency: 500 # sync the target net with the online net every this many epochs

  replay:
    accumulate_reward_from_final: True
    count_reward_lambda: 0.0 # 0 to disable
    novel_object_reward_lambda: 0.0 # 0 to disable
    discount_gamma_game_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_novel_object_reward: 0.5
    replay_memory_capacity: 500000 # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.5
    update_per_k_game_steps: 5
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 4
    replay_sample_update_from: 2

  epsilon_greedy:
    noisy_net: False # if True, epsilon-greedy is disabled
    epsilon_anneal_episodes: 1000 # -1 if not annealing
    epsilon_anneal_from: 0.3
    epsilon_anneal_to: 0.1
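    # Assumed linear anneal (illustrative sketch, not guaranteed by this file): at training episode t,
    #   epsilon(t) = max(epsilon_anneal_to,
    #                    epsilon_anneal_from - (epsilon_anneal_from - epsilon_anneal_to) * t / epsilon_anneal_episodes)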

dagger:
  action_space: "generation" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working)
  max_target_length: 20 # max token length for seq2seq generation
  beam_width: 10 # 1 means greedy
  generate_top_k: 5
  unstick_by_beam_search: False # use beam search to recover from failed actions; set to True during evaluation

  training:
    max_nb_steps_per_episode: 50 # terminate after this many steps

  fraction_assist:
    fraction_assist_anneal_episodes: 50000
    fraction_assist_anneal_from: 1.0
    fraction_assist_anneal_to: 0.01
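    # fraction_assist is assumed to be the probability of following the expert's action rather than
    # the agent's own (DAgger-style mixing), annealed linearly from fraction_assist_anneal_from to
    # fraction_assist_anneal_to over fraction_assist_anneal_episodes.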

  fraction_random:
    fraction_random_anneal_episodes: 0
    fraction_random_anneal_from: 0.0
    fraction_random_anneal_to: 0.0

  replay:
    replay_memory_capacity: 500000
    update_per_k_game_steps: 5
    replay_batch_size: 64
    replay_sample_history_length: 4
    replay_sample_update_from: 2

vision_dagger:
  model_type: "resnet" # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input)
  resnet_fc_dim: 64
  maskrcnn_top_k_boxes: 10 # top k box features
  use_exploration_frame_feats: False # append feats from initial exploration (memory intensive!)
  sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn'
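
# Usage sketch (an assumption based on the standard ALFWorld quickstart, not part of this config;
# module paths may differ across versions):
#   import yaml
#   import alfworld.agents.environment as environment
#   with open('base_config.yaml') as f:  # hypothetical filename for this config
#       config = yaml.safe_load(f)
#   env_class = getattr(environment, config['env']['type'])  # e.g. AlfredTWEnv
#   env = env_class(config, train_eval='train').init_env(batch_size=1)
#   obs, info = env.reset()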