|
1 | | -#conf.py will parse the yaml and extract parameters based on what is specified |
| 1 | +# conf.py will parse the yaml and extract parameters based on what is specified |
2 | 2 |
|
3 | | -#will do stuff in fs_path / [username] / signal_data | shot_lists | processed shots, etc. |
| 3 | +# data is read and written under fs_path / [username] / signal_data | shot_lists | processed shots, etc.
4 | 4 |
|
5 | 5 | fs_path: '/tigress' |
6 | | -target: 'hinge' #'maxhinge' #'maxhinge' #'binary' #'hinge' |
7 | | -num_gpus: 4 |
| 6 | +target: 'hinge' # 'maxhinge' # 'maxhinge' # 'binary' # 'hinge' |
| 7 | +num_gpus: 4 # per node |
8 | 8 |
|
9 | 9 | paths: |
10 | | - signal_prepath: '/signal_data/' #/signal_data/jet/ |
| 10 | + signal_prepath: '/signal_data/' # /signal_data/jet/ |
11 | 11 | shot_list_dir: '/shot_lists/' |
12 | 12 | tensorboard_save_path: '/Graph/' |
13 | | - data: d3d_data_0D #'d3d_to_jet_data' #'d3d_to_jet_data' # 'jet_to_d3d_data' #jet_data |
14 | | - specific_signals: [] #['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile'] #if left empty will use all valid signals defined on a machine. Only use if need a custom set |
| 13 | + data: d3d_data_0D # 'd3d_to_jet_data' # 'd3d_to_jet_data' # 'jet_to_d3d_data' # jet_data |
| 14 | + # if specific_signals is left empty ([]), all valid signals defined for the machine will be used. Only set this if a custom subset is needed
| 15 | + specific_signals: [] # ['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile'] |
15 | 16 | executable: "mpi_learn.py" |
16 | 17 | shallow_executable: "learn.py" |
17 | 18 |
|
18 | 19 | data: |
19 | | - bleed_in: 0 #how many shots from the test sit to use in training? |
20 | | - bleed_in_repeat_fac: 1 #how many times to repeat shots in training and validation? |
| 20 | + bleed_in: 0 # how many shots from the test set to use in training?
| 21 | + bleed_in_repeat_fac: 1 # how many times to repeat shots in training and validation? |
21 | 22 | bleed_in_remove_from_test: True |
22 | 23 | bleed_in_equalize_sets: False |
23 | | - signal_to_augment: None #'plasma current' #or None |
| 24 | + # TODO(KGF): make next parameter use 'none' instead of None |
| 25 | + signal_to_augment: None # 'plasma current' # or None |
24 | 26 | augmentation_mode: 'none' |
25 | 27 | augment_during_training: False |
26 | 28 | cut_shot_ends: True |
27 | 29 | T_min_warn: 30 |
28 | 30 | recompute: False |
29 | 31 | recompute_normalization: False |
30 | | - #specifies which of the signals in the signals_dirs order contains the plasma current info |
| 32 | + # specifies which of the signals in the signals_dirs order contains the plasma current info |
31 | 33 | current_index: 0 |
32 | 34 | plotting: False |
33 | | - #train/validate split |
34 | | - #how many shots to use |
35 | | - use_shots: 200000 #1000 #200000 |
36 | | - positive_example_penalty: 1.0 #by what factor to upweight positive examples? |
37 | | - #normalization timescale |
| 35 | + # how many shots to use |
| 36 | + use_shots: 200000 # 1000 # 200000 |
| 37 | + positive_example_penalty: 1.0 # by what factor to upweight positive examples? |
| 38 | + # normalization timescale |
38 | 39 | dt: 0.001 |
39 | | - #maximum TTD considered |
| 40 | + # maximum TTD (time to disruption) considered
40 | 41 | T_max: 1000.0 |
41 | | - #The shortest works best so far: less overfitting. log TTd prediction also works well. 0.5 better than 0.2 |
42 | | - T_warning: 1.024 #1.024 #1.024 #0.512 #0.25 #1.0 #1.0 #warning time in seconds |
| 42 | + # The shortest warning time works best so far: less overfitting. log TTD prediction also works well. 0.5 is better than 0.2
| 43 | + T_warning: 1.024 # 1.024 # 1.024 # 0.512 # 0.25 # 1.0 # 1.0 # warning time in seconds |
43 | 44 | current_thresh: 750000 |
44 | 45 | current_end_thresh: 10000 |
45 | | - #the characteristic decay length of the decaying moving average window |
| 46 | + # the characteristic decay length of the decaying moving average window |
46 | 47 | window_decay: 2 |
47 | | - #the width of the actual window |
| 48 | + # the width of the actual window |
48 | 49 | window_size: 10 |
49 | | - #TODO optimize |
| 50 | + # TODO(KGF): optimize the normalizer parameters |
50 | 51 | normalizer: 'var' |
51 | 52 | norm_stat_range: 100.0 |
52 | 53 | equalize_classes: False |
53 | | - # shallow_sample_prob: 0.01 #the fraction of samples with which to train the shallow model |
| 54 | + # shallow_sample_prob: 0.01 # the fraction of samples with which to train the shallow model |
54 | 55 | floatx: 'float32' |
55 | 56 |
|
56 | 57 | model: |
57 | 58 | loss_scale_factor: 1.0 |
58 | 59 | use_batch_norm: false |
59 | 60 | torch: False |
60 | | - shallow: True |
| 61 | + shallow: False |
61 | 62 | shallow_model: |
62 | | - num_samples: 1000000 #1000000 #the number of samples to use for training |
63 | | - type: "xgboost" #"xgboost" #"xgboost" #"random_forest" "xgboost" |
64 | | - n_estimators: 100 #for random forest |
65 | | - max_depth: 3 #for random forest and xgboost (def = 3) |
66 | | - C: 1.0 #for svm |
67 | | - kernel: "rbf" #rbf, sigmoid, linear, poly, for svm |
68 | | - learning_rate: 0.1 #xgboost |
69 | | - scale_pos_weight: 10.0 #xgboost |
70 | | - final_hidden_layer_size: 10 #final layers has this many neurons, every layer before twice as many |
| 63 | + num_samples: 1000000 # 1000000 # the number of samples to use for training |
| 64 | + type: "xgboost" # "xgboost" #"random_forest" |
| 65 | + n_estimators: 100 # for random forest |
| 66 | + max_depth: 3 # for random forest and xgboost (def = 3) |
| 67 | + C: 1.0 # for svm |
| 68 | + kernel: "rbf" # rbf, sigmoid, linear, poly, for svm |
| 69 | + learning_rate: 0.1 # used in xgboost |
| 70 | + scale_pos_weight: 10.0 # used in xgboost |
| 71 | + final_hidden_layer_size: 10 # the final hidden layer has this many neurons; every preceding layer has twice as many
71 | 72 | num_hidden_layers: 3 |
72 | 73 | learning_rate_mlp: 0.0001 |
73 | 74 | mlp_regularization: 0.0001 |
74 | | - skip_train: False #should a finished model be loaded if available |
75 | | - #length of LSTM memory |
| 75 | + skip_train: False # should a finished model be loaded if available |
| 76 | + # length of LSTM memory |
76 | 77 | pred_length: 200 |
77 | 78 | pred_batch_size: 128 |
78 | | - #TODO optimize |
| 79 | + # TODO(KGF): optimize length of LSTM memory |
79 | 80 | length: 128 |
80 | 81 | skip: 1 |
81 | | - #hidden layer size |
82 | | - #TODO optimize |
| 82 | + # hidden layer size |
| 83 | + # TODO(KGF): optimize size of RNN layers |
83 | 84 | rnn_size: 200 |
84 | | - #size 100 slight overfitting, size 20 no overfitting. 200 is not better than 100. Prediction much better with size 100, size 20 cannot capture the data. |
| 85 | + # size 100: slight overfitting; size 20: no overfitting. 200 is not better than 100. Prediction is much better with size 100; size 20 cannot capture the data.
85 | 86 | rnn_type: 'LSTM' |
86 | | - #TODO optimize |
| 87 | + # TODO(KGF): optimize number of RNN layers |
87 | 88 | rnn_layers: 2 |
88 | 89 | num_conv_filters: 128 |
89 | 90 | size_conv_filters: 3 |
90 | 91 | num_conv_layers: 3 |
91 | 92 | pool_size: 2 |
92 | 93 | dense_size: 128 |
93 | 94 | extra_dense_input: False |
94 | | - #have not found a difference yet |
| 95 | + # have not found a difference yet |
95 | 96 | optimizer: 'adam' |
96 | 97 | clipnorm: 10.0 |
97 | 98 | regularization: 0.001 |
98 | 99 | dense_regularization: 0.001 |
99 | | - #1e-4 is too high, 5e-7 is too low. 5e-5 seems best at 256 batch size, full dataset and ~10 epochs, and lr decay of 0.90. 1e-4 also works well if we decay a lot (i.e ~0.7 or more) |
100 | | - lr: 0.00002 #0.00001 #0.0005 #for adam plots 0.0000001 #0.00005 #0.00005 #0.00005 |
101 | | - lr_decay: 0.97 #0.98 #0.9 |
| 100 | + # lr=1e-4 is too high, 5e-7 is too low. 5e-5 seems best at 256 batch size, full dataset |
| 101 | + # and ~10 epochs, and lr decay of 0.90 |
| 102 | + # lr=1e-4 also works well if we decay a lot (i.e. ~0.7 or more)
| 103 | + lr: 0.00002 # 0.00001 # 0.0005 # for adam plots 0.0000001 # 0.00005 # 0.00005 # 0.00005 |
| 104 | + lr_decay: 0.97 # 0.98 # 0.9 |
102 | 105 | stateful: True |
103 | 106 | return_sequences: True |
104 | 107 | dropout_prob: 0.1 |
105 | | - #only relevant if we want to do mpi training. The number of steps with a single replica |
| 108 | + # only relevant if we want to do MPI training. The number of steps with a single replica |
106 | 109 | warmup_steps: 0 |
107 | | - ignore_timesteps: 100 #how many initial timesteps to ignore during evaluation (to let the internal state settle) |
| 110 | + ignore_timesteps: 100 # how many initial timesteps to ignore during evaluation (to let the internal state settle) |
108 | 111 | backend: 'tensorflow' |
109 | 112 | training: |
110 | 113 | as_array_of_shots: True |
111 | 114 | shuffle_training: True |
112 | 115 | train_frac: 0.75 |
113 | 116 | validation_frac: 0.33 |
114 | | - batch_size: 128 #256 |
115 | | - #THIS WAS THE CULPRIT FOR NO TRAINING! Lower than 1000 performs very poorly |
| 117 | + batch_size: 128 # 256 |
| 118 | + # THE MAX_PATCH_LENGTH WAS THE CULPRIT FOR NO TRAINING! Values lower than 1000 perform very poorly
116 | 119 | max_patch_length: 100000 |
117 | | - #How many shots are we loading at once? |
| 120 | + # How many shots are we loading at once? |
118 | 121 | num_shots_at_once: 200 |
119 | | - num_epochs: 1000 |
| 122 | + num_epochs: 1000 # set to a large number; this is the maximum number of epochs, and early stopping will occur if the loss does not decrease
120 | 123 | use_mock_data: False |
121 | 124 | data_parallel: False |
122 | 125 | hyperparam_tuning: False |
123 | 126 | batch_generator_warmup_steps: 0 |
124 | 127 | use_process_generator: False |
125 | | - num_batches_minimum: 20 #minimum number of batches per epoch |
126 | | - ranking_difficulty_fac: 1.0 #how much to upweight incorrectly classified shots during training |
| 128 | + num_batches_minimum: 20 # minimum number of batches per epoch |
| 129 | + ranking_difficulty_fac: 1.0 # how much to upweight incorrectly classified shots during training |
127 | 130 | callbacks: |
128 | 131 | list: ['earlystop'] |
129 | 132 | metrics: ['val_loss','val_roc','train_loss'] |
|
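For reference, here is a minimal sketch of how a driver such as conf.py could load this YAML and pull parameters out of it. Assumptions (not taken from the diff above): the file is saved as conf.yaml, PyYAML and xgboost are installed, and the username-based layout from the top-of-file comment is built with simple string joins; the project's actual conf.py performs more validation and path handling than this.

import getpass
import os

import yaml  # PyYAML
from xgboost import XGBClassifier

# load the whole configuration tree into nested dicts
with open("conf.yaml") as f:
    conf = yaml.safe_load(f)

# top-level scalars parse directly
fs_path = conf["fs_path"]      # '/tigress'
num_gpus = conf["num_gpus"]    # 4 (per node)

# nested sections become nested dicts
lr = conf["model"]["lr"]       # 0.00002
dt = conf["data"]["dt"]        # 0.001

# note: PyYAML does not treat the bare token None as null, so
# conf["data"]["signal_to_augment"] comes back as the string 'None'
# (presumably the motivation for the TODO about using 'none' instead)

# per the comment at the top, data lives under fs_path / [username] / ...
user_dir = os.path.join(fs_path, getpass.getuser())
signal_prepath = user_dir + conf["paths"]["signal_prepath"]  # e.g. /tigress/<user>/signal_data/
shot_list_dir = user_dir + conf["paths"]["shot_list_dir"]    # e.g. /tigress/<user>/shot_lists/

# the shallow_model block maps naturally onto xgboost's scikit-learn-style API;
# this wiring is only an illustration, not necessarily how the project builds the model
sm = conf["model"]["shallow_model"]
clf = XGBClassifier(
    max_depth=sm["max_depth"],                # 3
    learning_rate=sm["learning_rate"],        # 0.1
    scale_pos_weight=sm["scale_pos_weight"],  # 10.0
)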