-#conf.py will parse the yaml and extract parameters based on what is specified
+# conf.py will parse the yaml and extract parameters based on what is specified
 
-#will do stuff in fs_path / [username] / signal_data | shot_lists | processed shots, etc.
+# will do stuff in fs_path / [username] / signal_data | shot_lists | processed shots, etc.
 
 fs_path: '/tigress'
-target: 'hinge' #'maxhinge' #'maxhinge' #'binary' #'hinge'
-num_gpus: 4
+target: 'hinge' # 'maxhinge' # 'maxhinge' # 'binary' # 'hinge'
+num_gpus: 4 # per node
 
 paths:
-  signal_prepath: '/signal_data/' #/signal_data/jet/
+  signal_prepath: '/signal_data/' # /signal_data/jet/
   shot_list_dir: '/shot_lists/'
   tensorboard_save_path: '/Graph/'
-  data: d3d_data_0D #'d3d_to_jet_data' #'d3d_to_jet_data' # 'jet_to_d3d_data' #jet_data
-  specific_signals: [] #['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile'] #if left empty will use all valid signals defined on a machine. Only use if need a custom set
+  data: d3d_data_0D # 'd3d_to_jet_data' # 'd3d_to_jet_data' # 'jet_to_d3d_data' # jet_data
+  # if specific_signals is left empty ([]), it will use all valid signals defined on a machine. Only use if you need a custom set
+  specific_signals: [] # ['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile']
   executable: "mpi_learn.py"
   shallow_executable: "learn.py"
 
 data:
-  bleed_in: 0 #how many shots from the test sit to use in training?
-  bleed_in_repeat_fac: 1 #how many times to repeat shots in training and validation?
+  bleed_in: 0 # how many shots from the test set to use in training?
+  bleed_in_repeat_fac: 1 # how many times to repeat shots in training and validation?
   bleed_in_remove_from_test: True
   bleed_in_equalize_sets: False
-  signal_to_augment: None #'plasma current' #or None
+  # TODO(KGF): make next parameter use 'none' instead of None
+  signal_to_augment: None # 'plasma current' # or None
   augmentation_mode: 'none'
   augment_during_training: False
   cut_shot_ends: True
   T_min_warn: 30
   recompute: False
   recompute_normalization: False
-  #specifies which of the signals in the signals_dirs order contains the plasma current info
+  # specifies which of the signals in the signals_dirs order contains the plasma current info
   current_index: 0
   plotting: False
-  #train/validate split
-  #how many shots to use
-  use_shots: 200000 #1000 #200000
-  positive_example_penalty: 1.0 #by what factor to upweight positive examples?
-  #normalization timescale
+  # how many shots to use
+  use_shots: 200000 # 1000 # 200000
+  positive_example_penalty: 1.0 # by what factor to upweight positive examples?
+  # normalization timescale
   dt: 0.001
-  #maximum TTD considered
+  # maximum TTD considered
   T_max: 1000.0
-  #The shortest works best so far: less overfitting. log TTd prediction also works well. 0.5 better than 0.2
-  T_warning: 1.024 #1.024 #1.024 #0.512 #0.25 #1.0 #1.0 #warning time in seconds
+  # The shortest works best so far: less overfitting. log TTD prediction also works well. 0.5 better than 0.2
+  T_warning: 1.024 # 1.024 # 1.024 # 0.512 # 0.25 # 1.0 # 1.0 # warning time in seconds
   current_thresh: 750000
   current_end_thresh: 10000
-  #the characteristic decay length of the decaying moving average window
+  # the characteristic decay length of the decaying moving average window
   window_decay: 2
-  #the width of the actual window
+  # the width of the actual window
   window_size: 10
-  #TODO optimize
+  # TODO(KGF): optimize the normalizer parameters
   normalizer: 'var'
   norm_stat_range: 100.0
   equalize_classes: False
-  # shallow_sample_prob: 0.01 #the fraction of samples with which to train the shallow model
+  # shallow_sample_prob: 0.01 # the fraction of samples with which to train the shallow model
   floatx: 'float32'
 
 model:
   loss_scale_factor: 1.0
   use_batch_norm: false
   torch: False
-  shallow: True
+  shallow: False
   shallow_model:
-    num_samples: 1000000 #1000000 #the number of samples to use for training
-    type: "xgboost" #"xgboost" #"xgboost" #"random_forest" "xgboost"
-    n_estimators: 100 #for random forest
-    max_depth: 3 #for random forest and xgboost (def = 3)
-    C: 1.0 #for svm
-    kernel: "rbf" #rbf, sigmoid, linear, poly, for svm
-    learning_rate: 0.1 #xgboost
-    scale_pos_weight: 10.0 #xgboost
-    final_hidden_layer_size: 10 #final layers has this many neurons, every layer before twice as many
+    num_samples: 1000000 # 1000000 # the number of samples to use for training
+    type: "xgboost" # "xgboost" # "random_forest"
+    n_estimators: 100 # for random forest
+    max_depth: 3 # for random forest and xgboost (def = 3)
+    C: 1.0 # for svm
+    kernel: "rbf" # rbf, sigmoid, linear, poly, for svm
+    learning_rate: 0.1 # used in xgboost
+    scale_pos_weight: 10.0 # used in xgboost
+    final_hidden_layer_size: 10 # the final layer has this many neurons; every layer before it has twice as many
     num_hidden_layers: 3
     learning_rate_mlp: 0.0001
     mlp_regularization: 0.0001
-    skip_train: False #should a finished model be loaded if available
-  #length of LSTM memory
+    skip_train: False # should a finished model be loaded if available
+  # length of LSTM memory
   pred_length: 200
   pred_batch_size: 128
-  #TODO optimize
+  # TODO(KGF): optimize length of LSTM memory
   length: 128
   skip: 1
-  #hidden layer size
-  #TODO optimize
+  # hidden layer size
+  # TODO(KGF): optimize size of RNN layers
   rnn_size: 200
-  #size 100 slight overfitting, size 20 no overfitting. 200 is not better than 100. Prediction much better with size 100, size 20 cannot capture the data.
+  # size 100 slight overfitting, size 20 no overfitting. 200 is not better than 100. Prediction much better with size 100; size 20 cannot capture the data.
   rnn_type: 'LSTM'
-  #TODO optimize
+  # TODO(KGF): optimize number of RNN layers
   rnn_layers: 2
   num_conv_filters: 128
   size_conv_filters: 3
   num_conv_layers: 3
   pool_size: 2
   dense_size: 128
   extra_dense_input: False
-  #have not found a difference yet
+  # have not found a difference yet
   optimizer: 'adam'
   clipnorm: 10.0
   regularization: 0.001
   dense_regularization: 0.001
-  #1e-4 is too high, 5e-7 is too low. 5e-5 seems best at 256 batch size, full dataset and ~10 epochs, and lr decay of 0.90. 1e-4 also works well if we decay a lot (i.e ~0.7 or more)
-  lr: 0.00002 #0.00001 #0.0005 #for adam plots 0.0000001 #0.00005 #0.00005 #0.00005
-  lr_decay: 0.97 #0.98 #0.9
+  # lr=1e-4 is too high, 5e-7 is too low. 5e-5 seems best at 256 batch size, full dataset,
+  # ~10 epochs, and lr decay of 0.90
+  # lr=1e-4 also works well if we decay a lot (i.e. ~0.7 or more)
+  lr: 0.00002 # 0.00001 # 0.0005 # for adam plots 0.0000001 # 0.00005 # 0.00005 # 0.00005
+  lr_decay: 0.97 # 0.98 # 0.9
   stateful: True
   return_sequences: True
   dropout_prob: 0.1
-  #only relevant if we want to do mpi training. The number of steps with a single replica
+  # only relevant if we want to do MPI training. The number of steps with a single replica
   warmup_steps: 0
-  ignore_timesteps: 100 #how many initial timesteps to ignore during evaluation (to let the internal state settle)
+  ignore_timesteps: 100 # how many initial timesteps to ignore during evaluation (to let the internal state settle)
   backend: 'tensorflow'
 training:
   as_array_of_shots: True
   shuffle_training: True
   train_frac: 0.75
   validation_frac: 0.33
-  batch_size: 128 #256
-  #THIS WAS THE CULPRIT FOR NO TRAINING! Lower than 1000 performs very poorly
+  batch_size: 128 # 256
+  # THE MAX_PATCH_LENGTH WAS THE CULPRIT FOR NO TRAINING! Lower than 1000 performs very poorly
   max_patch_length: 100000
-  #How many shots are we loading at once?
+  # How many shots are we loading at once?
   num_shots_at_once: 200
-  num_epochs: 1000
+  num_epochs: 1000 # large number = maximum number of epochs. Early stopping will occur if loss does not decrease
   use_mock_data: False
   data_parallel: False
   hyperparam_tuning: False
   batch_generator_warmup_steps: 0
   use_process_generator: False
-  num_batches_minimum: 20 #minimum number of batches per epoch
-  ranking_difficulty_fac: 1.0 #how much to upweight incorrectly classified shots during training
+  num_batches_minimum: 20 # minimum number of batches per epoch
+  ranking_difficulty_fac: 1.0 # how much to upweight incorrectly classified shots during training
 callbacks:
   list: ['earlystop']
   metrics: ['val_loss','val_roc','train_loss']
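
Note: the header comment says conf.py parses this YAML and extracts parameters. As a minimal sketch of what that pattern looks like (assuming PyYAML and the nesting shown above; the function name load_conf and the literal file name conf.yaml are illustrative, not taken from the repository):

    import yaml  # PyYAML

    def load_conf(path='conf.yaml'):
        # Read the whole config into nested dicts keyed by the top-level sections above.
        with open(path) as f:
            return yaml.safe_load(f)

    conf = load_conf()
    print(conf['paths']['data'])           # 'd3d_data_0D'
    print(conf['model']['shallow'])        # False
    print(conf['training']['batch_size'])  # 128

One side effect worth knowing: yaml.safe_load reads the unquoted None in signal_to_augment as the string 'None', not a Python None, which is presumably what the TODO(KGF) about switching to 'none' is getting at.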
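
Note on the time-base settings: if dt is the sampling step and T_warning is the label horizon before a disruption (as the 'warning time in seconds' comment suggests; not verified against the training code), the warning window spans a whole number of timesteps:

    dt = 0.001         # s per timestep ('normalization timescale' above)
    T_warning = 1.024  # s
    print(int(T_warning / dt))  # 1024 timesteps inside the warning window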
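
Note on shallow_model: the block mixes parameters for several estimator families (n_estimators/max_depth for random forest, C/kernel for SVM, learning_rate/scale_pos_weight for XGBoost), selected via type. The sketch below shows one plausible dispatch to scikit-learn/XGBoost estimators; build_shallow_model is illustrative only, not a function from this repository, and the real learn.py may construct its models differently:

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC
    from xgboost import XGBClassifier

    def build_shallow_model(sm):
        # sm would be conf['model']['shallow_model'] as loaded in the first sketch above.
        if sm['type'] == 'random_forest':
            return RandomForestClassifier(n_estimators=sm['n_estimators'],
                                          max_depth=sm['max_depth'])
        if sm['type'] == 'svm':
            return SVC(C=sm['C'], kernel=sm['kernel'], probability=True)
        if sm['type'] == 'xgboost':
            return XGBClassifier(max_depth=sm['max_depth'],
                                 learning_rate=sm['learning_rate'],
                                 scale_pos_weight=sm['scale_pos_weight'])
        raise ValueError('unknown shallow model type: {}'.format(sm['type']))

The MLP-related keys (final_hidden_layer_size, num_hidden_layers, learning_rate_mlp, mlp_regularization) would feed a fourth, neural-network option that is not shown here.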