Skip to content

Commit a7a41a8

Browse files
Julian Kates-Harbeck
authored and committed
2 parents bf75108 + eaa3629 commit a7a41a8

File tree

4 files changed

+169
-5
lines changed

4 files changed

+169
-5
lines changed

data/signals.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,10 @@ def fetch_nstx_data(signal_path,shot_num,c):
176176
pin = Signal("Input Power (beam for d3d)",['jpf/gs/bl-ptot<s','d3d/bmspinj'],[jet,d3d]) #Total Beam Power
177177

178178
pradtot = Signal("Radiated Power",['jpf/db/b5r-ptot>out'],[jet])
179-
pradcore = ChannelSignal("Radiated Power Core",[ 'd3d/'+r'\bol_l15_p'],[d3d])
180-
pradedge = ChannelSignal("Radiated Power Edge",['d3d/'+r'\bol_l03_p'],[d3d])
181-
#pradcore = ChannelSignal("Radiated Power Core",['ppf/bolo/kb5h/channel14', 'd3d/'+r'\bol_l15_p'],[jet,d3d])
182-
#pradedge = ChannelSignal("Radiated Power Edge",['ppf/bolo/kb5h/channel10','d3d/'+r'\bol_l03_p'],[jet,d3d])
179+
#pradcore = ChannelSignal("Radiated Power Core",[ 'd3d/'+r'\bol_l15_p'],[d3d])
180+
#pradedge = ChannelSignal("Radiated Power Edge",['d3d/'+r'\bol_l03_p'],[d3d])
181+
pradcore = ChannelSignal("Radiated Power Core",['ppf/bolo/kb5h/channel14', 'd3d/'+r'\bol_l15_p'],[jet,d3d])
182+
pradedge = ChannelSignal("Radiated Power Edge",['ppf/bolo/kb5h/channel10','d3d/'+r'\bol_l03_p'],[jet,d3d])
183183
# pechin = Signal("ECH input power, not always on",['d3d/pcechpwrf'],[d3d])
184184
pechin = Signal("ECH input power, not always on",['RF/ECH.TOTAL.ECHPWRC'],[d3d])
185185

@@ -238,6 +238,7 @@ def fetch_nstx_data(signal_path,shot_num,c):
238238
fully_defined_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machines(all_machines)}
239239
d3d_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machine(d3d)}
240240
jet_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machine(jet)}
241+
jet_signals_0D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if (sig.is_defined_on_machine(jet) and sig.num_channels == 1)}
241242

242243

243244
#['pcechpwrf'] #Total ECH Power Not always on!

examples/extract_best_overtime.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import pandas as pd
2+
import glob
3+
from subprocess import Popen
4+
import yaml
5+
import os
6+
import math
7+
import numpy as np
8+
from random import shuffle
9+
from joblib import Parallel, delayed
10+
import multiprocessing
11+
12+
import matplotlib
13+
matplotlib.use('Agg')
14+
import matplotlib.pylab as plt
15+
16+
import pdb
17+
18+
def arrangeTrialsAtRandom(filenames,scale=1.0):
    """Chain trial logs end to end in random order, as if they had run serially.

    Every CSV in `filenames` is read with pandas and must contain a 'times'
    column. Each trial's times are divided by 60 and by `scale`
    (presumably seconds -> minutes, given the plot labels; confirm against
    the log writer), then offset by the largest time of the trials placed
    before it.

    Args:
        filenames: paths of the per-trial CSV logs. The caller's list is
            left untouched; a private copy is shuffled.
        scale: extra divisor applied to the time axis.

    Returns:
        A single DataFrame: the concatenation of all trials in the shuffled
        order, with the rewritten 'times' column.

    Raises:
        ValueError: if `filenames` is empty.
    """
    # Shuffle a copy so repeated calls do not reorder the caller's list.
    order = list(filenames)
    if not order:
        raise ValueError("arrangeTrialsAtRandom needs at least one file")
    shuffle(order)

    dataframes = []
    shift = 0.0
    for filename in order:
        current = pd.read_csv(filename)
        # Vectorised equivalent of applying x/60.0/scale+shift to each row.
        current['times'] = current['times'] / 60.0 / scale + shift
        dataframes.append(current)
        # The next trial starts where this one ended. A trial without any
        # usable time stamp must not reset or poison the running offset.
        latest = current['times'].max()
        if pd.notna(latest):
            shift = latest
    return pd.concat(dataframes)
30+
31+
def getOneBestValidationAUC(T_of_test,dataset):
    """Return the best validation AUC logged at or before `T_of_test`.

    Args:
        T_of_test: time cut-off, in the same units as dataset['times'].
        dataset: DataFrame with a 'times' column and a 'val_roc' column.

    Returns:
        The largest 'val_roc' among rows whose 'times' is <= `T_of_test`,
        as a float. Returns 0.0 when no row qualifies or every qualifying
        value is NaN.
    """
    # Positional boolean mask: works even when the frame carries duplicate
    # index labels (e.g. after concatenating several trials).
    mask = (dataset['times'] <= T_of_test).values
    aucs = dataset['val_roc'].values[mask]

    # The builtin max() gives order-dependent results when NaN is present,
    # so drop NaN entries explicitly before taking the maximum.
    aucs = aucs[~np.isnan(aucs)]
    if aucs.size == 0:
        return 0.0
    return float(aucs.max())
41+
42+
def _plotBestAUCCurves(time_axis, parallel_values, serial_values, filename, log_x=False, xlim=None):
    """Draw both search curves on a fresh figure and save it to `filename`."""
    fig = plt.figure()
    plt.plot(time_axis, parallel_values, label="Distributed search")
    plt.plot(time_axis, serial_values, label="Sequential search")
    plt.legend(loc=(0.6, 0.7))
    plt.xlabel("Time [minutes]", fontsize=20)
    if log_x:
        plt.xscale('log')
    if xlim is not None:
        # A None entry leaves that side of the axis on its automatic limit.
        plt.xlim(*xlim)
    plt.ylabel('Best validation AUC', fontsize=20)
    plt.savefig(filename)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def doPlot(parallel_aucs, serial_aucs, times, errors):
    """Plot best validation AUC over time for distributed vs sequential search.

    Writes three images to the current working directory:
      * "times.png"            - linear time axis
      * "times_logx_start.png" - log time axis, upper limit 100
      * "times_logx.png"       - log time axis, range 100 to 10000

    Args:
        parallel_aucs: best AUC of the distributed search, one per entry of
            `times`.
        serial_aucs: best AUC of the sequential search, one per entry of
            `times`.
        times: the time grid the AUC values were evaluated on.
        errors: spread of the sequential curve. Accepted for interface
            compatibility; the error band is currently not drawn.
    """
    times = np.asarray(list(times))

    # Drop the first grid point from all series. The AUC at index i was
    # evaluated at times[i], so the x axis must be times[1:] to stay aligned
    # with the values (times[:-1] would shift every point one step early and
    # put a 0 on the log axes).
    time_axis = times[1:]
    parallel_values = np.asarray(parallel_aucs[1:])
    serial_values = np.asarray(serial_aucs[1:])

    _plotBestAUCCurves(time_axis, parallel_values, serial_values, "times.png")
    # A lower limit of 0 is invalid on a log axis, so only the upper limit
    # is set for the early-time view.
    _plotBestAUCCurves(time_axis, parallel_values, serial_values,
                       "times_logx_start.png", log_x=True, xlim=(None, 100))
    _plotBestAUCCurves(time_axis, parallel_values, serial_values,
                       "times_logx.png", log_x=True, xlim=(100, 10000))
89+
90+
91+
def getReplica(filenames, times, scale=100.0):
    """Build one randomly ordered serial replica and evaluate it on `times`.

    Args:
        filenames: paths of the per-trial CSV logs, forwarded to
            arrangeTrialsAtRandom.
        times: iterable of time cut-offs.
        scale: time-axis divisor forwarded to arrangeTrialsAtRandom
            (defaults to the previously hard-coded 100.0).

    Returns:
        A list with one entry per element of `times`: the best validation
        AUC the replica reached by that time, floored at 0.
    """
    serial_auc_replica = arrangeTrialsAtRandom(filenames, scale)

    # getOneBestValidationAUC already scans everything logged up to T, so
    # each cut-off is independent; max(0, ...) keeps the floor of 0.
    return [max(0, getOneBestValidationAUC(T, serial_auc_replica)) for T in times]
105+
106+
def getTimeReplica(filenames,T):
    """Return the best validation AUC any trial reached by time `T`.

    Every file is treated as an independent trial running concurrently:
    its 'times' column is divided by 60 and compared against `T` directly,
    without any offset between trials.

    Args:
        filenames: paths of the per-trial CSV logs.
        T: time cut-off, in the units of 'times' after the division by 60.

    Returns:
        The largest AUC over all readable trials, or 0 if there is none.
    """
    current_best = 0
    for filename in filenames:
        try:
            dataset = pd.read_csv(filename)
            dataset['times'] = dataset['times'] / 60.0
        except Exception:
            # Best effort: skip trials whose log is missing, empty or
            # malformed. Unlike a bare except, this still lets
            # KeyboardInterrupt and SystemExit stop the run.
            print("No data in {}".format(filename))
            continue
        auc = getOneBestValidationAUC(T,dataset)
        if auc > current_best:
            current_best = auc
    return current_best
120+
121+
def getTimeReplicaSerial(serial_auc_replica,T):
    """Return the best validation AUC a serial replica reached by time `T`.

    Args:
        serial_auc_replica: DataFrame of chained trials, as produced by
            arrangeTrialsAtRandom.
        T: time cut-off on the replica's 'times' column.

    Returns:
        The value from getOneBestValidationAUC, floored at 0.
    """
    return max(0, getOneBestValidationAUC(T,serial_auc_replica))
129+
130+
131+
if __name__ == '__main__':
    # Compare a distributed hyperparameter search (all trials running at
    # once) with sequential searches emulated by chaining the same trials
    # in random order, then plot best validation AUC against time.
    from statistics import mean,stdev

    filenames = glob.glob("/tigress/FRNN/JET_Titan_hyperparameter_run/*/temporal_csv_log.csv")
    if not filenames:
        # Fail early with a clear message instead of an IndexError later on.
        raise SystemExit("No temporal_csv_log.csv files found; nothing to plot")

    times = np.linspace(0,310*30,186*30)

    num_cores = multiprocessing.cpu_count()
    print ("Running on ", num_cores, " CPU cores")
    best_parallel_aucs_over_time = Parallel(n_jobs=num_cores)(delayed(getTimeReplica)(filenames, T) for T in times)

    # Each replica is one random ordering of the trials run back to back.
    Nreplicas = 20
    replicas = []
    for _ in range(Nreplicas):
        serial_auc_replica = arrangeTrialsAtRandom(filenames,100.0)
        best_serial_aucs_over_time = Parallel(n_jobs=num_cores)(delayed(getTimeReplicaSerial)(serial_auc_replica, T) for T in times)
        replicas.append(best_serial_aucs_over_time)

    # Mean and standard deviation across replicas at every time point.
    best_serial_aucs_over_time = list(map(mean, zip(*replicas)))
    errors = list(map(stdev, zip(*replicas)))

    doPlot(best_parallel_aucs_over_time, best_serial_aucs_over_time, times, errors)

plasma/conf_parser.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ def parameters(input_file):
8383
params['paths']['shot_files'] = [jet_carbon_wall]
8484
params['paths']['shot_files_test'] = [jet_iterlike_wall]
8585
params['paths']['use_signals_dict'] = jet_signals
86+
elif params['paths']['data'] == 'jet_data_0D':
87+
params['paths']['shot_files'] = [jet_carbon_wall]
88+
params['paths']['shot_files_test'] = [jet_iterlike_wall]
89+
params['paths']['use_signals_dict'] = jet_signals_0D
8690
elif params['paths']['data'] == 'jet_carbon_data':
8791
params['paths']['shot_files'] = [jet_carbon_wall]
8892
params['paths']['shot_files_test'] = []

plasma/primitives/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def fetch_data(self,machine,shot_num,c):
253253

254254

255255
class ChannelSignal(Signal):
256-
def __init__(self,description,paths,machines,tex_label=None,causal_shifts=None,mapping_range=(0,1),num_channels=32,data_avail_tolerances=None,is_strictly_positive=False,mapping_paths=None):
256+
def __init__(self,description,paths,machines,tex_label=None,causal_shifts=None,data_avail_tolerances=None,is_strictly_positive=False,mapping_paths=None):
257257
super(ChannelSignal, self).__init__(description,paths,machines,tex_label,causal_shifts,is_ip=False,data_avail_tolerances=data_avail_tolerances,is_strictly_positive=is_strictly_positive,mapping_paths=mapping_paths)
258258
nums,new_paths = self.get_channel_nums(paths)
259259
self.channel_nums = nums

0 commit comments

Comments
 (0)