@@ -46,8 +46,8 @@ def _default_get_global_data(filename, sample_interval=0):
     df = pd.read_csv(filename)
     file_name = os.path.splitext(os.path.basename(filename))[0]
 
-    x = df.iloc[:, :-data_info.METRICS_OUTPUT_NUM].values
-    y = df.iloc[:, -data_info.MINI_MODEL_TARGET_NUM:].values
+    x = df.iloc[:, :-data_info.instance.METRICS_OUTPUT_NUM].values
+    y = df.iloc[:, -data_info.instance.MINI_MODEL_TARGET_NUM:].values
 
     # Construct the new data
     opunit = OpUnit[file_name.upper()]
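Throughout this commit, module-level constants on data_info move to attributes of a shared instance. For orientation, a minimal sketch of the singleton shape the new code appears to assume; only the attribute names visible in the diff come from the source, and every value below is a placeholder:

class DataInfo:
    """Hypothetical container for the CSV layout metadata used above."""

    def __init__(self):
        # Counts of metric/target columns at the tail of each CSV row
        # (placeholder values, not taken from this commit).
        self.METRICS_OUTPUT_NUM = 6
        self.MINI_MODEL_TARGET_NUM = 4
        # Column-index maps, presumably populated from the CSV header once.
        self.target_csv_index = {}
        self.raw_target_csv_index = {}
        self.input_csv_index = {}
        self.raw_features_csv_index = {}

# Shared module-level instance, referenced as data_info.instance.
instance = DataInfo()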
@@ -66,14 +66,14 @@ def _txn_get_mini_runner_data(filename, txn_sample_interval):
     # prepending a column of ones as the base transaction data feature
     base_x = pd.DataFrame(data=np.ones((df.shape[0], 1), dtype=int))
     df = pd.concat([base_x, df], axis=1)
-    x = df.iloc[:, :-data_info.METRICS_OUTPUT_NUM].values
-    y = df.iloc[:, -data_info.MINI_MODEL_TARGET_NUM:].values
-    start_times = df.iloc[:, data_info.TARGET_CSV_INDEX[data_info.Target.START_TIME]].values
-    cpu_ids = df.iloc[:, data_info.TARGET_CSV_INDEX[data_info.Target.CPU_ID]].values
+    x = df.iloc[:, :-data_info.instance.METRICS_OUTPUT_NUM].values
+    y = df.iloc[:, -data_info.instance.MINI_MODEL_TARGET_NUM:].values
+    start_times = df.iloc[:, data_info.instance.target_csv_index[data_info.instance.Target.START_TIME]].values
+    cpu_ids = df.iloc[:, data_info.instance.target_csv_index[data_info.instance.Target.CPU_ID]].values
 
     logging.info("Loaded file: {}".format(OpUnit[file_name.upper()]))
 
-    interval = data_info.CONTENDING_OPUNIT_INTERVAL
+    interval = data_info.instance.CONTENDING_OPUNIT_INTERVAL
 
     # Map from interval start time to the data in this interval
     interval_x_map = {}
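The two lines prepending base_x give every transaction row a constant bias-style feature of 1. A self-contained illustration of the same pattern, with made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [3, 4], "b": [5, 6]})
# Prepend a column of ones, exactly as the hunk above does.
base_x = pd.DataFrame(data=np.ones((df.shape[0], 1), dtype=int))
df = pd.concat([base_x, df], axis=1)
print(df)
#    0  a  b
# 0  1  3  5
# 1  1  4  6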
@@ -123,15 +123,14 @@ def _pipeline_get_grouped_op_unit_data(filename, warmup_period, ee_sample_interv
     data_list = []
     with open(filename, "r") as f:
         reader = csv.reader(f, delimiter=",", skipinitialspace=True)
-        indexes = next(reader)
-        data_info.parse_csv_header(indexes, True)
-        features_vector_index = data_info.RAW_FEATURES_CSV_INDEX[ExecutionFeature.FEATURES]
-        input_output_boundary = data_info.RAW_FEATURES_CSV_INDEX[data_info.INPUT_OUTPUT_BOUNDARY]
-        input_end_boundary = len(data_info.INPUT_CSV_INDEX)
+        next(reader)
+        features_vector_index = data_info.instance.raw_features_csv_index[ExecutionFeature.FEATURES]
+        input_output_boundary = data_info.instance.raw_features_csv_index[data_info.instance.INPUT_OUTPUT_BOUNDARY]
+        input_end_boundary = len(data_info.instance.input_csv_index)
 
         for line in reader:
             # extract the time
-            cpu_time = line[data_info.RAW_TARGET_CSV_INDEX[Target.START_TIME]]
+            cpu_time = line[data_info.instance.raw_target_csv_index[Target.START_TIME]]
             if start_time is None:
                 start_time = cpu_time
 
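Since the column-index maps now live on data_info.instance, the header row no longer needs parsing per file; next(reader) simply discards it. A hedged sketch of the reading loop under that assumption (the file name and the stand-in index map are illustrative, not from this commit):

import csv

# Stand-in for data_info.instance.raw_target_csv_index (assumed pre-built).
raw_target_csv_index = {"start_time": 0}

start_times = []
with open("pipeline.csv", "r") as f:  # hypothetical input file
    reader = csv.reader(f, delimiter=",", skipinitialspace=True)
    next(reader)  # discard the header row; the index maps are already known
    for line in reader:
        start_times.append(line[raw_target_csv_index["start_time"]])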
@@ -144,7 +143,7 @@ def _pipeline_get_grouped_op_unit_data(filename, warmup_period, ee_sample_interv
             record = [d for i, d in enumerate(line) if i >= input_output_boundary]
             data = list(map(data_util.convert_string_to_numeric, record))
             x_multiple = data[:input_end_boundary]
-            metrics = np.array(data[-data_info.METRICS_OUTPUT_NUM:])
+            metrics = np.array(data[-data_info.instance.METRICS_OUTPUT_NUM:])
 
             # Get the opunits located within
             opunits = []
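metrics is peeled off the row tail with a negative slice: the metric columns always sit at the end of a row, so the last METRICS_OUTPUT_NUM entries are the metrics regardless of how many feature columns precede them. A tiny worked example with a placeholder count:

import numpy as np

METRICS_OUTPUT_NUM = 3           # placeholder; the real value lives on data_info.instance
data = [7, 1, 0, 42, 100, 5, 9]  # features..., then three metric columns
metrics = np.array(data[-METRICS_OUTPUT_NUM:])
print(metrics)  # [100   5   9]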
@@ -156,12 +155,12 @@ def _pipeline_get_grouped_op_unit_data(filename, warmup_period, ee_sample_interv
 
                 opunit = OpUnit[feature]
                 x_loc = [v[idx] if type(v) == list else v for v in x_multiple]
-                if x_loc[data_info.INPUT_CSV_INDEX[ExecutionFeature.NUM_ROWS]] == 0:
+                if x_loc[data_info.instance.input_csv_index[ExecutionFeature.NUM_ROWS]] == 0:
                     logging.info("Skipping {} OU with 0 tuple num".format(opunit.name))
                     continue
 
                 if opunit == OpUnit.CREATE_INDEX:
-                    concurrency = x_loc[data_info.CONCURRENCY_INDEX]
+                    concurrency = x_loc[data_info.instance.CONCURRENCY_INDEX]
                     # TODO(lin): we won't do sampling for CREATE_INDEX. We probably should encapsulate this when
                     # generating the data
                     sample_interval = 0
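The x_loc comprehension above turns the pipeline-wide feature vector into a per-OU one: list-valued columns carry one entry per operating unit, while scalar columns are shared by all OUs in the pipeline. A small worked example with invented values:

# Hypothetical pipeline row with two OUs (idx 0 and 1).
x_multiple = [[100, 5], 2, [8, 8]]  # per-OU num_rows, shared scalar, per-OU key size

for idx in range(2):
    x_loc = [v[idx] if type(v) == list else v for v in x_multiple]
    print(idx, x_loc)
# 0 [100, 2, 8]
# 1 [5, 2, 8]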
@@ -189,15 +188,13 @@ def _pipeline_get_grouped_op_unit_data(filename, warmup_period, ee_sample_interv
 def _interval_get_grouped_op_unit_data(filename):
     # In the default case, the data does not need any pre-processing and the file name indicates the opunit
     df = pd.read_csv(filename, skipinitialspace=True)
-    headers = list(df.columns.values)
-    data_info.parse_csv_header(headers, False)
     file_name = os.path.splitext(os.path.basename(filename))[0]
 
-    x = df.iloc[:, :-data_info.METRICS_OUTPUT_NUM].values
-    y = df.iloc[:, -data_info.MINI_MODEL_TARGET_NUM:].values
-    start_times = df.iloc[:, data_info.TARGET_CSV_INDEX[Target.START_TIME]].values
-    cpu_ids = df.iloc[:, data_info.TARGET_CSV_INDEX[Target.CPU_ID]].values
-    interval = data_info.PERIODIC_OPUNIT_INTERVAL
+    x = df.iloc[:, :-data_info.instance.METRICS_OUTPUT_NUM].values
+    y = df.iloc[:, -data_info.instance.MINI_MODEL_TARGET_NUM:].values
+    start_times = df.iloc[:, data_info.instance.target_csv_index[Target.START_TIME]].values
+    cpu_ids = df.iloc[:, data_info.instance.target_csv_index[Target.CPU_ID]].values
+    interval = data_info.instance.PERIODIC_OPUNIT_INTERVAL
 
     # Map from interval start time to the data in this interval
     interval_x_map = {}
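interval_x_map groups rows into fixed-width time buckets keyed by each interval's start time. The grouping rule itself is not shown in this hunk; a plausible sketch, assuming floor-to-interval bucketing and a made-up width:

interval = 500000  # placeholder for data_info.instance.PERIODIC_OPUNIT_INTERVAL (microseconds)

interval_x_map = {}
start_times = [12, 499999, 500001, 1200000]
rows = ["r0", "r1", "r2", "r3"]
for t, row in zip(start_times, rows):
    bucket = t // interval * interval  # snap to the enclosing interval's start
    interval_x_map.setdefault(bucket, []).append(row)

print(interval_x_map)  # {0: ['r0', 'r1'], 500000: ['r2'], 1000000: ['r3']}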
@@ -248,9 +245,9 @@ def __init__(self, name, opunit_features, metrics, sample_interval=0, concurrenc
         """
         self.name = name
         self.opunit_features = opunit_features
-        self.y = metrics[-data_info.MINI_MODEL_TARGET_NUM:]
+        self.y = metrics[-data_info.instance.MINI_MODEL_TARGET_NUM:]
         self.y_pred = None
-        index_map = data_info.TARGET_CSV_INDEX
+        index_map = data_info.instance.target_csv_index
         self.start_time = metrics[index_map[Target.START_TIME]]
         self.end_time = self.start_time + self.y[index_map[Target.ELAPSED_US]] - 1
         self.cpu_id = int(metrics[index_map[Target.CPU_ID]])
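end_time here is inclusive: an OU that starts at start_time and runs for ELAPSED_US microseconds occupies [start_time, start_time + elapsed - 1]. Spelled out with concrete numbers:

start_time = 100
elapsed_us = 10
end_time = start_time + elapsed_us - 1
assert end_time == 109  # active during microseconds 100 through 109 inclusive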
@@ -282,7 +279,7 @@ def get_end_time(self, concurrent_counting_mode):
         if concurrent_counting_mode is ConcurrentCountingMode.EXACT:
             end_time = self.end_time
         if concurrent_counting_mode is ConcurrentCountingMode.ESTIMATED:
-            end_time = self.start_time + self.y_pred[data_info.TARGET_CSV_INDEX[Target.ELAPSED_US]] - 1
+            end_time = self.start_time + self.y_pred[data_info.instance.target_csv_index[Target.ELAPSED_US]] - 1
         if concurrent_counting_mode is ConcurrentCountingMode.INTERVAL:
             end_time = self.start_time + global_model_config.INTERVAL_START + global_model_config.INTERVAL_SIZE
         return end_time
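The three branches trade accuracy for availability: EXACT uses the observed end time, ESTIMATED substitutes the model's predicted elapsed time (y_pred), and INTERVAL ignores duration in favor of a fixed window from global_model_config. A minimal standalone restatement of that dispatch, with a hypothetical enum and placeholder config values:

from enum import Enum, auto

class ConcurrentCountingMode(Enum):  # stand-in for the project's enum
    EXACT = auto()
    ESTIMATED = auto()
    INTERVAL = auto()

INTERVAL_START, INTERVAL_SIZE = 0, 1000000  # placeholders for global_model_config

def get_end_time(start, observed_end, predicted_elapsed, mode):
    # Mirrors the dispatch above: observed, predicted, or fixed-window end time.
    if mode is ConcurrentCountingMode.EXACT:
        return observed_end
    if mode is ConcurrentCountingMode.ESTIMATED:
        return start + predicted_elapsed - 1
    return start + INTERVAL_START + INTERVAL_SIZE

print(get_end_time(100, 109, 12, ConcurrentCountingMode.ESTIMATED))  # 111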