Skip to content

Commit dbe1166

Browse files
committed
fix extractor/profileisolator bug and update test case
Signed-off-by: Sunyanan Choochotkaew <[email protected]>
1 parent 0639591 commit dbe1166

File tree

9 files changed

24 additions and 15 deletions (+24 / -15 lines changed)

9 files changed

24 additions and 15 deletions (+24 / -15 lines changed)

src/train/extractor/extractor.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def get_workload_feature_data(self, query_results, features):
147147

148148
if all(col in aggr_query_data.columns for col in container_id_cols):
149149
aggr_query_data.rename(columns={query: feature}, inplace=True)
150-
aggr_query_data[container_id_colname] = aggr_query_data[container_id_cols].apply(lambda x: '/'.join(x), axis=1)
150+
aggr_query_data[container_id_colname] = aggr_query_data[container_id_cols].apply(lambda x: '/'.join([str(xi) for xi in x]), axis=1)
151151
# separate for each container_id
152152
container_id_list = pd.unique(aggr_query_data[container_id_colname])
153153

@@ -230,17 +230,20 @@ def get_power_data(self, query_results, energy_components, source):
230230
if usage_ratio_query not in query_results:
231231
# sum over mode (idle, dynamic) and unit col
232232
df = aggr_query_data.groupby([TIMESTAMP_COL]).sum().reset_index().set_index(TIMESTAMP_COL)
233+
time_diff_values = df.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
233234
df = df.loc[:, df.columns != unit_col]
234235
# rename
235236
colname = component_to_col(component)
236237
df.rename(columns={query: colname}, inplace=True)
237238
# find current value from aggregated query
238239
df = df.sort_index()[colname].diff().dropna()
240+
df /= time_diff_values
239241
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
240242
power_data_list += [df]
241243
else:
242244
# sum over mode (idle, dynamic)
243245
aggr_query_data = aggr_query_data.groupby([unit_col, TIMESTAMP_COL]).sum().reset_index().set_index(TIMESTAMP_COL)
246+
time_diff_values = aggr_query_data.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
244247
# add per unit_col
245248
unit_vals = pd.unique(aggr_query_data[unit_col])
246249
for unit_val in unit_vals:
@@ -250,16 +253,19 @@ def get_power_data(self, query_results, energy_components, source):
250253
df.rename(columns={query: colname}, inplace=True)
251254
# find current value from aggregated query
252255
df = df.sort_index()[colname].diff().dropna()
256+
df /= time_diff_values
253257
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
254258
power_data_list += [df]
255259
else:
256260
# sum over mode
257261
aggr_query_data = aggr_query_data.groupby([TIMESTAMP_COL]).sum()
262+
time_diff_values = aggr_query_data.reset_index()[[TIMESTAMP_COL]].diff().dropna().values.mean()
258263
# rename
259264
colname = component_to_col(component)
260265
aggr_query_data.rename(columns={query: colname}, inplace=True)
261266
# find current value from aggregated query
262267
df = aggr_query_data.sort_index()[colname].diff().dropna()
268+
df /= time_diff_values
263269
df = df.mask(df.lt(0)).ffill().fillna(0).convert_dtypes()
264270
power_data_list += [df]
265271
power_data = pd.concat(power_data_list, axis=1).dropna()

src/train/isolator/isolator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,8 @@ def isolate(self, data, label_cols, energy_source):
128128
data = data.reset_index()
129129
data = data.set_index([TIMESTAMP_COL])
130130
target_data, _ = isolate_container(data, self.background_containers, label_cols)
131-
isolated_data = target_data.copy()
131+
isolated_data = squeeze_data(target_data, label_cols)
132+
print(isolated_data)
132133
try:
133134
for label_col in label_cols:
134135
component = col_to_component(label_col)
@@ -137,7 +138,6 @@ def isolate(self, data, label_cols, energy_source):
137138
return None
138139
isolated_data[label_col] = isolated_data[label_col] - isolated_data['profile']
139140
isolated_data.drop(columns='profile', inplace=True)
140-
isolated_data = isolated_data.reset_index()
141141
if index_list[0] is not None:
142142
isolated_data = isolated_data.set_index(index_list)
143143
return isolated_data

tests/data/prom_output/.DS_Store

6 KB
Binary file not shown.

tests/data/prom_output/idle.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/data/prom_output/prom_response.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/data/prom_response.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/data/prom_response_idle.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/extractor_test.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,18 +62,22 @@ def save_extract_results(instance, feature_group, extracted_data, node_level, sa
6262
save_csv(save_path, filename, extracted_data)
6363

6464
def get_expected_power_columns(energy_components=test_energy_components, num_of_unit=test_num_of_unit):
65-
return [component_to_col(component, "package", unit_val) for component in energy_components for unit_val in range(0,num_of_unit)]
65+
# TODO: if ratio applied,
66+
# return [component_to_col(component, "package", unit_val) for component in energy_components for unit_val in range(0,num_of_unit)]
67+
return [component_to_col(component) for component in energy_components]
6668

6769
def assert_extract(extracted_data, power_columns, energy_components, num_of_unit, feature_group):
6870
extracted_data_column_names = extracted_data.columns
6971
# basic assert
7072
assert extracted_data is not None, "extracted data is None"
7173
assert len(power_columns) > 0, "no power label column {}".format(extracted_data_column_names)
7274
assert node_info_column in extracted_data_column_names, "no {} in column {}".format(node_info_column, extracted_data_column_names)
73-
expected_power_column_length = len(energy_components) * num_of_unit
75+
# TODO: if ratio applied, expected_power_column_length = len(energy_components) * num_of_unit
76+
expected_power_column_length = len(energy_components)
7477
# detail assert
7578
assert len(power_columns) == expected_power_column_length, "unexpected power label columns {}, expected {}".format(power_columns, expected_power_column_length)
76-
expected_col_size = expected_power_column_length + len(FeatureGroups[FeatureGroup[feature_group]]) + 1 + num_of_unit # power ratio
79+
# TODO: if ratio applied, expected_col_size must + 1 for power_ratio
80+
expected_col_size = expected_power_column_length + len(FeatureGroups[FeatureGroup[feature_group]]) + num_of_unit # power ratio
7781
assert len(extracted_data_column_names) == expected_col_size, "unexpected column length: expected {}, got {}({}) ".format(expected_col_size, extracted_data_column_names, len(extracted_data_column_names))
7882

7983
def process(query_results, feature_group, save_path=extractor_output_path, customize_extractors=test_customize_extractors, energy_source=test_energy_source, num_of_unit=2):

tests/isolator_test.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def save_results(instance, extractor_name, feature_group, isolated_data, save_pa
5252
def assert_isolate(extractor_result, isolated_data):
5353
isolated_data_column_names = isolated_data.columns
5454
assert isolated_data is not None, "isolated data is None"
55-
value_df = isolated_data.drop(columns=container_level_index)
55+
value_df = isolated_data.reset_index().drop(columns=container_level_index)
5656
negative_df = value_df[(value_df<0).all(1)]
5757
assert len(negative_df) == 0, "all data must be non-negative \n {}".format(negative_df)
5858
assert len(extractor_result.columns) == len(isolated_data_column_names), "unexpected column length: expected {}, got {}({}) ".format(len(extractor_result.columns), isolated_data_column_names, len(isolated_data_column_names))
@@ -90,10 +90,9 @@ def process(test_isolators=test_isolators, customize_isolators=[], extract_path=
9090
save_results(test_instance, extractor_name, feature_group, isolated_data, save_path=save_path)
9191

9292

93-
9493
if __name__ == '__main__':
9594
# Add customize isolator here
96-
customize_isolators = [TrainIsolator(idle_data=test_idle_data, profiler=DefaultProfiler)]
97-
customize_isolators = [TrainIsolator(target_hints=["coremark"])]
98-
customize_isolators += [ProfileBackgroundIsolator(test_profiles, test_idle_data)]
95+
customize_isolators = [ProfileBackgroundIsolator(test_profiles, test_idle_data)]
96+
customize_isolators += [TrainIsolator(idle_data=test_idle_data, profiler=DefaultProfiler)]
97+
customize_isolators += [TrainIsolator(target_hints=["coremark"])]
9998
process(customize_isolators=customize_isolators)

0 commit comments

Comments
 (0)