@@ -147,7 +147,7 @@ def get_workload_feature_data(self, query_results, features):
147147
148148 if all (col in aggr_query_data .columns for col in container_id_cols ):
149149 aggr_query_data .rename (columns = {query : feature }, inplace = True )
150- aggr_query_data [container_id_colname ] = aggr_query_data [container_id_cols ].apply (lambda x : '/' .join (x ), axis = 1 )
150+ aggr_query_data [container_id_colname ] = aggr_query_data [container_id_cols ].apply (lambda x : '/' .join ([ str ( xi ) for xi in x ] ), axis = 1 )
151151 # separate for each container_id
152152 container_id_list = pd .unique (aggr_query_data [container_id_colname ])
153153
@@ -230,17 +230,20 @@ def get_power_data(self, query_results, energy_components, source):
230230 if usage_ratio_query not in query_results :
231231 # sum over mode (idle, dynamic) and unit col
232232 df = aggr_query_data .groupby ([TIMESTAMP_COL ]).sum ().reset_index ().set_index (TIMESTAMP_COL )
233+ time_diff_values = df .reset_index ()[[TIMESTAMP_COL ]].diff ().dropna ().values .mean ()
233234 df = df .loc [:, df .columns != unit_col ]
234235 # rename
235236 colname = component_to_col (component )
236237 df .rename (columns = {query : colname }, inplace = True )
237238 # find current value from aggregated query
238239 df = df .sort_index ()[colname ].diff ().dropna ()
240+ df /= time_diff_values
239241 df = df .mask (df .lt (0 )).ffill ().fillna (0 ).convert_dtypes ()
240242 power_data_list += [df ]
241243 else :
242244 # sum over mode (idle, dynamic)
243245 aggr_query_data = aggr_query_data .groupby ([unit_col , TIMESTAMP_COL ]).sum ().reset_index ().set_index (TIMESTAMP_COL )
246+ time_diff_values = aggr_query_data .reset_index ()[[TIMESTAMP_COL ]].diff ().dropna ().values .mean ()
244247 # add per unit_col
245248 unit_vals = pd .unique (aggr_query_data [unit_col ])
246249 for unit_val in unit_vals :
@@ -250,16 +253,19 @@ def get_power_data(self, query_results, energy_components, source):
250253 df .rename (columns = {query : colname }, inplace = True )
251254 # find current value from aggregated query
252255 df = df .sort_index ()[colname ].diff ().dropna ()
256+ df /= time_diff_values
253257 df = df .mask (df .lt (0 )).ffill ().fillna (0 ).convert_dtypes ()
254258 power_data_list += [df ]
255259 else :
256260 # sum over mode
257261 aggr_query_data = aggr_query_data .groupby ([TIMESTAMP_COL ]).sum ()
262+ time_diff_values = aggr_query_data .reset_index ()[[TIMESTAMP_COL ]].diff ().dropna ().values .mean ()
258263 # rename
259264 colname = component_to_col (component )
260265 aggr_query_data .rename (columns = {query : colname }, inplace = True )
261266 # find current value from aggregated query
262267 df = aggr_query_data .sort_index ()[colname ].diff ().dropna ()
268+ df /= time_diff_values
263269 df = df .mask (df .lt (0 )).ffill ().fillna (0 ).convert_dtypes ()
264270 power_data_list += [df ]
265271 power_data = pd .concat (power_data_list , axis = 1 ).dropna ()
0 commit comments