55# node_type = index_collection.index_train_machine(machine_id, new_spec)
66# index_collection.save()
77
8- import enum
98import logging
109import os
1110import re
1716
1817from kepler_model .util .loader import load_json , load_node_type_index
1918from kepler_model .util .saver import save_machine_spec , save_node_type_index
19+ from kepler_model .util .similarity import compute_jaccard_similarity , compute_looseness , compute_similarity , compute_uncertainty , find_best_candidate , get_candidate_score , get_num_of_none , get_similarity_weight
20+ from kepler_model .util .train_types import NodeAttribute
2021
2122logger = logging .getLogger (__name__ )
2223
@@ -100,13 +101,6 @@ def get_machine_spec(cmd_machine_spec_file: str):
100101 return spec
101102 return discover_spec_values ()
102103
103- class NodeAttribute (str , enum .Enum ):
104- PROCESSOR = "processor"
105- CORES = "cores"
106- CHIPS = "chips"
107- MEMORY = "memory"
108- FREQ = "frequency"
109-
110104def load_node_type_spec (node_type_index_json ):
111105 node_type_spec_index = dict ()
112106 if node_type_index_json is not None :
@@ -181,6 +175,34 @@ def cover(self, compare_spec):
181175 return False
182176 return True
183177
def get_uncertain_attribute_freq(self, compare_spec):
    """
    Map each attribute that compare_spec leaves unset to the size of this node type.

    Returns None when this spec does not cover compare_spec. Otherwise returns a
    dict holding one entry per NodeAttribute whose value in compare_spec is None;
    every entry's value is self.get_size().
    """
    if not self.cover(compare_spec):
        # not covered
        return None
    member_count = self.get_size()
    return {
        attr: member_count
        for attr in NodeAttribute
        if compare_spec.attrs[attr] is None
    }
188+
def get_similarity(self, compare_spec, debug=False):
    """
    Return the weighted similarity between this spec and compare_spec, capped at 1.

    Attributes that compare_spec leaves as None score 0. The processor attribute
    is scored with compute_jaccard_similarity and every other attribute with
    compute_similarity. Each score is multiplied by get_similarity_weight(attr)
    and the products are summed. With debug set, the per-attribute values are
    printed to stdout.
    """
    weighted_sum = 0
    for attr in NodeAttribute:
        other_value = compare_spec.attrs[attr]
        if other_value is None:
            # nothing to compare against; this attribute contributes nothing
            score = 0
        elif attr == NodeAttribute.PROCESSOR:
            # compare similar string
            score = compute_jaccard_similarity(self.attrs[attr], other_value)
        else:
            # compare number
            score = compute_similarity(self.attrs[attr], other_value)
        if debug:
            print(attr, self.attrs[attr], other_value, score, get_similarity_weight(attr))
        weighted_sum += score * get_similarity_weight(attr)
    # the weighted sum is never reported above 1
    return min(weighted_sum, 1)
205+
184206 def __str__ (self ):
185207 out_str = ""
186208 for attr in NodeAttribute :
@@ -218,7 +240,7 @@ def index_train_machine(self, machine_id, new_spec):
218240 if not new_spec .complete_info ():
219241 print ("Machine info not completed: " , str (new_spec ))
220242 return - 1
221- covered_index = self .get_node_type (new_spec )
243+ covered_index , _ , _ = self .get_node_type (new_spec )
222244 if covered_index == - 1 :
223245 covered_index = 0
224246 if len (self .node_type_index .keys ()) > 0 :
@@ -227,13 +249,31 @@ def index_train_machine(self, machine_id, new_spec):
227249 self .node_type_index [covered_index ].add_member (machine_id )
228250 return covered_index
229251
def get_node_type(self, in_spec: NodeTypeSpec, loose_search: bool = False):
    """
    Find the index of the node type that best matches in_spec.

    Returns a tuple (index, uncertainty, looseness):
    - (-1, -1, -1) when the collection is empty or no node type can be selected
    - (index, 0, 0) when _find_candidates reports a similarity of exactly 1
    - (index, uncertainty, 0) when at least one node type covers in_spec
    - (index, uncertainty, looseness) when loose_search is enabled, nothing
      covers in_spec, and a most similar node type was found

    in_spec itself is not modified here; the loosening step works on a copy.
    """
    if not self.node_type_index:
        return -1, -1, -1
    compare_spec = in_spec.copy()
    num_of_none = get_num_of_none(compare_spec)
    (
        similarity_map,
        max_similarity,
        most_similar_index,
        has_candidate,
        candidate_uncertain_attribute_freq,
        candidate_uncertain_attribute_total,
    ) = self._find_candidates(in_spec, loose_search)
    if max_similarity == 1:
        return most_similar_index, 0, 0
    if has_candidate:
        # covered
        candidate_score = get_candidate_score(candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total)
        best_candidate_index, max_score = find_best_candidate(candidate_score)
        uncertainty = compute_uncertainty(max_score, num_of_none)
        return best_candidate_index, uncertainty, 0
    if loose_search and most_similar_index != -1:
        candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total, num_of_none = self._loose_search(
            compare_spec,
            similarity_map,
            max_similarity,
            most_similar_index,
            candidate_uncertain_attribute_freq,
            candidate_uncertain_attribute_total,
        )
        candidate_score = get_candidate_score(candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total)
        logger.debug(f"candidate score: {candidate_score} ")
        most_similar_score = candidate_score[most_similar_index]
        uncertainty = compute_uncertainty(most_similar_score, num_of_none)
        if max_similarity == -1:
            # Without a usable similarity there is no looseness to report; return
            # the no-match tuple rather than referencing an unassigned local.
            return -1, -1, -1
        looseness = compute_looseness(max_similarity)
        return most_similar_index, uncertainty, looseness
    return -1, -1, -1
237277
238278 def get_json (self ):
239279 json_obj = dict ()
@@ -251,3 +291,68 @@ def copy(self):
251291 for node_type in removed_items :
252292 del node_collection .node_type_index [node_type ]
253293 return node_collection
294+
def _find_candidates(self, compare_spec, loose_search=False):
    """
    Scan every indexed node type and gather match information for compare_spec.

    - with loose_search, a similarity value is computed for each node type and the
      highest one is tracked; on equal similarity the larger get_size() wins
    - each node type that covers compare_spec is recorded, together with its size,
      under every attribute that compare_spec leaves as None
    - if compare_spec is complete, the first covering node type ends the scan and
      is returned with a similarity of 1

    Returns (similarity_map, max_similarity, most_similar_index, has_candidate,
    candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total).
    """
    freq_by_attr = {attr: [] for attr in NodeAttribute}
    total_by_attr = {attr: 0 for attr in NodeAttribute}
    similarity_map = {}
    best_index, best_similarity, best_freq = -1, -1, -1
    found_cover = False
    is_complete = compare_spec.complete_info()
    for index, node_type_spec in self.node_type_index.items():
        freq = node_type_spec.get_size()
        if loose_search:
            similarity = node_type_spec.get_similarity(compare_spec)
            similarity_map[index] = similarity
            if similarity > best_similarity or (similarity == best_similarity and best_freq < freq):
                best_index, best_similarity, best_freq = index, similarity, freq
            logger.debug(f"{index} - {node_type_spec} : {similarity} ")
        if not node_type_spec.cover(compare_spec):
            continue
        if is_complete:
            # a complete spec that is covered is an exact match; stop searching
            return similarity_map, 1, index, found_cover, freq_by_attr, total_by_attr
        for attr in NodeAttribute:
            if compare_spec.attrs[attr] is None:
                freq_by_attr[attr].append((index, freq))
                total_by_attr[attr] += freq
        found_cover = True
    return similarity_map, best_similarity, best_index, found_cover, freq_by_attr, total_by_attr
333+
def _loose_search(self, compare_spec, similarity_map, max_similarity, most_similar_index, candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total):
    """
    Loosen the attributes that do not match the most similar spec, then collect
    the data needed to recompute the uncertainty of the selection.

    Every attribute of compare_spec that holds a value different from the most
    similar node type is set to None (compare_spec is modified in place). Each
    node type covering the loosened spec is then appended, with its size, to the
    candidate maps under every attribute that is None.

    Returns (candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total,
    num_of_none), where num_of_none is the starting get_num_of_none(compare_spec)
    plus the number of attributes loosened here.
    """
    num_of_none = get_num_of_none(compare_spec)
    most_similar_spec = self.node_type_index[most_similar_index]
    # remove uncovered spec
    for attr in NodeAttribute:
        current_value = compare_spec.attrs[attr]
        # Attributes that are already None are part of the starting count above;
        # loosening them again would count them twice.
        if current_value is None or current_value == most_similar_spec.attrs[attr]:
            continue
        logger.debug(f"Loosen {attr} ({current_value} -->{most_similar_spec.attrs[attr]} )")
        compare_spec.attrs[attr] = None
        num_of_none += 1
    # find uncertainty
    for index, node_type_spec in self.node_type_index.items():
        if not node_type_spec.cover(compare_spec):
            continue
        similarity = similarity_map[index]
        freq = node_type_spec.get_size()
        if similarity == max_similarity and freq > self.node_type_index[most_similar_index].get_size():
            logger.debug(f"change most similar index from {most_similar_index} to {index} ")
            # NOTE(review): this reassignment is local only; the updated index is
            # not part of the return value, so the caller keeps its own index.
            most_similar_index = index
        for attr in NodeAttribute:
            if compare_spec.attrs[attr] is None:
                candidate_uncertain_attribute_freq[attr].append((index, freq))
                candidate_uncertain_attribute_total[attr] += freq
    return candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total, num_of_none
0 commit comments