1010
1111from kepler_model .util .train_types import get_valid_feature_groups , ModelOutputType , FeatureGroups , FeatureGroup , PowerSourceMap , weight_support_trainers
1212from kepler_model .util .config import getConfig , model_toppath , ERROR_KEY , MODEL_SERVER_MODEL_REQ_PATH , MODEL_SERVER_MODEL_LIST_PATH , initial_pipeline_urls , download_path
13- from kepler_model .util .loader import parse_filters , is_valid_model , load_json , load_weight , get_model_group_path , get_archived_file , METADATA_FILENAME , CHECKPOINT_FOLDERNAME , get_pipeline_path , any_node_type , is_matched_type , get_largest_candidates
13+ from kepler_model .util .loader import parse_filters , is_valid_model , load_json , load_weight , get_model_group_path , get_archived_file , METADATA_FILENAME , CHECKPOINT_FOLDERNAME , get_pipeline_path , any_node_type , is_matched_type , get_largest_candidates , default_pipelines , get_node_type_from_name
1414from kepler_model .util .saver import WEIGHT_FILENAME
1515from kepler_model .train import NodeTypeSpec , NodeTypeIndexCollection
1616
@@ -65,10 +65,26 @@ def __init__(self, metrics, output_type, source="rapl-sysfs", node_type=-1, weig
6565"""
6666
6767
68- def select_best_model (spec , valid_groupath , filters , energy_source , pipeline_name = "" , trainer_name = "" , node_type = any_node_type , weight = False ):
69- model_names = [f for f in os .listdir (valid_groupath ) if f != CHECKPOINT_FOLDERNAME and not os .path .isfile (os .path .join (valid_groupath , f )) and (trainer_name == "" or trainer_name in f )]
68+ def select_best_model (spec , valid_grouppath , filters , energy_source , pipeline_name = "" , trainer_name = "" , node_type = any_node_type , weight = False , loose_node_type = True ):
69+ # Find initial model list filtered by trainer
70+ initial_model_names = [f for f in os .listdir (valid_grouppath ) if f != CHECKPOINT_FOLDERNAME and not os .path .isfile (os .path .join (valid_grouppath , f )) and (trainer_name == "" or trainer_name in f )]
71+ if pipeline_name == "" and energy_source in default_pipelines :
72+ pipeline_name = default_pipelines [energy_source ]
73+
74+ if node_type != any_node_type :
75+ model_names = [name for name in initial_model_names if "_{}" .format (node_type ) in name ]
76+ if len (model_names ) == 0 :
77+ if not loose_node_type :
78+ return None , None
79+ logger .warning (f"{ valid_grouppath } has no matched model for node type={ node_type } , try all available models" )
80+ model_names = initial_model_names
81+ else :
82+ model_names = initial_model_names
83+
84+ # Filter weight models
7085 if weight :
71- model_names = [name for name in model_names if name .split ("_" )[0 ] in weight_support_trainers ]
86+ candidates = [name for name in model_names if name .split ("_" )[0 ] in weight_support_trainers ]
87+
7288 # Load metadata of trainers
7389 best_cadidate = None
7490 best_response = None
@@ -85,7 +101,7 @@ def select_best_model(spec, valid_groupath, filters, energy_source, pipeline_nam
85101 logger .warn ("no large candidates, select from all availables" )
86102 candidates = model_names
87103 for model_name in candidates :
88- model_savepath = os .path .join (valid_groupath , model_name )
104+ model_savepath = os .path .join (valid_grouppath , model_name )
89105 metadata = load_json (model_savepath , METADATA_FILENAME )
90106 if metadata is None or not is_valid_model (metadata , filters ) or ERROR_KEY not in metadata :
91107 # invalid metadata
@@ -98,7 +114,7 @@ def select_best_model(spec, valid_groupath, filters, energy_source, pipeline_nam
98114 logger .warn ("weight failed: %s" , model_savepath )
99115 continue
100116 else :
101- response = get_archived_file (valid_groupath , model_name )
117+ response = get_archived_file (valid_grouppath , model_name )
102118 if not os .path .exists (response ):
103119 # archived model file does not exist
104120 logger .warn ("archive failed: %s" , response )
@@ -130,20 +146,36 @@ def get_model():
130146 output_type = ModelOutputType [req .output_type ]
131147 best_model = None
132148 best_response = None
149+ best_uncertainty = None
150+ best_looseness = None
133151 # find the best model by comparing the best candidate from each valid feature group that complies with the filtering conditions
134152 for fg in valid_fgs :
135- valid_groupath = get_model_group_path (model_toppath , output_type , fg , energy_source , pipeline_name = pipelineName [energy_source ])
153+ pipeline_name = pipelineName [energy_source ]
154+ valid_groupath = get_model_group_path (model_toppath , output_type , fg , energy_source , pipeline_name = pipeline_name )
155+ node_type = req .node_type
156+ if req .node_type == any_node_type and req .spec is not None and not req .spec .is_none () and pipeline_name in nodeCollection :
157+ node_type , uncertainty , looseness = nodeCollection [pipeline_name ].get_node_type (req .spec , loose_search = True )
158+ else :
159+ uncertainty = 0
160+ looseness = 0
136161 if os .path .exists (valid_groupath ):
137- best_candidate , response = select_best_model (req .spec , valid_groupath , filters , energy_source , req .pipeline_name , req .trainer_name , req . node_type , req .weight )
162+ best_candidate , response = select_best_model (req .spec , valid_groupath , filters , energy_source , req .pipeline_name , req .trainer_name , node_type , req .weight )
138163 if best_candidate is None :
139164 continue
165+ if node_type != any_node_type and best_model is not None and get_node_type_from_name (best_model ['model_name' ]) == node_type :
166+ if get_node_type_from_name (best_candidate ['model_name' ]) != node_type :
167+ continue
140168 if best_model is None or best_model [ERROR_KEY ] > best_candidate [ERROR_KEY ]:
141169 best_model = best_candidate
142170 best_response = response
171+ best_uncertainty = uncertainty
172+ best_looseness = looseness
173+ logger .info (f"response: model { best_model ['model_name' ]} by { best_model ['features' ]} with { ERROR_KEY } ={ best_model [ERROR_KEY ]} selected with uncertainty={ best_uncertainty } , looseness={ best_looseness } " )
143174 if best_model is None :
144175 return make_response ("cannot find model for {} at the moment" .format (model_request ), 400 )
145176 if req .weight :
146177 try :
178+ best_response ["model_name" ] = best_model ['model_name' ]
147179 response = app .response_class (response = json .dumps (best_response ), status = 200 , mimetype = "application/json" )
148180 return response
149181 except ValueError as err :
@@ -154,13 +186,13 @@ def get_model():
154186 except ValueError as err :
155187 return make_response ("send archived model error: {}" .format (err ), 400 )
156188
157-
158189# get_available_models: return name list of best-candidate pipelines
159190@app .route (MODEL_SERVER_MODEL_LIST_PATH , methods = ["GET" ])
160191def get_available_models ():
161192 fg = request .args .get ("fg" )
162193 ot = request .args .get ("ot" )
163194 energy_source = request .args .get ("source" )
195+ node_type = request .args .get ("type" )
164196 filter = request .args .get ("filter" )
165197
166198 try :
@@ -181,21 +213,27 @@ def get_available_models():
181213 filters = dict ()
182214 else :
183215 filters = parse_filters (filter )
216+ if node_type is None :
217+ node_type = - 1
218+ else :
219+ node_type = int (node_type )
184220
185221 model_names = dict ()
186222 for output_type in output_types :
223+ logger .debug (f"Searching output type { output_type } " )
187224 model_names [output_type .name ] = dict ()
188225 for fg in valid_fgs :
226+ logger .debug (f"Searching feature group { fg } " )
189227 valid_groupath = get_model_group_path (model_toppath , output_type , fg , energy_source , pipeline_name = pipelineName [energy_source ])
190228 if os .path .exists (valid_groupath ):
191- best_candidate , _ = select_best_model (None , valid_groupath , filters , energy_source )
229+ best_candidate , _ = select_best_model (None , valid_groupath , filters , energy_source , node_type = node_type , loose_node_type = False )
192230 if best_candidate is None :
193231 continue
194232 model_names [output_type .name ][fg .name ] = best_candidate ["model_name" ]
195233 response = app .response_class (response = json .dumps (model_names ), status = 200 , mimetype = "application/json" )
196234 return response
197235 except (ValueError , Exception ) as err :
198- return make_response ("failed to get best model list: {}" . format ( err ) , 400 )
236+ return make_response (f "failed to get best model list: { err } " , 400 )
199237
200238
201239# unpack_zip_files: unpack all model.zip files to model folder and copy model.json to model/weight.zip
0 commit comments