@@ -1233,27 +1233,116 @@ def __getitem__(self, idx):
12331233 best_task = Task .get_task (task_id = best_task_id )
12341234 best_model = Model (model_id = best_model_id )
12351235
1236- # Get hyperparameters from best task
1237- best_params = best_task .get_parameters ()
1236+ print (f"🔍 Analyzing best model architecture..." )
12381237
1239- # Handle different parameter formats
1240- def safe_get_param (params , key , default , param_type ):
1238+ # Get model weights first to dynamically detect architecture
1239+ model_path = best_model .get_local_copy ()
1240+ checkpoint = torch .load (model_path , map_location = 'cpu' )
1241+
1242+ # Dynamically infer all architecture parameters from checkpoint
def infer_architecture_from_checkpoint(checkpoint):
    """Dynamically infer model architecture from saved weights."""
    arch = {}

    try:
        # The first LSTM layer's input-to-hidden matrix pins down two sizes:
        # rows = 4 * hidden (input/forget/cell/output gates), cols = input_size.
        ih0 = checkpoint['lstm.weight_ih_l0']
        arch['hidden_size'] = ih0.shape[0] // 4  # LSTM has 4 gates
        arch['input_size'] = ih0.shape[1]

        # Depth = how many forward per-layer weight tensors exist.
        depth = 0
        while f'lstm.weight_ih_l{depth}' in checkpoint:
            depth += 1
        arch['num_layers'] = depth

        # A layer-norm weight tensor is only saved when the module exists.
        arch['use_layer_norm'] = 'layer_norm.weight' in checkpoint

        # Output classes = rows of the final fully-connected weight.
        arch['num_classes'] = checkpoint['fc.weight'].shape[0]

        # Dropout probabilities leave no trace in the weights; fall back
        # to the defaults (harmless for eval, where dropout is inactive).
        arch['attention_dropout'] = 0.1  # Default, hard to infer from weights
        arch['dropout_rate'] = 0.1  # Default, hard to infer from weights

        print(f"✅ Inferred architecture:")
        print(f"   - hidden_size: {arch['hidden_size']}")
        print(f"   - num_layers: {arch['num_layers']}")
        print(f"   - input_size: {arch['input_size']}")
        print(f"   - num_classes: {arch['num_classes']}")
        print(f"   - use_layer_norm: {arch['use_layer_norm']}")
        print(f"   - dropout_rate: {arch['dropout_rate']} (default)")
        print(f"   - attention_dropout: {arch['attention_dropout']} (default)")

        return arch

    except Exception as e:
        # Any missing/odd key means we cannot trust the inference; the
        # caller falls back to model-design / task-parameter lookup.
        print(f"❌ Error inferring architecture: {e}")
        return None
1288+
1289+ # Try to infer architecture from checkpoint
1290+ inferred_config = infer_architecture_from_checkpoint (checkpoint )
1291+
1292+ if inferred_config :
1293+ # Use inferred configuration
1294+ hidden_size = inferred_config ['hidden_size' ]
1295+ num_layers = inferred_config ['num_layers' ]
1296+ dropout_rate = inferred_config ['dropout_rate' ]
1297+ use_layer_norm = inferred_config ['use_layer_norm' ]
1298+ attention_dropout = inferred_config ['attention_dropout' ]
1299+ input_size = inferred_config ['input_size' ] # Override with actual
1300+ num_classes = inferred_config ['num_classes' ] # Override with actual
1301+ batch_size = 32 # Default for evaluation
1302+
1303+ else :
1304+ print ("⚠️ Could not infer architecture, using fallback methods..." )
1305+
1306+ # Fallback 1: Try model design
1307+ model_design = best_model .get_model_design ()
1308+ if model_design :
1309+ print (f"📋 Using model design: { model_design } " )
1310+ hidden_size = int (model_design .get ('hidden_size' , 256 ))
1311+ num_layers = int (model_design .get ('num_layers' , 4 ))
1312+ dropout_rate = float (model_design .get ('dropout_rate' , 0.1 ))
1313+ use_layer_norm = bool (model_design .get ('use_layer_norm' , False ))
1314+ attention_dropout = float (model_design .get ('attention_dropout' , 0.1 ))
1315+ batch_size = int (model_design .get ('batch_size' , 32 ))
1316+ else :
1317+ print ("📋 Using task parameters..." )
1318+ # Fallback 2: Task parameters
1319+ best_params = best_task .get_parameters ()
1320+
def safe_get_param(params, key, default, param_type):
    """Fetch *key* from a task-parameter dict and coerce it to *param_type*.

    ClearML `get_parameters()` hands values back as strings, sometimes
    wrapped as ``{'value': ...}``, so a plain cast is applied — with one
    special case: ``bool('False')`` is ``True``, so boolean-looking
    strings are parsed explicitly instead of cast.

    Args:
        params: Mapping of parameter names to raw values.
        key: Parameter name to look up.
        default: Fallback when the key is missing or coercion fails.
        param_type: Target type (``int``, ``float``, ``bool``, ...).

    Returns:
        The coerced value, or ``param_type(default)`` on conversion error.
    """
    try:
        value = params.get(key, default)
        if isinstance(value, dict) and 'value' in value:
            value = value['value']
        if param_type is bool and isinstance(value, str):
            # bool(non-empty str) is always True; parse the textual forms.
            return value.strip().lower() in ('1', 'true', 'yes', 'on')
        return param_type(value)
    except (ValueError, TypeError):
        return param_type(default)
1329+
1330+ hidden_size = safe_get_param (best_params , 'General/hidden_size' , 256 , int )
1331+ num_layers = safe_get_param (best_params , 'General/num_layers' , 4 , int )
1332+ dropout_rate = safe_get_param (best_params , 'General/dropout_rate' , 0.1 , float )
1333+ use_layer_norm = safe_get_param (best_params , 'General/use_layer_norm' , False , bool )
1334+ attention_dropout = safe_get_param (best_params , 'General/attention_dropout' , 0.1 , float )
1335+ batch_size = safe_get_param (best_params , 'General/batch_size' , 32 , int )
1336+
1337+ print (f"🏗️ Final model configuration:" )
1338+ print (f" - input_size: { input_size } " )
1339+ print (f" - hidden_size: { hidden_size } " )
1340+ print (f" - num_layers: { num_layers } " )
1341+ print (f" - num_classes: { num_classes } " )
1342+ print (f" - dropout_rate: { dropout_rate } " )
1343+ print (f" - use_layer_norm: { use_layer_norm } " )
1344+ print (f" - attention_dropout: { attention_dropout } " )
1345+ print (f" - batch_size: { batch_size } " )
12571346
12581347 # Load dataset
12591348 action_classes = ["Falling" , "No Action" , "Waving" ]
@@ -1274,8 +1363,11 @@ def make_torch_dataset_for_loader(split_data, split_labels):
12741363 test_loader = DataLoader (make_torch_dataset_for_loader (test_data , test_labels ),
12751364 batch_size = batch_size , shuffle = False )
12761365
1277- # Initialize model
1366+ # Initialize model with dynamically detected architecture
12781367 device = torch .device ("cuda" if torch .cuda .is_available () else "cpu" )
1368+
1369+ print (f"🚀 Creating model with detected architecture..." )
1370+
12791371 model = ActionRecognitionBiLSTMWithAttention (
12801372 input_size = input_size ,
12811373 hidden_size = hidden_size ,
@@ -1286,9 +1378,16 @@ def make_torch_dataset_for_loader(split_data, split_labels):
12861378 attention_dropout = attention_dropout
12871379 ).to (device )
12881380
1289- # Load model weights
1290- model_path = best_model .get_local_copy ()
1291- model .load_state_dict (torch .load (model_path , map_location = device ))
1381+ # Load model weights (checkpoint already loaded above)
1382+ try :
1383+ model .load_state_dict (checkpoint )
1384+ print ("✅ Model weights loaded successfully!" )
1385+ except RuntimeError as e :
1386+ print (f"❌ Error loading model weights: { e } " )
1387+ print (f"Model architecture: { model } " )
1388+ print (f"Checkpoint keys: { list (checkpoint .keys ())} " )
1389+ raise
1390+
12921391 model .eval ()
12931392
12941393 # Evaluate
@@ -1368,33 +1467,44 @@ def deploy_model_github(
13681467 print (f"Deploying model { best_model_id } with test accuracy: { test_accuracy :.2f} %" )
13691468
13701469 if test_accuracy >= min_accuracy_threshold :
1371- # Get the model
1372- model = Model (model_id = best_model_id )
1373-
1374- # Publish the model for deployment
1375- model .publish ()
1376-
1377- # Add deployment tags
1378- model .add_tags (["deployed" , "production" , "github-actions" ])
1379-
1380- # Update model metadata
1381- model .update_design (config_dict = {
1382- "deployment_status" : "deployed" ,
1383- "test_accuracy" : test_accuracy ,
1384- "deployment_date" : str (task .created ),
1385- "deployment_threshold" : min_accuracy_threshold ,
1386- "deployed_by" : "GitHub Actions"
1387- })
1388-
1389- logger .report_scalar ("Deployment" , "Status" , 1 , 0 ) # 1 = deployed
1390- logger .report_scalar ("Deployment" , "Test_Accuracy" , test_accuracy , 0 )
1391-
1392- print (f"✅ Model deployed successfully!" )
1393- print (f"📊 Test Accuracy: { test_accuracy :.2f} %" )
1394- print (f"🎯 Threshold: { min_accuracy_threshold } %" )
1395- print (f"🏷️ Model ID: { best_model_id } " )
1396-
1397- return "deployed"
1470+ try :
1471+ # Get the model
1472+ model = Model (model_id = best_model_id )
1473+
1474+ # Publish the model for deployment
1475+ model .publish ()
1476+ print (f"📤 Model published successfully" )
1477+
1478+ # Add deployment tags
1479+ model .add_tags (["deployed" , "production" , "github-actions" ])
1480+ print (f"🏷️ Added deployment tags" )
1481+
1482+ # Update model metadata
1483+ model .update_design (config_dict = {
1484+ "deployment_status" : "deployed" ,
1485+ "test_accuracy" : test_accuracy ,
1486+ "deployment_date" : str (task .created ),
1487+ "deployment_threshold" : min_accuracy_threshold ,
1488+ "deployed_by" : "GitHub Actions"
1489+ })
1490+ print (f"📋 Updated model metadata" )
1491+
1492+ logger .report_scalar ("Deployment" , "Status" , 1 , 0 ) # 1 = deployed
1493+ logger .report_scalar ("Deployment" , "Test_Accuracy" , test_accuracy , 0 )
1494+
1495+ print (f"✅ Model deployed successfully!" )
1496+ print (f"📊 Test Accuracy: { test_accuracy :.2f} %" )
1497+ print (f"🎯 Threshold: { min_accuracy_threshold } %" )
1498+ print (f"🏷️ Model ID: { best_model_id } " )
1499+
1500+ return "deployed"
1501+
1502+ except Exception as e :
1503+ print (f"❌ Error during deployment: { e } " )
1504+ logger .report_scalar ("Deployment" , "Status" , 0 , 0 ) # 0 = failed
1505+ logger .report_scalar ("Deployment" , "Test_Accuracy" , test_accuracy , 0 )
1506+ print (f"⚠️ Model met accuracy threshold but deployment failed" )
1507+ return "deployment_failed"
13981508 else :
13991509 logger .report_scalar ("Deployment" , "Status" , 0 , 0 ) # 0 = not deployed
14001510 logger .report_scalar ("Deployment" , "Test_Accuracy" , test_accuracy , 0 )
@@ -1504,12 +1614,16 @@ def guardian_github_pipeline():
15041614
15051615 # Step 6: Deploy model if it meets threshold
15061616 logging .info ("Starting model deployment..." )
1507- deployment_status = deploy_model_github (
1508- best_model_id = best_model_id ,
1509- test_accuracy = accuracy_value ,
1510- min_accuracy_threshold = 85.0 # Deploy if accuracy >= 85%
1511- )
1512- logging .info (f"Deployment completed. Status: { deployment_status } " )
1617+ try :
1618+ deployment_status = deploy_model_github (
1619+ best_model_id = best_model_id ,
1620+ test_accuracy = accuracy_value ,
1621+ min_accuracy_threshold = 85.0 # Deploy if accuracy >= 85%
1622+ )
1623+ logging .info (f"Deployment completed. Status: { deployment_status } " )
1624+ except Exception as e :
1625+ logging .error (f"Deployment failed with error: { e } " )
1626+ deployment_status = "deployment_error"
15131627
15141628 logging .info ("Guardian GitHub Pipeline finished successfully." )
15151629 return accuracy_value , deployment_status
0 commit comments