@@ -1345,15 +1345,37 @@ def make_torch_dataset_for_loader(split_data, split_labels):
13451345 name = "Deploy_Model_GitHub" ,
13461346 return_values = ["deployment_status" ],
13471347 cache = False ,
1348- packages = ["clearml" ]
1348+ packages = ["clearml" , "pymongo" , "torch" , "gridfs" ]
13491349)
13501350def deploy_model_github (
13511351 best_model_id : str ,
1352+ best_model_path : str ,
13521353 test_accuracy : float ,
1353- min_accuracy_threshold : float = 85.0
1354+ min_accuracy_threshold : float = 85.0 ,
1355+ mongo_uri : str = None
13541356):
1355- """Deploy the best model if it meets accuracy threshold."""
1357+ """Deploy the best model if it meets accuracy threshold and save to MongoDB ."""
13561358 from clearml import Model , Task
1359+ import os
1360+ import torch
1361+ import json
1362+ import logging
1363+ import shutil
1364+ import sys
1365+
1366+ # Add the current directory to the path for importing local modules
1367+ current_dir = os .path .dirname (os .path .abspath (__file__ ))
1368+ if current_dir not in sys .path :
1369+ sys .path .append (current_dir )
1370+
1371+ # Try to import the mongodb_model_distribution module
1372+ try :
1373+ from mongodb_model_distribution import GuardianModelDistribution
1374+ has_model_distribution = True
1375+ print ("✅ Found mongodb_model_distribution module" )
1376+ except ImportError :
1377+ has_model_distribution = False
1378+ print ("⚠️ mongodb_model_distribution module not found. Will use basic MongoDB storage." )
13571379
13581380 task = Task .init (
13591381 project_name = "Guardian_Training" ,
@@ -1385,20 +1407,153 @@ def deploy_model_github(
13851407 print (f"⚠️ Could not add tags: { tag_error } " )
13861408 # Continue anyway - tags are not critical
13871409
1410+ # Get the best task to retrieve hyperparameters
1411+ best_task = Task .get_task (task_id = model .task )
1412+ if not best_task :
1413+ print ("⚠️ Could not retrieve task for model hyperparameters" )
1414+ hyperparams = {}
1415+ else :
1416+ hyperparams = best_task .get_parameters ()
1417+ print (f"📋 Retrieved hyperparameters from task { best_task .id } " )
1418+
13881419 # Update model metadata
13891420 try :
13901421 model .update_design (config_dict = {
13911422 "deployment_status" : "deployed" ,
13921423 "test_accuracy" : test_accuracy ,
13931424 "deployment_date" : str (task .created ),
13941425 "deployment_threshold" : min_accuracy_threshold ,
1395- "deployed_by" : "GitHub Actions"
1426+ "deployed_by" : "GitHub Actions" ,
1427+ "mongodb_stored" : False # Will update if MongoDB storage succeeds
13961428 })
13971429 print (f"📋 Updated model metadata" )
13981430 except Exception as metadata_error :
13991431 print (f"⚠️ Could not update metadata: { metadata_error } " )
14001432 # Continue anyway - metadata is not critical
14011433
1434+ # MongoDB integration - Store model weights and hyperparameters
1435+ if mongo_uri :
1436+ try :
1437+ print (f"🔄 Connecting to MongoDB for model storage..." )
1438+
1439+ # Ensure the model path exists
1440+ if not os .path .exists (best_model_path ):
1441+ model_path = model .get_local_copy ()
1442+ print (f"📥 Model weights downloaded to { model_path } " )
1443+ else :
1444+ model_path = best_model_path
1445+ print (f"📄 Using existing model weights at { model_path } " )
1446+
1447+ # Load the checkpoint so its top-level keys can be recorded in the metadata
1448+ try :
1449+ checkpoint = torch .load (model_path , map_location = 'cpu' )
1450+ print (f"✅ Model weights loaded successfully!" )
1451+ except Exception as e :
1452+ print (f"⚠️ Error loading model weights: { e } " )
1453+ checkpoint = {}
1454+
1455+ # Create model name from the first 8 characters of the model ID and the integer-truncated accuracy
1456+ model_name = f"guardian_model_{ best_model_id [:8 ]} _{ int (test_accuracy )} "
1457+
1458+ # Prepare model metadata for distribution
1459+ model_metadata = {
1460+ "model_id" : best_model_id ,
1461+ "test_accuracy" : float (test_accuracy ),
1462+ "deployment_date" : str (task .created ),
1463+ "training_task_id" : str (best_task .id ) if best_task else "unknown" ,
1464+ "architecture" : model .get_model_design () or {},
1465+ "hyperparameters" : hyperparams ,
1466+ "checkpoint_keys" : list (checkpoint .keys ()) if checkpoint else [],
1467+ "input_size" : hyperparams .get ("General/input_size" , {}).get ("value" , 34 ),
1468+ "hidden_size" : hyperparams .get ("General/hidden_size" , {}).get ("value" , 256 ),
1469+ "num_layers" : hyperparams .get ("General/num_layers" , {}).get ("value" , 4 ),
1470+ "num_classes" : hyperparams .get ("General/num_classes" , {}).get ("value" , 3 ),
1471+ "framework" : "PyTorch" ,
1472+ "model_type" : "BiLSTM_ActionRecognition" ,
1473+ "description" : "Guardian AI Action Recognition Model"
1474+ }
1475+
1476+ # Use the GuardianModelDistribution class if available
1477+ if has_model_distribution :
1478+ print ("🔄 Using GuardianModelDistribution for model storage..." )
1479+ distributor = GuardianModelDistribution (uri = mongo_uri )
1480+
1481+ if distributor .connect ():
1482+ # Upload model using the distribution system
1483+ result = distributor .upload_model (
1484+ model_path = model_path ,
1485+ model_metadata = model_metadata ,
1486+ model_name = model_name
1487+ )
1488+
1489+ if result :
1490+ print (f"🗃️ Model uploaded to distribution system:" )
1491+ print (f" Model Name: { result ['model_name' ]} " )
1492+ print (f" Document ID: { result ['document_id' ]} " )
1493+ print (f" Download Command: { result ['download_command' ]} " )
1494+
1495+ # Update model metadata to reflect MongoDB storage
1496+ model .update_design (config_dict = {"mongodb_stored" : True })
1497+ else :
1498+ print ("❌ Failed to upload model to distribution system" )
1499+ else :
1500+ print ("❌ Failed to connect to MongoDB distribution system" )
1501+ else :
1502+ # Fallback to basic MongoDB storage
1503+ from pymongo import MongoClient
1504+ import gridfs
1505+
1506+ # Connect to MongoDB
1507+ client = MongoClient (mongo_uri )
1508+ db = client .guardian_models
1509+ fs = gridfs .GridFS (db )
1510+
1511+ # Store the model weights
1512+ with open (model_path , 'rb' ) as f :
1513+ weights_file_id = fs .put (
1514+ f ,
1515+ filename = f"{ model_name } .pth" ,
1516+ metadata = {
1517+ "model_id" : best_model_id ,
1518+ "accuracy" : float (test_accuracy ),
1519+ "deployment_date" : str (task .created )
1520+ }
1521+ )
1522+
1523+ # Prepare model metadata and hyperparameters
1524+ model_info = {
1525+ "model_name" : model_name ,
1526+ "model_id" : best_model_id ,
1527+ "test_accuracy" : float (test_accuracy ),
1528+ "weights_file_id" : weights_file_id ,
1529+ "hyperparameters" : hyperparams ,
1530+ "deployment_date" : str (task .created ),
1531+ "deployment_status" : "deployed" ,
1532+ "architecture" : model .get_model_design () or {},
1533+ "checkpoint_keys" : list (checkpoint .keys ()) if checkpoint else [],
1534+ "file_size_mb" : os .path .getsize (model_path ) / (1024 * 1024 ),
1535+ "status" : "available" ,
1536+ "download_count" : 0 ,
1537+ "uploaded_at" : str (task .created ),
1538+ "file_id" : weights_file_id
1539+ }
1540+
1541+ # Store model metadata
1542+ db .model_metadata .insert_one (model_info )
1543+
1544+ print (f"🗃️ Model weights and metadata saved to MongoDB" )
1545+ print (f" Model Name: { model_name } " )
1546+ print (f" File Size: { model_info ['file_size_mb' ]:.2f} MB" )
1547+
1548+ # Update model metadata to reflect MongoDB storage
1549+ model .update_design (config_dict = {"mongodb_stored" : True })
1550+
1551+ except Exception as mongo_error :
1552+ print (f"❌ MongoDB storage error: { mongo_error } " )
1553+ logger .report_text (f"MongoDB storage failed: { mongo_error } " )
1554+ else :
1555+ print ("ℹ️ MongoDB URI not provided, skipping database storage" )
1556+
14021557 logger .report_scalar ("Deployment" , "Status" , 1 , 0 ) # 1 = deployed
14031558 logger .report_scalar ("Deployment" , "Test_Accuracy" , test_accuracy , 0 )
14041559
@@ -1440,6 +1595,13 @@ def guardian_github_pipeline():
14401595 dataset_name = "Guardian_Dataset"
14411596 dataset_project = "Guardian_Training"
14421597
1598+ # Get MongoDB URI from environment variable
1599+ mongo_uri = os .environ .get ("MONGODB_URI" , None )
1600+ if mongo_uri :
1601+ logging .info ("MongoDB URI configured for model storage" )
1602+ else :
1603+ logging .warning ("MongoDB URI not found in environment variables. Models will not be stored in MongoDB." )
1604+
14431605 # Multiple path options for your self-hosted runner
14441606 possible_paths = [
14451607 # Your absolute dataset path
@@ -1506,6 +1668,49 @@ def guardian_github_pipeline():
15061668 )
15071669 logging .info (f"HPO completed. Best task ID: { best_task_id } , Best model ID: { best_model_id } " )
15081670
1671+ # Get the best model path from ClearML
1672+ try :
1673+ from clearml import Model
1674+ logging .info (f"Retrieving best model with ID: { best_model_id } " )
1675+
1676+ # Create a specific path for the best model that includes the model ID
1677+ best_model_filename = f"best_bilstm_github_{ best_model_id } .pth"
1678+ best_model_path = os .path .join (os .getcwd (), best_model_filename )
1679+
1680+ # Check if we already have this specific model
1681+ if os .path .exists (best_model_path ):
1682+ logging .info (f"Best model already exists at { best_model_path } " )
1683+ else :
1684+ # Download the model from ClearML by ID
1685+ best_model = Model (model_id = best_model_id )
1686+ downloaded_path = best_model .get_local_copy ()
1687+
1688+ # If the downloaded path is different from our desired path, copy it
1689+ if downloaded_path != best_model_path :
1690+ shutil .copy2 (downloaded_path , best_model_path )
1691+ logging .info (f"Copied best model from { downloaded_path } to { best_model_path } " )
1692+ else :
1693+ logging .info (f"Downloaded best model to { best_model_path } " )
1694+
1695+ # Verify the model file exists and has content
1696+ if not os .path .exists (best_model_path ) or os .path .getsize (best_model_path ) == 0 :
1697+ logging .error (f"Best model file is missing or empty at { best_model_path } " )
1698+ raise FileNotFoundError (f"Best model file not found: { best_model_path } " )
1699+
1700+ # Verify the model file is loadable (integrity check) by loading it on CPU
1701+ try :
1702+ import torch
1703+ checkpoint = torch .load (best_model_path , map_location = 'cpu' )
1704+ logging .info (f"Successfully verified model file integrity. Model contains { len (checkpoint )} keys." )
1705+ except Exception as e :
1706+ logging .error (f"Failed to load model for verification: { e } " )
1707+ best_model = Model (model_id = best_model_id )
1708+ best_model_path = best_model .get_local_copy ()
1709+ logging .warning (f"Re-downloaded model to { best_model_path } after verification failure" )
1710+ except Exception as e :
1711+ logging .error (f"Failed to retrieve best model: { e } " )
1712+ best_model_path = "" # Empty string if model download fails
1713+
15091714 # Step 5: Evaluate best model
15101715 logging .info ("Starting model evaluation..." )
15111716 test_accuracy = evaluate_model_github (
@@ -1523,8 +1728,10 @@ def guardian_github_pipeline():
15231728 try :
15241729 deployment_status = deploy_model_github (
15251730 best_model_id = best_model_id ,
1731+ best_model_path = best_model_path ,
15261732 test_accuracy = accuracy_value ,
1527- min_accuracy_threshold = 85.0 # Deploy if accuracy >= 85%
1733+ min_accuracy_threshold = 85.0 , # Deploy if accuracy >= 85%
1734+ mongo_uri = mongo_uri
15281735 )
15291736 logging .info (f"Deployment completed. Status: { deployment_status } " )
15301737 except Exception as e :
0 commit comments