88import torch
99import yaml
1010from torch import amp , nn , optim
11- from torch .utils .data import DataLoader
1211
13- # Assumendo che la struttura dei package sia corretta rispetto alla root
14- from src .common import DEVICE , get_dataloader , get_model
15-
16-
def find_best_model(models_list: list[str], base_path: Path = Path(".")) -> str:
    """Identify the model with the highest top1 metric.

    Args:
        models_list: List of model names to check.
        base_path: Directory containing one sub-folder per model, each
            expected to hold a ``metrics.json`` file.

    Returns:
        The name of the best model.

    Raises:
        FileNotFoundError: If no model has a readable ``metrics.json``
            containing a ``top1`` entry.

    """
    best_top1 = -1.0
    best_model_name = ""

    for model_name in models_list:
        metrics_path = base_path / model_name / "metrics.json"
        if not metrics_path.exists():
            continue
        with metrics_path.open("r") as f:
            data = json.load(f)
        # Use .get so a metrics.json that lacks the "top1" key is skipped
        # instead of aborting the whole scan with a KeyError.
        top1 = data.get("top1")
        if top1 is not None and top1 > best_top1:
            best_top1 = top1
            best_model_name = model_name

    if not best_model_name:
        raise FileNotFoundError(
            "No valid metrics.json found to determine the best model."
        )

    return best_model_name
46-
47-
def validate(model: nn.Module, loader: DataLoader, criterion: nn.Module) -> float:
    """Compute the mean loss of *model* over the validation set.

    Args:
        model: The neural network model.
        loader: DataLoader for validation.
        criterion: Loss function.

    Returns:
        Average validation loss.

    """
    model.eval()
    total_loss = 0.0
    # Gradients are never needed during evaluation; disabling them avoids
    # building the autograd graph and saves memory.
    with torch.no_grad():
        for batch, labels in loader:
            batch = batch.to(DEVICE)
            labels = labels.to(DEVICE)
            preds = model(batch)
            # Weight each batch loss by its size so the final average is
            # exact even when the last batch is smaller than the rest.
            total_loss += criterion(preds, labels).item() * batch.size(0)
    return total_loss / len(loader.dataset)
69-
70-
def train_one_epoch(
    model: nn.Module,
    loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
    scaler: amp.GradScaler,
) -> float:
    """Run one fine-tuning epoch using AMP.

    Args:
        model: The neural network model.
        loader: DataLoader for the training set.
        criterion: Loss function.
        optimizer: Optimizer.
        scaler: GradScaler for AMP.

    Returns:
        Average training loss.

    """
    model.train()
    total_loss = 0.0
    # The autocast target never changes mid-epoch, so resolve it once.
    autocast_device = "cuda" if torch.cuda.is_available() else "cpu"

    for batch, labels in loader:
        batch, labels = batch.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()

        # Mixed-precision forward pass; the scaler rescales the loss so
        # small gradients are not flushed to zero in reduced precision.
        with amp.autocast(device_type=autocast_device):
            loss = criterion(model(batch), labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Weight by batch size so the epoch average is exact even when the
        # final batch is short.
        total_loss += loss.item() * batch.size(0)

    return total_loss / len(loader.dataset)
12+ from src .common import DEVICE , get_dataloader , get_model , train_epoch , validate
10913
11014
11115def main () -> None :
@@ -120,54 +24,55 @@ def main() -> None:
12024 with open (args .config ) as conf_file :
12125 config : dict [str , Any ] = yaml .safe_load (conf_file )
12226
123- # 2. Setup directory di output (sovrascriviamo o creiamo una cartella fine_tuned)
12427 out_dir = Path (args .output_dir )
12528 out_dir .mkdir (parents = True , exist_ok = True )
12629
127- # 3. Caricamento dati (Phase 2 per fine-tuning)
128- # Nota: Assumiamo che phase2 sia la directory dei dati di training bilanciati
129- train_loader = get_dataloader (
30+
31+ t_loader = get_dataloader (
13032 data_path = Path (config ["finetuning" ]["data_path" ]),
13133 batch_size = config ["finetuning" ]["batch_size" ],
13234 )
13335
134- # Usiamo il set di validazione originale per il monitoraggio
135- val_loader = get_dataloader (
36+ v_loader = get_dataloader (
13637 data_path = Path (config ["data" ]["valset_path" ]),
13738 batch_size = config ["finetuning" ]["batch_size" ],
13839 )
13940
140- # 4. Inizializzazione modello e caricamento pesi precedenti
141- model = get_model (args .model , len (train_loader .dataset .classes ))
41+ # Model initialization loading first stage's weights
42+ model = get_model (args .model , len (t_loader .dataset .classes ))
14243 weights_path = out_dir .parent / "model.pth"
143- model .load_state_dict (torch .load (weights_path , map_location = DEVICE ))
144- model .to (DEVICE )
44+ # load_state_dict returns an IncompatibleKeys result, not the module,
44+ # so .to(DEVICE) must be called on the model itself, not chained.
44+ model .load_state_dict (torch .load (weights_path , map_location = DEVICE ))
45+ model .to (DEVICE )
14545
146- # 5. Configurazione training
147- # Per il fine-tuning si usa solitamente un Learning Rate più basso (es. 1e-5 o 1e-4)
46+ # Unfreeze layers
47+ for param in model .parameters ():
48+ param .requires_grad = True
14849
50+ # Fine-tuning setup
14951 criterion = nn .CrossEntropyLoss ()
15052 optimizer = optim .Adam (model .parameters (), lr = config ["finetuning" ]["lr" ])
15153 scaler = amp .GradScaler ()
15254
55+ # Model fine-tuning
15356 history = []
154-
155- # 6. Loop di fine-tuning
15657 epochs = config ["finetuning" ]["epochs" ]
58+ print (f"Fine-tuning { args .model } ..." )
15759 for epoch in range (epochs ):
158- t_loss = train_one_epoch (model , train_loader , criterion , optimizer , scaler )
159- v_loss = validate (model , val_loader , criterion )
60+ t_loss = train_epoch (model , t_loader , criterion , optimizer , scaler )
61+ v_loss = validate (model , v_loader , criterion )
16062
16163 history .append ({"epoch" : epoch + 1 , "train_loss" : t_loss , "val_loss" : v_loss })
16264
16365 print (
16466 f"Epoch { epoch + 1 } /{ epochs } | "
165- f"FT Train Loss: { t_loss :.4f} | "
166- f"FT Val Loss: { v_loss :.4f} "
67+ f"T- Loss: { t_loss :.4f} | "
68+ f"V- Loss: { v_loss :.4f} "
16769 )
70+ print (f"Model { args .model } fine-tuned successfully!" )
16871
169- # 7. Salvataggio artefatti
72+ # Saving the model
17073 torch .save (model .state_dict (), out_dir / "model.pth" )
74+
75+ # Saving training and validation loss in loss.json file
17176 with open (out_dir / "loss.json" , "w" ) as f :
17277 json .dump (history , f , indent = 4 )
17378
0 commit comments