1 change: 1 addition & 0 deletions .gitignore
@@ -51,6 +51,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
mlruns/
figures/

# Translations
*.mo
49 changes: 49 additions & 0 deletions run_attribution.py
@@ -0,0 +1,49 @@
from ultralytics import YOLOv10, YOLO
# from ultralytics.engine.pgt_trainer import PGTTrainer
# from ultralytics import BaseTrainer
# from ultralytics.engine.trainer import BaseTrainer
import os
from ultralytics.models.yolo.segment import PGTSegmentationTrainer


# Set CUDA device (only needed for multi-gpu machines)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

# model = YOLOv10()
# model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # build from YAML and transfer weights

# model = YOLO()
# If you want to finetune the model with pretrained weights, you could load the
# pretrained weights like below
# model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
# or
# wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
model = YOLOv10('yolov10n.pt', task='segment')

args = dict(model='yolov10n.pt', data='coco128-seg.yaml')
trainer = PGTSegmentationTrainer(overrides=args)
trainer.train(
    # debug=True,
    # args = dict(pgt_coeff=0.1),
)

# model.train(
# # data='coco.yaml',
# data='coco128-seg.yaml',
# trainer=model._smart_load("pgt_trainer"), # This is needed to generate attributions (will be used later to train via PGT)
# # Add return_images as input parameter
# epochs=500, batch=16, imgsz=640,
# debug=True, # If debug = True, the attributions will be saved in the figures folder
# # cfg='/home/nielseni6/PythonScripts/yolov10/ultralytics/cfg/models/v8/yolov8-seg.yaml',
# # overrides=dict(task="segment"),
# )

# Save the trained model
model.save('yolov10_coco_trained.pt')

# Evaluate the model on the validation set
results = model.val(data='coco.yaml')

# Print the evaluation results
print(results)
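
As a usage note, the commented-out arguments above hint that both the PGT coefficient and a debug flag can be passed through the trainer; a minimal sketch under that assumption (the pgt_coeff and debug keys are taken from the commented-out hints in this script, not from a documented API):

from ultralytics.models.yolo.segment import PGTSegmentationTrainer

# Assumed overrides, mirroring the commented-out hints in run_attribution.py:
# pgt_coeff scales the PGT term, and debug=True saves attribution figures to figures/.
overrides = dict(model='yolov10n.pt', data='coco128-seg.yaml', pgt_coeff=0.1)
trainer = PGTSegmentationTrainer(overrides=overrides)
trainer.train(debug=True)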
63 changes: 63 additions & 0 deletions run_pgt_train.py
@@ -0,0 +1,63 @@
from ultralytics import YOLOv10, YOLO, YOLOv10PGT
# from ultralytics.engine.pgt_trainer import PGTTrainer
import os
from ultralytics.models.yolo.segment import PGTSegmentationTrainer
import argparse
from datetime import datetime
import torch

# nohup python run_pgt_train.py --device 7 > ./output_logs/gpu7_yolov10_pgt_train.log 2>&1 &

def main(args):
    model = YOLOv10PGT('yolov10n.pt')

    if args.pgt_coeff is None:
        model.train(data=args.data_yaml, epochs=args.epochs, batch=args.batch_size)
    else:
        model.train(
            data=args.data_yaml,
            epochs=args.epochs,
            batch=args.batch_size,
            # amp=False,
            pgt_coeff=args.pgt_coeff,
            # cfg='pgt_train.yaml', # Load and train model with the config file
        )
    # If you want to finetune the model with pretrained weights, you could load the
    # pretrained weights like below
    # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
    # or
    # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
    # model = YOLOv10('yolov10n.pt', task='segment')

    # Create a directory to save model weights if it doesn't exist
    model_weights_dir = 'model_weights'
    if not os.path.exists(model_weights_dir):
        os.makedirs(model_weights_dir)

    # Save the trained model with a unique name based on the current date and time
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    data_yaml_base = os.path.splitext(os.path.basename(args.data_yaml))[0]
    model_save_path = os.path.join(model_weights_dir, f'yolov10_{data_yaml_base}_trained_{current_time}.pt')
    model.save(model_save_path)
    # torch.save(trainer.model.state_dict(), model_save_path)

    # Evaluate the model on the validation set
    results = model.val(data=args.data_yaml)

    # Print the evaluation results
    print(results)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train YOLOv10 model with PGT segmentation.')
    parser.add_argument('--device', type=str, default='0', help='CUDA device number')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training')
    parser.add_argument('--data_yaml', type=str, default='coco.yaml', help='Path to the data YAML file')
    parser.add_argument('--pgt_coeff', type=float, default=None, help='Coefficient for PGT')
    args = parser.parse_args()

    # Set CUDA device (only needed for multi-gpu machines)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main(args)
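
If several values of the PGT coefficient need to be compared, main() above can also be reused programmatically; a sketch with a hypothetical set of coefficients (each call launches a full training run):

import argparse

# Hypothetical coefficient sweep; each value triggers a complete training job via main() above.
for coeff in (0.5, 1.0, 2.0):
    sweep_args = argparse.Namespace(
        device='0',
        batch_size=32,
        epochs=100,
        data_yaml='coco128-seg.yaml',
        pgt_coeff=coeff,
    )
    main(sweep_args)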

83 changes: 83 additions & 0 deletions run_train.py
@@ -0,0 +1,83 @@
from ultralytics import YOLOv10, YOLO
# from ultralytics.engine.pgt_trainer import PGTTrainer
# from ultralytics import BaseTrainer
# from ultralytics.engine.trainer import BaseTrainer
import os

# Set CUDA device (only needed for multi-gpu machines)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

model = YOLOv10()
# model = YOLO()
# If you want to finetune the model with pretrained weights, you could load the
# pretrained weights like below
# model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
# or
# wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
# model = YOLOv10('yolov10m.pt')

model.train(data='coco.yaml',
            # Add return_images as input parameter
            epochs=500, batch=16, imgsz=640,
            )

# Save the trained model
model.save('yolov10_coco_trained.pt')

# Evaluate the model on the validation set
results = model.val(data='coco.yaml')

# Print the evaluation results
print(results)

# import torch
# from torch.utils.data import DataLoader
# from torchvision import datasets, transforms

# # Define the transformation for the dataset
# transform = transforms.Compose([
# transforms.Resize((640, 640)),
# transforms.ToTensor()
# ])

# # Load the COCO dataset
# train_dataset = datasets.CocoDetection(root='data/nielseni6/coco/train2017', annFile='/data/nielseni6/coco/annotations/instances_train2017.json', transform=transform)
# val_dataset = datasets.CocoDetection(root='data/nielseni6/coco/val2017', annFile='/data/nielseni6/coco/annotations/instances_val2017.json', transform=transform)

# # Create data loaders
# train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=4)
# val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=4)

# model = YOLOv10()

# # Define the optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# # Training loop
# for epoch in range(500):
#     model.train()
#     for images, targets in train_loader:
#         images = images.to('cuda')
#         targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]
#         loss = model(images, targets)
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()

# # Validation loop
# model.eval()
# with torch.no_grad():
#     for images, targets in val_loader:
#         images = images.to('cuda')
#         targets = [{k: v.to('cuda') for k, v in t.items()} for t in targets]
#         results = model(images, targets)

# # Save the trained model
# model.save('yolov10_coco_trained.pt')

# # Evaluate the model on the validation set
# results = model.val(data='coco.yaml')

# # Print the evaluation results
# print(results)
32 changes: 32 additions & 0 deletions run_val.py
@@ -0,0 +1,32 @@
from ultralytics import YOLOv10, YOLO, YOLOv10PGT
# from ultralytics.engine.pgt_trainer import PGTTrainer
import os
from ultralytics.models.yolo.segment import PGTSegmentationTrainer
import argparse
from datetime import datetime

# nohup python run_val.py --device 1 > ./output_logs/gpu1_yolov10_val.log 2>&1 &

def main(args):

    model = YOLOv10PGT(args.model_path)

    # Evaluate the model on the validation set
    results = model.val(data=args.data_yaml)

    # Print the evaluation results
    print(results)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Evaluate a YOLOv10 PGT model on the validation set.')
    parser.add_argument('--device', type=str, default='1', help='CUDA device number')
    parser.add_argument('--batch_size', type=int, default=64, help='Batch size for training')
    parser.add_argument('--epochs', type=int, default=100, help='Number of epochs for training')
    parser.add_argument('--data_yaml', type=str, default='coco.yaml', help='Path to the data YAML file')
    parser.add_argument('--model_path', type=str, default='yolov10n.pt', help='Path to the model file')
    args = parser.parse_args()

    # Set CUDA device (only needed for multi-gpu machines)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main(args)
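
To evaluate a checkpoint produced by run_pgt_train.py, the timestamped file under model_weights/ is what --model_path should point to; a sketch of the equivalent direct call, with a hypothetical checkpoint name:

from ultralytics import YOLOv10PGT

# Hypothetical checkpoint name; the real file is stamped with the training date and time.
model = YOLOv10PGT('model_weights/yolov10_coco_trained_20240101_120000.pt')
results = model.val(data='coco.yaml')
print(results)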
5 changes: 3 additions & 2 deletions ultralytics/__init__.py
@@ -3,7 +3,7 @@
__version__ = "8.1.34"

from ultralytics.data.explorer.explorer import Explorer
from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10, YOLOv10PGT
from ultralytics.models.fastsam import FastSAM
from ultralytics.models.nas import NAS
from ultralytics.utils import ASSETS, SETTINGS as settings
@@ -23,5 +23,6 @@
"download",
"settings",
"Explorer",
"YOLOv10"
"YOLOv10",
"YOLOv10PGT",
)
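
With YOLOv10PGT added to the package imports and __all__, it becomes importable from the package root, which is what the new scripts rely on; a minimal check:

from ultralytics import YOLOv10PGT

model = YOLOv10PGT('yolov10n.pt')  # same entry point used by run_pgt_train.py and run_val.py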
1 change: 1 addition & 0 deletions ultralytics/cfg/default.yaml
@@ -41,6 +41,7 @@ overlap_mask: True # (bool) masks should overlap during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification
dropout: 0.0 # (float) use dropout regularization (classify train only)
pgt_coeff: 2.0 # (float) PGT loss coefficient

# Val/Test settings ----------------------------------------------------------------------------------------------------
val: True # (bool) validate/test during training
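
The new pgt_coeff entry defaults to 2.0 and, like the other hyperparameters in default.yaml, can be overridden per run rather than edited in place; a sketch of that override, mirroring how run_pgt_train.py forwards its --pgt_coeff flag:

from ultralytics import YOLOv10PGT

model = YOLOv10PGT('yolov10n.pt')
# Override the default pgt_coeff (2.0) for this run only; other settings fall back to default.yaml.
model.train(data='coco128-seg.yaml', epochs=100, batch=16, pgt_coeff=0.5)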
128 changes: 128 additions & 0 deletions ultralytics/cfg/pgt_train.yaml
@@ -0,0 +1,128 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default training settings and hyperparameters for medium-augmentation COCO training

task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark

# Train settings -------------------------------------------------------------------------------------------------------
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
data: # (str, optional) path to data file, i.e. coco128.yaml
epochs: 100 # (int) number of epochs to train for
time: # (float, optional) number of hours to train for, overrides epochs if supplied
patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
batch: 16 # (int) number of images per batch (-1 for AutoBatch)
imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
save: True # (bool) save train checkpoints and predict results
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
val_period: 1 # (int) Validation every x epochs
cache: False # (bool) True/ram, disk or False. Use cache for data loading
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
project: # (str, optional) project name
name: # (str, optional) experiment name, results saved to 'project/name' directory
exist_ok: False # (bool) whether to overwrite existing experiment
pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
verbose: True # (bool) whether to print verbose output
seed: 0 # (int) random seed for reproducibility
deterministic: True # (bool) whether to enable deterministic mode
single_cls: False # (bool) train multi-class data as single-class
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
cos_lr: False # (bool) use cosine learning rate scheduler
close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
resume: False # (bool) resume training from last checkpoint
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False # (bool) Whether to use multiscale during training
# Segmentation
overlap_mask: True # (bool) masks should overlap during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification
dropout: 0.0 # (float) use dropout regularization (classify train only)
pgt_coeff: 1.0 # (float) PGT loss coefficient

# Val/Test settings ----------------------------------------------------------------------------------------------------
val: True # (bool) validate/test during training
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
save_json: False # (bool) save results to JSON file
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
max_det: 300 # (int) maximum number of detections per image
half: False # (bool) use half precision (FP16)
dnn: False # (bool) use OpenCV DNN for ONNX inference
plots: True # (bool) save plots and images during train/val

# Predict settings -----------------------------------------------------------------------------------------------------
source: # (str, optional) source directory for images or videos
vid_stride: 1 # (int) video frame-rate stride
stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
visualize: False # (bool) visualize model features
augment: False # (bool) apply image augmentation to prediction sources
agnostic_nms: False # (bool) class-agnostic NMS
classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
retina_masks: False # (bool) use high-resolution segmentation masks
embed: # (list[int], optional) return feature vectors/embeddings from given layers

# Visualize settings ---------------------------------------------------------------------------------------------------
show: False # (bool) show predicted images and videos if environment allows
save_frames: False # (bool) save predicted individual video frames
save_txt: False # (bool) save results as .txt file
save_conf: False # (bool) save results with confidence scores
save_crop: False # (bool) save cropped images with results
show_labels: True # (bool) show prediction labels, i.e. 'person'
show_conf: True # (bool) show prediction confidence, i.e. '0.99'
show_boxes: True # (bool) show prediction boxes
line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

# Export settings ------------------------------------------------------------------------------------------------------
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
keras: False # (bool) use Keras
optimize: False # (bool) TorchScript: optimize for mobile
int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: False # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
workspace: 4 # (int) TensorRT: workspace size (GB)
nms: False # (bool) CoreML: add NMS

# Hyperparameters ------------------------------------------------------------------------------------------------------
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
momentum: 0.937 # (float) SGD momentum/Adam beta1
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
warmup_momentum: 0.8 # (float) warmup initial momentum
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
box: 7.5 # (float) box loss gain
cls: 0.5 # (float) cls loss gain (scale with pixels)
dfl: 1.5 # (float) dfl loss gain
pose: 12.0 # (float) pose loss gain
kobj: 1.0 # (float) keypoint obj loss gain
label_smoothing: 0.0 # (float) label smoothing (fraction)
nbs: 64 # (int) nominal batch size
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
degrees: 0.0 # (float) image rotation (+/- deg)
translate: 0.1 # (float) image translation (+/- fraction)
scale: 0.5 # (float) image scale (+/- gain)
shear: 0.0 # (float) image shear (+/- deg)
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # (float) image flip up-down (probability)
fliplr: 0.5 # (float) image flip left-right (probability)
bgr: 0.0 # (float) image channel BGR (probability)
mosaic: 1.0 # (float) image mosaic (probability)
mixup: 0.0 # (float) image mixup (probability)
copy_paste: 0.0 # (float) segment copy-paste (probability)
auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

# Custom config.yaml ---------------------------------------------------------------------------------------------------
cfg: # (str, optional) for overriding defaults.yaml

# Tracker settings ------------------------------------------------------------------------------------------------------
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
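
This config mirrors default.yaml but sets pgt_coeff to 1.0; the commented-out cfg='pgt_train.yaml' line in run_pgt_train.py suggests it is meant to be passed through the cfg override rather than replacing default.yaml. A minimal sketch of that assumed usage:

from ultralytics import YOLOv10PGT

model = YOLOv10PGT('yolov10n.pt')
# Assumed usage, per the commented-out hint in run_pgt_train.py:
# hyperparameters (including pgt_coeff: 1.0) are read from this file instead of default.yaml.
model.train(data='coco128-seg.yaml', cfg='pgt_train.yaml')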