From 9af66eaf4d937722bfefd8f995e421e25779434f Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:12:48 +0800 Subject: [PATCH 01/59] Add files via upload --- .../__pycache__/nano.cpython-38.pyc | Bin 0 -> 1752 bytes .../yolox_pedestrian/coco_format/nano.py | 48 +++++ .../yolox_pedestrian/coco_format/yolox_s.py | 25 +++ .../__pycache__/yolox_voc_nano.cpython-38.pyc | Bin 0 -> 3782 bytes .../voc_format/yolox_voc_nano.py | 145 +++++++++++++++ .../voc_format/yolox_voc_nano_adam.py | 176 ++++++++++++++++++ .../voc_format/yolox_voc_s.py | 123 ++++++++++++ 7 files changed, 517 insertions(+) create mode 100644 exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc create mode 100644 exps/example/yolox_pedestrian/coco_format/nano.py create mode 100644 exps/example/yolox_pedestrian/coco_format/yolox_s.py create mode 100644 exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc create mode 100644 exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py create mode 100644 exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py create mode 100644 exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py diff --git a/exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc b/exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5a2714b6d8e3b14aa9c82ade2dce7a62be5bc18 GIT binary patch literal 1752 zcmZWq&2Jk;6rY*>@UFdXX^ASrM+*|7EC?GA$Ex9@q#TGFRFKL_Xf@uM*z4|BXVx?^ z_9;P?koXr+a?FKu{|to0WlkVY5hrd4ap1jmQk*i@{PxYv?wj}C{C?vvy4^N`Mz4Q2 zeA^}DcO0xX8wT$~*Y5!cB4|dU<&)+tViawbS-BlKl-wr567~TRwsiKb$QJZ237r%8 zk_tdDoA1P80lIvwcOz>p-Of<){Q*`b08cHl*paD^?L0~*M^^=$nwT*qTLSwp~74DBg=ZqA+)yyjZgx?f2lb*)lf#7UWyvn#dYX%X|H zC~ID)Wubc$T^6$|U-K*#|C{R5X&viy zCZixPHBXaRCp?p;qj(|8yje1S4Y4duR%&Bc99`%tOKamP$+KnxHrYut37dFww~7m% zOB2X~4`D7(cc+!{Fuho$3Ot^rV_J{_#&0^sG>?;vYb`Yp$ksC3Ii|*smy>aPO#a^L zkIP*4C(|tD{aa-+&1C`A`tr`FckcCJR}H}PDwF-aGAnoE|H|0MGR8Zl%6Z){cv1GM zJ>^5yyU_LX0EAwk?#k~~>RQd1%>p2E@2rNK2@>+Gp$pIw`3Sl`0BER877TedA@^HK zAf+=<(89{Cg`L|AP!ABjCA^WfaOc3y2{3cw&RyY)_V@OpHOF!5+?+SJ=B>H6kF54! z#XI!p?1)le>cm4Xi_X~yco_`N2?hg3ao@!t-vF4CLvloJk*{7{P>>F-*~Hps$^+?8 z5Df!#9(WDKt->ifY1PbE}gxMr60*UhW%vL zyOEW^PkpoZq?4OY`0*l8Y0#97sx_8Y!0JbBLz{qDsa< zKbh&sZ^ZF1)pGRB+K;z?n;mR~0g6YY35q9ZNY!ih(<+206&6#yiGZ3`9fWm+*8z;f zt18SOD~%bS(kRNIl@@vOfWjIyLHI5%ldA3|> zgre|Y!E{gMn8+-Pe_2z| hZu3^;Z0$jeD}I|y8=81GmeJkByI-T|JO+7L{{j`~!sP$} literal 0 HcmV?d00001 diff --git a/exps/example/yolox_pedestrian/coco_format/nano.py b/exps/example/yolox_pedestrian/coco_format/nano.py new file mode 100644 index 000000000..90cc639ad --- /dev/null +++ b/exps/example/yolox_pedestrian/coco_format/nano.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +import torch.nn as nn + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + # Define yourself dataset path + self.data_dir = "datasets/pedestrian_coco" + self.train_ann = "train_annotations.json" + self.val_ann = "valid_annotations.json" + + self.num_classes = 1 + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model diff --git a/exps/example/yolox_pedestrian/coco_format/yolox_s.py b/exps/example/yolox_pedestrian/coco_format/yolox_s.py new file mode 100644 index 000000000..97291a30d --- /dev/null +++ b/exps/example/yolox_pedestrian/coco_format/yolox_s.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # Define yourself dataset path + self.data_dir = "datasets/pedestrian_coco" + self.train_ann = "train_annotations.json" + self.val_ann = "valid_annotations.json" + + self.num_classes = 1 + + self.max_epoch = 10 + self.data_num_workers = 4 + self.eval_interval = 1 diff --git a/exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4472ab2e8aabe86e166b380acd0373655427d5e7 GIT binary patch literal 3782 zcmai1ON<;x8SeLM9{aEp;|OB&6oX)w1c87!9ulwDiDa)0c1%1xn%=3}*&cWIjH`R> z-KFQWh8)mxjB#$jMxl;;1)|Ujz0|giU1Lwf22l@9WUc$BCH%wLNWj`<$n7=~Gta#`nDw2HPg-WUzxZ!71?+P*IRu+krf``W*6FRl+# zzStinakRKJ=#EmJ75UsPP8x(HP{M3fE_zE~aKkTaeaB95}f`-5({HV|o4O!}xH zi?YGOa8tP01r#u^XO5lH!Z;hnDIMmsV8K*TpC#>YcEDc zcYOs;ewIm-4|D0IIIgT1r7c71*QB0`CtyM)oB1 z>+xjaLNe$^Nq%wRXo|E|kAi5+l(vF_^y%lu3(GfFs_LsR@rWHDPPABqH)3+fE!b0w zv#`4!Wf@O$umeH&L7em6k5B*R%3rr0wp(?=Ig7i5^SZ**f?6*ZpM;g-QzSl3f`A~1 z#RS%IY%bPQ4(+Qa5at)D=lAu?6i)bk5-d7jc%q-hg>K6d-k!21*lU24xmyj@5Po%nAVY`O*-t1D~gMPw#0@X=AvDaBaJm$ku`VhF|Bb1Bs=@2F>aLgvccvqXb2`~S<}nL&-HP$ zY$9mpS1J%WvVQyer6peQZV?YM*}Ns9ID^P?;tn}`2S`;Yzo}?O$Y2RK#MME>xR5h1 zXKQrQ{IGN8+8~eO?v%b`LJGk@(k@8Cr6)B*WS`je0!Hd4brB(_WYZlF}UtJ`{uQ!3JsbrIRYN#YKFIHiUE;?+K2O z_hE3BCsDz{>Ig^r0mOx@;d01_o4T5wR&x2Vzq;CWJQ>f5JSQ-$^vqSc##HBEQlo%mq{#>pl!hwSON}|7K>qelfn-c zCal_(nea?pp@Fl?>r)$O&8V0cFH>(b&aq#*n^s2!XVS?AVKnOfAKSk}X1qj#(f~O} zOC$<>;oS$3=;gAm5EWL9k8{Ng8LP~y`sqH%I}nQdcOgNsjgRJ>}JiXb$6z8_He79D5}=hBYo|lbLg+1D5c>5`O7}C7KhKFw`l5Dx&0w8pKy%xwP+tf&n< zEkk^r>fa#o9KbOQiohm`2H7~AHgF-PSy~l_ZH|SFV)-RCQ 
zD98!A+Qlsh(8)k_k#CKPI6?A79+$Ogl+7<4L++5I=5F$PBV5wBpC(P>Hff}=X$8lS z72l;LTw0g$`^V`R;&w9Y?kI!Q5izZm9rIH zk=aT~F8Y`tViF-utOO{@A4!G!znvY(Kt#9)&CexRB}ZPdff|71RqU|d6E7N%gHv%Q9ZmQ H>*jv}g~8Jr literal 0 HcmV?d00001 diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py new file mode 100644 index 000000000..a3d02e62a --- /dev/null +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp +import os +import random +import torch.nn as nn +import torch +import torch.distributed as dist + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = 
torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py new file mode 100644 index 000000000..8e05e67ff --- /dev/null +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os +import random +import torch.nn as nn +import torch +import torch.distributed as dist +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.25 + self.scale = (0.5, 1.5) + self.random_size = (10, 20) + self.eps = 1e-8 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_optimizer(self, batch_size): + if "optimizer" not in self.__dict__: + if self.warmup_epochs > 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.Adam( + pg0, lr=lr, eps=self.eps, amsgrad=False + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + + return self.optimizer + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import 
VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py new file mode 100644 index 000000000..fa27310ab --- /dev/null +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py @@ -0,0 +1,123 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + 
nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator From 6b262d246ca94cd81c3167c9b38cdc70e0617b08 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:15:07 +0800 Subject: [PATCH 02/59] Add files via upload --- .../__pycache__/yolox_voc_nano.cpython-38.pyc | Bin 0 -> 3838 bytes exps/example/yolox_voc_nano/yolox_voc_nano.py | 147 +++++++++++++++ .../yolox_voc_nano_adam.py | 178 ++++++++++++++++++ exps/example/yolox_voc_tiny/yolox_voc_tiny.py | 146 ++++++++++++++ 4 files changed, 471 insertions(+) create mode 100644 exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc create mode 100644 exps/example/yolox_voc_nano/yolox_voc_nano.py create mode 100644 exps/example/yolox_voc_nano_adam/yolox_voc_nano_adam.py create mode 100644 exps/example/yolox_voc_tiny/yolox_voc_tiny.py diff --git a/exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b3dc19292760e1ae41ca89f792124e0826856ce GIT binary patch literal 3838 zcmai1O^h5z74HA(?)lqYI{^oX%}g{Xwy078sJQH>QIj3oVqsrmOLgf-lHiBRp(_^huzV(i->PBLAOucNgI+kj) z6Q@&|wz(aby8~~>XU49s+RV6`)H-#gTX7?4cAC0&Rbw`D9%#(r^)0hgWBN_4Re6L} zwdGBB@wtUN*I&7QyQNF>%K8Ye**?#MjbS`o4_KIoED}8$d-yS@m243T*T~+E@s_rt zGo2X^+>Xg=%wqNf9ah(w!z$2h)?hC4pgF9`d{%{4c}ugjjthHcgn_Ag`k_@ww^3-Z zY#Acga=p;Dj2&ZF!xo_jCNm%{We9}}BQBJ2(hY(rjq)In)pVQ$y*SJ= zo=KbWQNAke`w_#-7-rHMh18IZ;wYD{;9;y9@cl{@bA0de^-++9370jVhF!EJ(fXJU z-^zHr@=%vvP|hC&54C@{7gmP}Ul@$zC@j6TfC(~)Fd4;sp>)>0VGnbr!(-L?(T1pC zH?t^N4MNi!x}$zy(>H7I`TX9#mU`rby~l&%pj<}D9zZDPY7>J_d!VgV1S2QMdSXn> z#GF`(HL>9rxSP3sbK(>h#@Q%N;V_T++t#E~P`z@b%`MELQn*{>Nf$ltj#nr^lRofw zs;o-zrW(9fTRs93GMFOiCK>q+h(g=ZcC|(AjW14gz(voEfw^S}7jV%56a&(}>nZ@4 z%l%It{_79dx;NVW_n#&dyxVR$vXTthIEG{WD2vi857Qo(Mw-g%_rtumx;zxgc_vLh z%A}jXFKIqbT87lGNj(uyz=Tpf2~BzmFyx;vkby3Qrq}Ru^vyGL^b~7YkH_;D;$bh0 zvlr&~KasZTQCMu5(pGUQJ$l`Eap}f#sos8xhwKpoLW?w$;tPw{&n*~yHz7Vv#3Xat|1OrXn6vhr**t++>N7@heDjDZ;#FV3{B;B!`@nVm~zpg zA;(W}|AbDVH#-TT8LsW?Cw1RA30X600Q6@4$Z7Tn6MIh+3ISgM`C}*qHC=HY*$_&aZt8M!eAKz%fTw#c}fzT5>CwH}{w25B0`4bp3w*_h&JSpBInHsRR zzHLmZg;P`uozZMnT9Gxk?Fp^%IX1&)-!UfjqE^(|+yxEcgjrVfqW%kg(kL1Tn-j|= zj2u~e{rcrap7UNF4O7{;CBi6$NV63HTh818Ru#@~DsBE^V=y)Z6Y&Yc(Eg%2K@eg99t9$oZ5YWIJ4 z@3+5tYT+;K{x2{8JpTLRKey5Or4D=kqw62G`x}qGe$M^P2kricAE>AO=sZ40Jb0qt z4H6E^rJ3bS`boGR#9O^8;Fvy&x4HT2-qm;2N`fGD2V%U4MPP2K^ zNz_inB~-X#PKafn44aD~JYiK`@DB0<~2 zQD6xiptM*7(;F0%urOxjuFROH;$<2*qfY(M23k!;1mcI(+lVskH_JuT9p{`$Cmjai zxc`4_zeQ&JfW!@&WR{kQ6&{1N`=RJ(vZfFgl#Ry;q)Emqv&#N&Vv?)_q3LHlL*0dz zZj`@ItxudP=@x$bZ8p`WzmJ_+$FM*;hVE*O&m3zyiuxZrZcTUfhOv2KkBknjdxVmR zO^HkuCA|&hC?zQhG6&WUa+*B~>Jus8{-k7%RkjW0;y!giA#Z|0KvpG%a&J9tSe=kV;aP{sGk=I2O%p2S6nB?Lz$TOF^g#Qavv6qM14>m-zK z2_UUG@hUYaEWb(GK5oU2sH$M=(LxG%;x%e~9YWgqQ1p;(KLD<{LrYX>U2xlhdC~`~(NYMFau+|Qfzqp=w8(5VNhV@^Al68P^l9l|`d$pFk>aFPIpz(u zaQWpxeY8K{@%XP({O|Yv41>!g0x>+JVtg9ekDbcq>|WgNk8kO~?cSe9855=L$e8?q z@;2m&lsyH(-EkO~Ju2B2#4_Ry5^7@^*;t?E^gSNOL2$^<6Eu$UMfHEi43ta}gkE#~ z6aGd2v}(uyNE8mz!EMBb3GP*b{Ct`U%8W#p`ew_vL% 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + 
pg1.append(v.weight) # apply decay + + optimizer = torch.optim.Adam( + pg0, lr=lr, eps=self.eps, amsgrad=False + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + + return self.optimizer + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/yolox_voc_tiny/yolox_voc_tiny.py new file mode 100644 index 000000000..499b2a59a --- /dev/null +++ b/exps/example/yolox_voc_tiny/yolox_voc_tiny.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os +import random +import torch.nn as nn +import torch +import torch.distributed as dist +import sys +sys.path.append(r'D:/YOLOX') +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.375 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, 
is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator From 812f20e47e279a02806e360c48183d92a0451ef1 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:17:03 +0800 Subject: [PATCH 03/59] Rename nano.py to yolox_nano.py --- exps/example/custom/{nano.py => yolox_nano.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename exps/example/custom/{nano.py => yolox_nano.py} (100%) diff --git a/exps/example/custom/nano.py b/exps/example/custom/yolox_nano.py similarity index 100% rename from exps/example/custom/nano.py rename to exps/example/custom/yolox_nano.py From 9950ca2d88ed246a3773b5d0af8f1ae3a42e2011 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:19:37 +0800 Subject: [PATCH 04/59] Rename nano.py to yolox_nano.py --- .../yolox_pedestrian/coco_format/{nano.py => yolox_nano.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename exps/example/yolox_pedestrian/coco_format/{nano.py => yolox_nano.py} (100%) diff --git a/exps/example/yolox_pedestrian/coco_format/nano.py b/exps/example/yolox_pedestrian/coco_format/yolox_nano.py similarity index 100% rename from exps/example/yolox_pedestrian/coco_format/nano.py rename to exps/example/yolox_pedestrian/coco_format/yolox_nano.py From d09ad1d9f0376fab246332456494703ca0a82672 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:21:17 +0800 Subject: [PATCH 05/59] Delete exps/example/yolox_voc directory --- exps/example/yolox_voc/yolox_voc_s.py | 138 -------------------------- 1 file changed, 138 deletions(-) delete mode 100644 exps/example/yolox_voc/yolox_voc_s.py diff --git a/exps/example/yolox_voc/yolox_voc_s.py b/exps/example/yolox_voc/yolox_voc_s.py deleted file mode 100644 index e5cdb6103..000000000 --- a/exps/example/yolox_voc/yolox_voc_s.py +++ /dev/null @@ -1,138 +0,0 @@ -# encoding: utf-8 -import os - -import torch -import torch.distributed as dist - -from yolox.data import get_yolox_datadir -from yolox.exp import Exp as MyExp - - -class Exp(MyExp): - def __init__(self): - super(Exp, self).__init__() - self.num_classes = 20 - self.depth = 0.33 - self.width = 0.50 - self.warmup_epochs = 1 - - # ---------- transform config ------------ # - self.mosaic_prob = 1.0 - self.mixup_prob = 1.0 - self.hsv_prob = 1.0 - self.flip_prob = 0.5 - - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - - def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): - from yolox.data import ( - VOCDetection, - TrainTransform, - YoloBatchSampler, - DataLoader, - InfiniteSampler, - MosaicDetection, - worker_init_reset_seed, - ) - from yolox.utils import ( - wait_for_the_master, - get_local_rank, - ) - local_rank = get_local_rank() - - with wait_for_the_master(local_rank): - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - max_labels=50, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob), - cache=cache_img, - ) - - dataset = MosaicDetection( - dataset, - mosaic=not no_aug, - img_size=self.input_size, - preproc=TrainTransform( - max_labels=120, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob), - degrees=self.degrees, - 
translate=self.translate, - mosaic_scale=self.mosaic_scale, - mixup_scale=self.mixup_scale, - shear=self.shear, - enable_mixup=self.enable_mixup, - mosaic_prob=self.mosaic_prob, - mixup_prob=self.mixup_prob, - ) - - self.dataset = dataset - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - - sampler = InfiniteSampler( - len(self.dataset), seed=self.seed if self.seed else 0 - ) - - batch_sampler = YoloBatchSampler( - sampler=sampler, - batch_size=batch_size, - drop_last=False, - mosaic=not no_aug, - ) - - dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} - dataloader_kwargs["batch_sampler"] = batch_sampler - - # Make sure each process has different random seed, especially for 'fork' method - dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed - - train_loader = DataLoader(self.dataset, **dataloader_kwargs) - - return train_loader - - def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): - from yolox.data import VOCDetection, ValTransform - - valdataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'test')], - img_size=self.test_size, - preproc=ValTransform(legacy=legacy), - ) - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - sampler = torch.utils.data.distributed.DistributedSampler( - valdataset, shuffle=False - ) - else: - sampler = torch.utils.data.SequentialSampler(valdataset) - - dataloader_kwargs = { - "num_workers": self.data_num_workers, - "pin_memory": True, - "sampler": sampler, - } - dataloader_kwargs["batch_size"] = batch_size - val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) - - return val_loader - - def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): - from yolox.evaluators import VOCEvaluator - - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) - evaluator = VOCEvaluator( - dataloader=val_loader, - img_size=self.test_size, - confthre=self.test_conf, - nmsthre=self.nmsthre, - num_classes=self.num_classes, - ) - return evaluator From 66a3ead98275506f26d97f5f1612c542cee20a4b Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:21:30 +0800 Subject: [PATCH 06/59] Delete exps/example/yolox_voc_nano directory --- .../__pycache__/yolox_voc_nano.cpython-38.pyc | Bin 3838 -> 0 bytes exps/example/yolox_voc_nano/yolox_voc_nano.py | 147 ------------------ 2 files changed, 147 deletions(-) delete mode 100644 exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc delete mode 100644 exps/example/yolox_voc_nano/yolox_voc_nano.py diff --git a/exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc deleted file mode 100644 index 0b3dc19292760e1ae41ca89f792124e0826856ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3838 zcmai1O^h5z74HA(?)lqYI{^oX%}g{Xwy078sJQH>QIj3oVqsrmOLgf-lHiBRp(_^huzV(i->PBLAOucNgI+kj) z6Q@&|wz(aby8~~>XU49s+RV6`)H-#gTX7?4cAC0&Rbw`D9%#(r^)0hgWBN_4Re6L} zwdGBB@wtUN*I&7QyQNF>%K8Ye**?#MjbS`o4_KIoED}8$d-yS@m243T*T~+E@s_rt zGo2X^+>Xg=%wqNf9ah(w!z$2h)?hC4pgF9`d{%{4c}ugjjthHcgn_Ag`k_@ww^3-Z zY#Acga=p;Dj2&ZF!xo_jCNm%{We9}}BQBJ2(hY(rjq)In)pVQ$y*SJ= zo=KbWQNAke`w_#-7-rHMh18IZ;wYD{;9;y9@cl{@bA0de^-++9370jVhF!EJ(fXJU z-^zHr@=%vvP|hC&54C@{7gmP}Ul@$zC@j6TfC(~)Fd4;sp>)>0VGnbr!(-L?(T1pC 
zH?t^N4MNi!x}$zy(>H7I`TX9#mU`rby~l&%pj<}D9zZDPY7>J_d!VgV1S2QMdSXn> z#GF`(HL>9rxSP3sbK(>h#@Q%N;V_T++t#E~P`z@b%`MELQn*{>Nf$ltj#nr^lRofw zs;o-zrW(9fTRs93GMFOiCK>q+h(g=ZcC|(AjW14gz(voEfw^S}7jV%56a&(}>nZ@4 z%l%It{_79dx;NVW_n#&dyxVR$vXTthIEG{WD2vi857Qo(Mw-g%_rtumx;zxgc_vLh z%A}jXFKIqbT87lGNj(uyz=Tpf2~BzmFyx;vkby3Qrq}Ru^vyGL^b~7YkH_;D;$bh0 zvlr&~KasZTQCMu5(pGUQJ$l`Eap}f#sos8xhwKpoLW?w$;tPw{&n*~yHz7Vv#3Xat|1OrXn6vhr**t++>N7@heDjDZ;#FV3{B;B!`@nVm~zpg zA;(W}|AbDVH#-TT8LsW?Cw1RA30X600Q6@4$Z7Tn6MIh+3ISgM`C}*qHC=HY*$_&aZt8M!eAKz%fTw#c}fzT5>CwH}{w25B0`4bp3w*_h&JSpBInHsRR zzHLmZg;P`uozZMnT9Gxk?Fp^%IX1&)-!UfjqE^(|+yxEcgjrVfqW%kg(kL1Tn-j|= zj2u~e{rcrap7UNF4O7{;CBi6$NV63HTh818Ru#@~DsBE^V=y)Z6Y&Yc(Eg%2K@eg99t9$oZ5YWIJ4 z@3+5tYT+;K{x2{8JpTLRKey5Or4D=kqw62G`x}qGe$M^P2kricAE>AO=sZ40Jb0qt z4H6E^rJ3bS`boGR#9O^8;Fvy&x4HT2-qm;2N`fGD2V%U4MPP2K^ zNz_inB~-X#PKafn44aD~JYiK`@DB0<~2 zQD6xiptM*7(;F0%urOxjuFROH;$<2*qfY(M23k!;1mcI(+lVskH_JuT9p{`$Cmjai zxc`4_zeQ&JfW!@&WR{kQ6&{1N`=RJ(vZfFgl#Ry;q)Emqv&#N&Vv?)_q3LHlL*0dz zZj`@ItxudP=@x$bZ8p`WzmJ_+$FM*;hVE*O&m3zyiuxZrZcTUfhOv2KkBknjdxVmR zO^HkuCA|&hC?zQhG6&WUa+*B~>Jus8{-k7%RkjW0;y!giA#Z|0KvpG%a&J9tSe=kV;aP{sGk=I2O%p2S6nB?Lz$TOF^g#Qavv6qM14>m-zK z2_UUG@hUYaEWb(GK5oU2sH$M=(LxG%;x%e~9YWgqQ1p;(KLD<{LrYX>U2xlhdC~`~(NYMFau+|Qfzqp=w8(5VNhV@^Al68P^l9l|`d$pFk>aFPIpz(u zaQWpxeY8K{@%XP({O|Yv41>!g0x>+JVtg9ekDbcq>|WgNk8kO~?cSe9855=L$e8?q z@;2m&lsyH(-EkO~Ju2B2#4_Ry5^7@^*;t?E^gSNOL2$^<6Eu$UMfHEi43ta}gkE#~ z6aGd2v}(uyNE8mz!EMBb3GP*b{Ct`U%8W#p`ew_vL% Date: Sat, 9 Jul 2022 22:21:40 +0800 Subject: [PATCH 07/59] Delete exps/example/yolox_voc_nano_adam directory --- .../yolox_voc_nano_adam.py | 178 ------------------ 1 file changed, 178 deletions(-) delete mode 100644 exps/example/yolox_voc_nano_adam/yolox_voc_nano_adam.py diff --git a/exps/example/yolox_voc_nano_adam/yolox_voc_nano_adam.py b/exps/example/yolox_voc_nano_adam/yolox_voc_nano_adam.py deleted file mode 100644 index 1663c88f9..000000000 --- a/exps/example/yolox_voc_nano_adam/yolox_voc_nano_adam.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# Copyright (c) Megvii, Inc. and its affiliates. - -import os -import random -import torch.nn as nn -import torch -import torch.distributed as dist -import sys -sys.path.append(r'D:/YOLOX') -from yolox.exp import Exp as MyExp -from yolox.data import get_yolox_datadir - - -class Exp(MyExp): - def __init__(self): - super(Exp, self).__init__() - self.num_classes = 1 - self.depth = 0.33 - self.width = 0.25 - self.scale = (0.5, 1.5) - self.random_size = (10, 20) - self.eps = 1e-8 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - self.enable_mixup = False - - def get_model(self, sublinear=False): - - def init_yolo(M): - for m in M.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - if "model" not in self.__dict__: - from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead - in_channels = [256, 512, 1024] - # NANO model use depthwise = True, which is main difference. 
- backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) - self.model = YOLOX(backbone, head) - - self.model.apply(init_yolo) - self.model.head.initialize_biases(1e-2) - return self.model - - def get_optimizer(self, batch_size): - if "optimizer" not in self.__dict__: - if self.warmup_epochs > 0: - lr = self.warmup_lr - else: - lr = self.basic_lr_per_img * batch_size - - pg0, pg1, pg2 = [], [], [] # optimizer parameter groups - - for k, v in self.model.named_modules(): - if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): - pg2.append(v.bias) # biases - if isinstance(v, nn.BatchNorm2d) or "bn" in k: - pg0.append(v.weight) # no decay - elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): - pg1.append(v.weight) # apply decay - - optimizer = torch.optim.Adam( - pg0, lr=lr, eps=self.eps, amsgrad=False - ) - optimizer.add_param_group( - {"params": pg1, "weight_decay": self.weight_decay} - ) # add pg1 with weight_decay - optimizer.add_param_group({"params": pg2}) - self.optimizer = optimizer - - return self.optimizer - - def get_data_loader(self, batch_size, is_distributed, no_aug=False): - from yolox.data import ( - VOCDetection, - TrainTransform, - YoloBatchSampler, - DataLoader, - InfiniteSampler, - MosaicDetection, - ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), - ) - - dataset = MosaicDetection( - dataset, - mosaic=not no_aug, - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=120, - ), - degrees=self.degrees, - translate=self.translate, - scale=self.scale, - shear=self.shear, - perspective=self.perspective, - enable_mixup=self.enable_mixup, - ) - - self.dataset = dataset - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - - sampler = InfiniteSampler( - len(self.dataset), seed=self.seed if self.seed else 0 - ) - - batch_sampler = YoloBatchSampler( - sampler=sampler, - batch_size=batch_size, - drop_last=False, - input_dimension=self.input_size, - mosaic=not no_aug, - ) - - dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} - dataloader_kwargs["batch_sampler"] = batch_sampler - train_loader = DataLoader(self.dataset, **dataloader_kwargs) - - return train_loader - - def get_eval_loader(self, batch_size, is_distributed, testdev=False): - from yolox.data import VOCDetection, ValTransform - - valdataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'test')], - img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), - ) - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - sampler = torch.utils.data.distributed.DistributedSampler( - valdataset, shuffle=False - ) - else: - sampler = torch.utils.data.SequentialSampler(valdataset) - - dataloader_kwargs = { - "num_workers": self.data_num_workers, - "pin_memory": True, - "sampler": sampler, - } - dataloader_kwargs["batch_size"] = batch_size - val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) - - return val_loader - - def get_evaluator(self, batch_size, is_distributed, testdev=False): - from 
yolox.evaluators import VOCEvaluator - - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) - evaluator = VOCEvaluator( - dataloader=val_loader, - img_size=self.test_size, - confthre=self.test_conf, - nmsthre=self.nmsthre, - num_classes=self.num_classes, - ) - return evaluator From 0827b558cf4f125a9b79c97d2da6c2926e0e27d8 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:21:47 +0800 Subject: [PATCH 08/59] Delete exps/example/yolox_voc_tiny directory --- exps/example/yolox_voc_tiny/yolox_voc_tiny.py | 146 ------------------ 1 file changed, 146 deletions(-) delete mode 100644 exps/example/yolox_voc_tiny/yolox_voc_tiny.py diff --git a/exps/example/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/yolox_voc_tiny/yolox_voc_tiny.py deleted file mode 100644 index 499b2a59a..000000000 --- a/exps/example/yolox_voc_tiny/yolox_voc_tiny.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# Copyright (c) Megvii, Inc. and its affiliates. - -import os -import random -import torch.nn as nn -import torch -import torch.distributed as dist -import sys -sys.path.append(r'D:/YOLOX') -from yolox.exp import Exp as MyExp -from yolox.data import get_yolox_datadir - - -class Exp(MyExp): - def __init__(self): - super(Exp, self).__init__() - self.depth = 0.33 - self.width = 0.375 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - - def get_model(self, sublinear=False): - - def init_yolo(M): - for m in M.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - - if "model" not in self.__dict__: - from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead - in_channels = [256, 512, 1024] - # NANO model use depthwise = True, which is main difference. 
- backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) - self.model = YOLOX(backbone, head) - - self.model.apply(init_yolo) - self.model.head.initialize_biases(1e-2) - return self.model - - def get_data_loader(self, batch_size, is_distributed, no_aug=False): - from yolox.data import ( - VOCDetection, - TrainTransform, - YoloBatchSampler, - DataLoader, - InfiniteSampler, - MosaicDetection, - ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), - ) - - dataset = MosaicDetection( - dataset, - mosaic=not no_aug, - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=120, - ), - degrees=self.degrees, - translate=self.translate, - scale=self.scale, - shear=self.shear, - perspective=self.perspective, - enable_mixup=self.enable_mixup, - ) - - self.dataset = dataset - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - - sampler = InfiniteSampler( - len(self.dataset), seed=self.seed if self.seed else 0 - ) - - batch_sampler = YoloBatchSampler( - sampler=sampler, - batch_size=batch_size, - drop_last=False, - input_dimension=self.input_size, - mosaic=not no_aug, - ) - - dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} - dataloader_kwargs["batch_sampler"] = batch_sampler - train_loader = DataLoader(self.dataset, **dataloader_kwargs) - - return train_loader - - def get_eval_loader(self, batch_size, is_distributed, testdev=False): - from yolox.data import VOCDetection, ValTransform - - valdataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'test')], - img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), - ) - - if is_distributed: - batch_size = batch_size // dist.get_world_size() - sampler = torch.utils.data.distributed.DistributedSampler( - valdataset, shuffle=False - ) - else: - sampler = torch.utils.data.SequentialSampler(valdataset) - - dataloader_kwargs = { - "num_workers": self.data_num_workers, - "pin_memory": True, - "sampler": sampler, - } - dataloader_kwargs["batch_size"] = batch_size - val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) - - return val_loader - - def get_evaluator(self, batch_size, is_distributed, testdev=False): - from yolox.evaluators import VOCEvaluator - - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) - evaluator = VOCEvaluator( - dataloader=val_loader, - img_size=self.test_size, - confthre=self.test_conf, - nmsthre=self.nmsthre, - num_classes=self.num_classes, - ) - return evaluator From 4175e9e8b07cf6845d56d62eb8a9636bce672fc0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:24:20 +0800 Subject: [PATCH 09/59] Add files via upload --- .../__pycache__/yolox_voc_nano.cpython-38.pyc | Bin 0 -> 3838 bytes .../yolox_voc_nano/yolox_voc_nano.py | 147 +++++++++++++++ .../yolox_voc_nano_adam.py | 178 ++++++++++++++++++ .../__pycache__/yolox_voc_s.cpython-38.pyc | Bin 0 -> 3045 bytes .../voc_format/yolox_voc_s/yolox_voc_s.py | 123 ++++++++++++ 
.../yolox_voc_tiny/yolox_voc_tiny.py | 146 ++++++++++++++ 6 files changed, 594 insertions(+) create mode 100644 exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc create mode 100644 exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py create mode 100644 exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py create mode 100644 exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc create mode 100644 exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py create mode 100644 exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py diff --git a/exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b3dc19292760e1ae41ca89f792124e0826856ce GIT binary patch literal 3838 zcmai1O^h5z74HA(?)lqYI{^oX%}g{Xwy078sJQH>QIj3oVqsrmOLgf-lHiBRp(_^huzV(i->PBLAOucNgI+kj) z6Q@&|wz(aby8~~>XU49s+RV6`)H-#gTX7?4cAC0&Rbw`D9%#(r^)0hgWBN_4Re6L} zwdGBB@wtUN*I&7QyQNF>%K8Ye**?#MjbS`o4_KIoED}8$d-yS@m243T*T~+E@s_rt zGo2X^+>Xg=%wqNf9ah(w!z$2h)?hC4pgF9`d{%{4c}ugjjthHcgn_Ag`k_@ww^3-Z zY#Acga=p;Dj2&ZF!xo_jCNm%{We9}}BQBJ2(hY(rjq)In)pVQ$y*SJ= zo=KbWQNAke`w_#-7-rHMh18IZ;wYD{;9;y9@cl{@bA0de^-++9370jVhF!EJ(fXJU z-^zHr@=%vvP|hC&54C@{7gmP}Ul@$zC@j6TfC(~)Fd4;sp>)>0VGnbr!(-L?(T1pC zH?t^N4MNi!x}$zy(>H7I`TX9#mU`rby~l&%pj<}D9zZDPY7>J_d!VgV1S2QMdSXn> z#GF`(HL>9rxSP3sbK(>h#@Q%N;V_T++t#E~P`z@b%`MELQn*{>Nf$ltj#nr^lRofw zs;o-zrW(9fTRs93GMFOiCK>q+h(g=ZcC|(AjW14gz(voEfw^S}7jV%56a&(}>nZ@4 z%l%It{_79dx;NVW_n#&dyxVR$vXTthIEG{WD2vi857Qo(Mw-g%_rtumx;zxgc_vLh z%A}jXFKIqbT87lGNj(uyz=Tpf2~BzmFyx;vkby3Qrq}Ru^vyGL^b~7YkH_;D;$bh0 zvlr&~KasZTQCMu5(pGUQJ$l`Eap}f#sos8xhwKpoLW?w$;tPw{&n*~yHz7Vv#3Xat|1OrXn6vhr**t++>N7@heDjDZ;#FV3{B;B!`@nVm~zpg zA;(W}|AbDVH#-TT8LsW?Cw1RA30X600Q6@4$Z7Tn6MIh+3ISgM`C}*qHC=HY*$_&aZt8M!eAKz%fTw#c}fzT5>CwH}{w25B0`4bp3w*_h&JSpBInHsRR zzHLmZg;P`uozZMnT9Gxk?Fp^%IX1&)-!UfjqE^(|+yxEcgjrVfqW%kg(kL1Tn-j|= zj2u~e{rcrap7UNF4O7{;CBi6$NV63HTh818Ru#@~DsBE^V=y)Z6Y&Yc(Eg%2K@eg99t9$oZ5YWIJ4 z@3+5tYT+;K{x2{8JpTLRKey5Or4D=kqw62G`x}qGe$M^P2kricAE>AO=sZ40Jb0qt z4H6E^rJ3bS`boGR#9O^8;Fvy&x4HT2-qm;2N`fGD2V%U4MPP2K^ zNz_inB~-X#PKafn44aD~JYiK`@DB0<~2 zQD6xiptM*7(;F0%urOxjuFROH;$<2*qfY(M23k!;1mcI(+lVskH_JuT9p{`$Cmjai zxc`4_zeQ&JfW!@&WR{kQ6&{1N`=RJ(vZfFgl#Ry;q)Emqv&#N&Vv?)_q3LHlL*0dz zZj`@ItxudP=@x$bZ8p`WzmJ_+$FM*;hVE*O&m3zyiuxZrZcTUfhOv2KkBknjdxVmR zO^HkuCA|&hC?zQhG6&WUa+*B~>Jus8{-k7%RkjW0;y!giA#Z|0KvpG%a&J9tSe=kV;aP{sGk=I2O%p2S6nB?Lz$TOF^g#Qavv6qM14>m-zK z2_UUG@hUYaEWb(GK5oU2sH$M=(LxG%;x%e~9YWgqQ1p;(KLD<{LrYX>U2xlhdC~`~(NYMFau+|Qfzqp=w8(5VNhV@^Al68P^l9l|`d$pFk>aFPIpz(u zaQWpxeY8K{@%XP({O|Yv41>!g0x>+JVtg9ekDbcq>|WgNk8kO~?cSe9855=L$e8?q z@;2m&lsyH(-EkO~Ju2B2#4_Ry5^7@^*;t?E^gSNOL2$^<6Eu$UMfHEi43ta}gkE#~ z6aGd2v}(uyNE8mz!EMBb3GP*b{Ct`U%8W#p`ew_vL% 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.Adam( + pg0, lr=lr, eps=self.eps, amsgrad=False + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 
with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + + return self.optimizer + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc b/exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cea337ed06e250e2ca884a323327deb00f8e4824 GIT binary patch literal 3045 zcmai0ON<;x8Sd(L_w>v@oRlaGN_14DN{VzBLFqyJ_^@16VMw_nER@*_q(AG!k`ux6I0& zm?w3@Q#qs=Cm(CxsD6NoFvz2U`Hr#4IOFC6e_-)YTVey7ceu;F2W;T*i2FQ1&3(tP zje!T-U1>tWke$#?bq9q^R6l{JjhZ#aeRI#;H?SSlV~d-RHn$+1+8uj)zLr59?z4Ty z?bF7d&mGj<(+1{rxB+9{^@FFNZ*=yTAHVj87yi69LIcm=*AB?0Vk?r~{YW|0bRwj7 zP5E)07HJ*Fs#8q!c$g(sB`W1`F{!tdb0_6^nPsKyNkSdfBui`MOOa%{1M4@_Oklk) zb|!I=ZJ2Z-RgwjRnp)8;*qAc-fH^izCF)(Hl1|uZvm?>$EIX@0Lu`0-~rkOPQ<1H<1-TO6uX(O&tiS!gXZG 
z)iU8is)bjJO~Q&e99+0oR!KTM))zoK7u&bfx^F7`!nt$bQ+`b*wv(*wIe$T(MGPKV zqwp`kom>h(S{wao`;Wglv--ES(XTJRoBi|AU)M1B{D8ms(Txw+Mt2Xsdfxxt2Wz7b zKhV$G!38YuTPl#FjW`$ZTv=7kRhTC`ah7a|ta@lGpAtJpW~(sGlaYukQCG@O^HE%- z_k{8$QcPqyJlG*^fpT-b6Zv&CKsaXGnaG#OFy+9!Sa` zWsl3WkiF{E_QN%x69G-YH_GC+t4PO~hhEX_}kfL*;R9(p+SX?LB-0 z4!LMV?BIckG#Blf+y@W2;F7n%C191qXZgBvLo7%&%^V+DPO=#*|k7q zzlf^*QrrDys(*#V*C4JSIlO9Xy1AK&TYXD@p9bhF+s2jv(qEPrsY7G=RjM7~R$ii} zhHXHGH1On0)cG=ma_UkJ@s*m^X@*Z0#anewVr_Z-74(R_mFr}}BfB8EWA{`DdLvUvjdmATXd?CgFEEPZ@z*fFqKq@xA|Tb%Fw>Cw51<$V zXe?x=VSeV@JxfFKQ`hm?@(GBY(DoR_JSe52Ax7tX3^BmV7@G*Z*;w4#wPsFZHxByf z^JSw@-Q_kuG;ZSp1kM};On*jRf&bG42i|A$CKQA0!sDdWRI2=8S!~u@5{z5q6;)(F zQ$4lVk`}qGl~nR)7?5v}xC#N@rTEf%NohJ?_e3KvbE_9NH3w{)1dNAvp(xYd83ZcKE`!Hp-3x`0Z(uwIlkTBI$D1 zZs;x4RCj(C8j387;}ia!qIpD8`bI`ruZR?U7J1>N@T~5|O_Bzbi;od6b9s##zDD~> zQ798-scoG=yZk9;>FiL!{J3`}THV08b{Cs$^NiBzggeKa>0fNO_d~kGRy_zG#0I}` IXeicy0sid~;{X5v literal 0 HcmV?d00001 diff --git a/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py b/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py new file mode 100644 index 000000000..5f108b42b --- /dev/null +++ b/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py @@ -0,0 +1,123 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 20 + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = 
torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py new file mode 100644 index 000000000..499b2a59a --- /dev/null +++ b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os +import random +import torch.nn as nn +import torch +import torch.distributed as dist +import sys +sys.path.append(r'D:/YOLOX') +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.375 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
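The `depthwise=True` flag passed to `YOLOPAFPN` and `YOLOXHead` just below swaps the standard convolutions for depthwise-separable ones, which is what keeps the nano and tiny variants small. A minimal sketch of the idea in plain PyTorch; the `DepthwiseSeparableConv` class and the channel sizes here are illustrative and are not YOLOX's own `DWConv` implementation:

```python
import torch
import torch.nn as nn


class DepthwiseSeparableConv(nn.Module):
    """Illustrative depthwise-separable conv: a per-channel (grouped) 3x3 conv
    followed by a 1x1 pointwise conv. Parameter count is roughly
    k*k*C_in + C_in*C_out instead of k*k*C_in*C_out for a dense conv."""

    def __init__(self, in_ch, out_ch, ksize=3, stride=1):
        super().__init__()
        self.depthwise = nn.Conv2d(
            in_ch, in_ch, ksize, stride, padding=ksize // 2, groups=in_ch, bias=False
        )
        self.pointwise = nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        self.act = nn.SiLU()

    def forward(self, x):
        return self.act(self.bn(self.pointwise(self.depthwise(x))))


if __name__ == "__main__":
    x = torch.randn(1, 64, 52, 52)
    print(DepthwiseSeparableConv(64, 128)(x).shape)  # torch.Size([1, 128, 52, 52])
```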
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator From a0499dfb8c04a0f2a280aeca6f0a4450cb856c6b Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:25:48 +0800 Subject: [PATCH 10/59] Add files via upload --- exps/example/custom/coco_format/nano.py | 48 ++++++++++++++++++++++ exps/example/custom/coco_format/yolox_s.py | 25 +++++++++++ 2 files changed, 73 insertions(+) create mode 100644 exps/example/custom/coco_format/nano.py create mode 100644 exps/example/custom/coco_format/yolox_s.py diff --git a/exps/example/custom/coco_format/nano.py 
b/exps/example/custom/coco_format/nano.py new file mode 100644 index 000000000..fb10626db --- /dev/null +++ b/exps/example/custom/coco_format/nano.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch.nn as nn + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + # Define yourself dataset path + self.data_dir = "datasets/coco128" + self.train_ann = "instances_train2017.json" + self.val_ann = "instances_val2017.json" + + self.num_classes = 71 + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model diff --git a/exps/example/custom/coco_format/yolox_s.py b/exps/example/custom/coco_format/yolox_s.py new file mode 100644 index 000000000..2f0b0a5f7 --- /dev/null +++ b/exps/example/custom/coco_format/yolox_s.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # Define yourself dataset path + self.data_dir = "datasets/coco128" + self.train_ann = "instances_train2017.json" + self.val_ann = "instances_val2017.json" + + self.num_classes = 71 + + self.max_epoch = 300 + self.data_num_workers = 4 + self.eval_interval = 1 From fae9df388617672602c4fc8ee9fdcd2eb2f03c64 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:26:28 +0800 Subject: [PATCH 11/59] Delete yolox_nano.py --- exps/example/custom/yolox_nano.py | 48 ------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 exps/example/custom/yolox_nano.py diff --git a/exps/example/custom/yolox_nano.py b/exps/example/custom/yolox_nano.py deleted file mode 100644 index fb10626db..000000000 --- a/exps/example/custom/yolox_nano.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# Copyright (c) Megvii, Inc. and its affiliates. 
- -import os - -import torch.nn as nn - -from yolox.exp import Exp as MyExp - - -class Exp(MyExp): - def __init__(self): - super(Exp, self).__init__() - self.depth = 0.33 - self.width = 0.25 - self.input_size = (416, 416) - self.mosaic_scale = (0.5, 1.5) - self.random_size = (10, 20) - self.test_size = (416, 416) - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - self.enable_mixup = False - - # Define yourself dataset path - self.data_dir = "datasets/coco128" - self.train_ann = "instances_train2017.json" - self.val_ann = "instances_val2017.json" - - self.num_classes = 71 - - def get_model(self, sublinear=False): - - def init_yolo(M): - for m in M.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - if "model" not in self.__dict__: - from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead - in_channels = [256, 512, 1024] - # NANO model use depthwise = True, which is main difference. - backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) - self.model = YOLOX(backbone, head) - - self.model.apply(init_yolo) - self.model.head.initialize_biases(1e-2) - return self.model From 1af2ef84888c9b293eb3576298cda7f060d11187 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:26:39 +0800 Subject: [PATCH 12/59] Delete yolox_s.py --- exps/example/custom/yolox_s.py | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 exps/example/custom/yolox_s.py diff --git a/exps/example/custom/yolox_s.py b/exps/example/custom/yolox_s.py deleted file mode 100644 index 2f0b0a5f7..000000000 --- a/exps/example/custom/yolox_s.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding:utf-8 -*- -# Copyright (c) Megvii, Inc. and its affiliates. 
-import os - -from yolox.exp import Exp as MyExp - - -class Exp(MyExp): - def __init__(self): - super(Exp, self).__init__() - self.depth = 0.33 - self.width = 0.50 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - - # Define yourself dataset path - self.data_dir = "datasets/coco128" - self.train_ann = "instances_train2017.json" - self.val_ann = "instances_val2017.json" - - self.num_classes = 71 - - self.max_epoch = 300 - self.data_num_workers = 4 - self.eval_interval = 1 From 6ed55a457d47d7283a2502c8b5c9812c299c22e0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:28:41 +0800 Subject: [PATCH 13/59] Rename nano.py to yolox_nano.py --- exps/example/custom/coco_format/{nano.py => yolox_nano.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename exps/example/custom/coco_format/{nano.py => yolox_nano.py} (100%) diff --git a/exps/example/custom/coco_format/nano.py b/exps/example/custom/coco_format/yolox_nano.py similarity index 100% rename from exps/example/custom/coco_format/nano.py rename to exps/example/custom/coco_format/yolox_nano.py From d162dc8f0ca41dc8cec8b6b86a450a0e85a37fbd Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:30:21 +0800 Subject: [PATCH 14/59] Update demo.py --- tools/demo.py | 96 +++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 53 deletions(-) diff --git a/tools/demo.py b/tools/demo.py index b16598d5f..1e505a3aa 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -2,20 +2,21 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. -import argparse -import os -import time from loguru import logger import cv2 import torch -from yolox.data.data_augment import ValTransform -from yolox.data.datasets import COCO_CLASSES +from yolox.data.data_augment import preproc +from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES from yolox.exp import get_exp from yolox.utils import fuse_model, get_model_info, postprocess, vis +import argparse +import os +import time + IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] @@ -43,7 +44,7 @@ def make_parser(): "--exp_file", default=None, type=str, - help="please input your experiment description file", + help="pls input your expriment description file", ) parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") parser.add_argument( @@ -52,8 +53,8 @@ def make_parser(): type=str, help="device to run our model, can either be cpu or gpu", ) - parser.add_argument("--conf", default=0.3, type=float, help="test conf") - parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") + parser.add_argument("--conf", default=None, type=float, help="test conf") + parser.add_argument("--nms", default=None, type=float, help="test nms threshold") parser.add_argument("--tsize", default=None, type=int, help="test img size") parser.add_argument( "--fp16", @@ -62,13 +63,6 @@ def make_parser(): action="store_true", help="Adopting mix precision evaluating.", ) - parser.add_argument( - "--legacy", - dest="legacy", - default=False, - action="store_true", - help="To be compatible with older versions", - ) parser.add_argument( "--fuse", dest="fuse", @@ -102,12 +96,10 @@ def __init__( self, model, exp, - cls_names=COCO_CLASSES, + cls_names=VOC_CLASSES, trt_file=None, decoder=None, device="cpu", - fp16=False, - legacy=False, ): self.model = model self.cls_names = cls_names @@ -117,8 +109,6 @@ def __init__( 
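The `preproc` helper restored at the top of this diff resizes the frame onto a fixed-size canvas while keeping its aspect ratio, normalizes it with the ImageNet mean and std set a few lines below, and returns the resize ratio that `visual()` later uses to map boxes back onto the original image. The following is only a rough, self-contained approximation of that legacy routine (the real one lives in `yolox.data.data_augment`; the 114 padding value and the exact ordering are assumptions here):

```python
import cv2
import numpy as np


def letterbox_preproc(image, input_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Keep-aspect-ratio resize onto a gray canvas, BGR->RGB, scale to [0, 1],
    normalize, HWC->CHW. Returns the network input and the resize ratio."""
    padded = np.full((input_size[0], input_size[1], 3), 114.0, dtype=np.float32)
    r = min(input_size[0] / image.shape[0], input_size[1] / image.shape[1])
    resized = cv2.resize(
        image, (int(image.shape[1] * r), int(image.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded[: resized.shape[0], : resized.shape[1]] = resized
    padded = padded[:, :, ::-1] / 255.0  # BGR -> RGB, scale to [0, 1]
    padded = (padded - np.array(mean)) / np.array(std)
    return np.ascontiguousarray(padded.transpose(2, 0, 1), dtype=np.float32), r


if __name__ == "__main__":
    dummy = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
    chw, ratio = letterbox_preproc(dummy, (640, 640))
    print(chw.shape, ratio)  # (3, 640, 640) 1.0
```

Dividing the predicted boxes by the returned ratio, as `visual()` does with `img_info["ratio"]`, puts them back into original-image coordinates.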
self.nmsthre = exp.nmsthre self.test_size = exp.test_size self.device = device - self.fp16 = fp16 - self.preproc = ValTransform(legacy=legacy) if trt_file is not None: from torch2trt import TRTModule @@ -128,6 +118,8 @@ def __init__( x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() self.model(x) self.model = model_trt + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) def inference(self, img): img_info = {"id": 0} @@ -142,25 +134,23 @@ def inference(self, img): img_info["width"] = width img_info["raw_img"] = img - ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) + img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) img_info["ratio"] = ratio - - img, _ = self.preproc(img, None, self.test_size) img = torch.from_numpy(img).unsqueeze(0) - img = img.float() if self.device == "gpu": img = img.cuda() - if self.fp16: - img = img.half() # to FP16 with torch.no_grad(): t0 = time.time() outputs = self.model(img) + #print(type(outputs)) # torch.Tensor + print(len(outputs)) # 1 + print(outputs.shape) # (1,8400,6) + print(outputs.tolist()) # print complete list. if self.decoder is not None: outputs = self.decoder(outputs, dtype=outputs.type()) outputs = postprocess( - outputs, self.num_classes, self.confthre, - self.nmsthre, class_agnostic=True + outputs, self.num_classes, self.confthre, self.nmsthre ) logger.info("Infer time: {:.4f}s".format(time.time() - t0)) return outputs, img_info @@ -169,6 +159,17 @@ def visual(self, output, img_info, cls_conf=0.35): ratio = img_info["ratio"] img = img_info["raw_img"] if output is None: + font = cv2.FONT_HERSHEY_SIMPLEX + class_count = {} + class_AP = {} + for i in self.cls_names: + class_count[i] = 0 + class_AP[i] = 0.0 + line = 0 + for k in class_count: + cv2.putText(img, str(k)+": "+str(class_count[k]), (15,25+line), font, 0.8, (0, 255, 255), thickness=2) + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (15,50+line), font, 0.8, (0, 255, 255), thickness=2) + line = line+50 return img output = output.cpu() @@ -179,7 +180,6 @@ def visual(self, output, img_info, cls_conf=0.35): cls = output[:, 6] scores = output[:, 4] * output[:, 5] - vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) return vis_res @@ -211,19 +211,18 @@ def imageflow_demo(predictor, vis_folder, current_time, args): width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float fps = cap.get(cv2.CAP_PROP_FPS) - if args.save_result: - save_folder = os.path.join( - vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) - ) - os.makedirs(save_folder, exist_ok=True) - if args.demo == "video": - save_path = os.path.join(save_folder, os.path.basename(args.path)) - else: - save_path = os.path.join(save_folder, "camera.mp4") - logger.info(f"video save_path is {save_path}") - vid_writer = cv2.VideoWriter( - save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) - ) + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, args.path.split("/")[-1]) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) while True: ret_val, frame = cap.read() if ret_val: @@ -231,9 +230,6 @@ def imageflow_demo(predictor, vis_folder, 
current_time, args): result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) if args.save_result: vid_writer.write(result_frame) - else: - cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) - cv2.imshow("yolox", result_frame) ch = cv2.waitKey(1) if ch == 27 or ch == ord("q") or ch == ord("Q"): break @@ -248,7 +244,6 @@ def main(exp, args): file_name = os.path.join(exp.output_dir, args.experiment_name) os.makedirs(file_name, exist_ok=True) - vis_folder = None if args.save_result: vis_folder = os.path.join(file_name, "vis_res") os.makedirs(vis_folder, exist_ok=True) @@ -270,13 +265,11 @@ def main(exp, args): if args.device == "gpu": model.cuda() - if args.fp16: - model.half() # to FP16 model.eval() if not args.trt: if args.ckpt is None: - ckpt_file = os.path.join(file_name, "best_ckpt.pth") + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") else: ckpt_file = args.ckpt logger.info("loading checkpoint") @@ -302,10 +295,7 @@ def main(exp, args): trt_file = None decoder = None - predictor = Predictor( - model, exp, COCO_CLASSES, trt_file, decoder, - args.device, args.fp16, args.legacy, - ) + predictor = Predictor(model, exp, VOC_CLASSES, trt_file, decoder, args.device) current_time = time.localtime() if args.demo == "image": image_demo(predictor, vis_folder, args.path, current_time, args.save_result) From 0ae6ad6dc94e38ae11816efd524c9ca290500edd Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:34:06 +0800 Subject: [PATCH 15/59] Update __init__.py --- yolox/data/datasets/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yolox/data/datasets/__init__.py b/yolox/data/datasets/__init__.py index dee2c9f48..6ea2be14f 100644 --- a/yolox/data/datasets/__init__.py +++ b/yolox/data/datasets/__init__.py @@ -6,4 +6,5 @@ from .coco_classes import COCO_CLASSES from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset from .mosaicdetection import MosaicDetection +from .voc_classes import VOC_CLASSES from .voc import VOCDetection From 2d74703b9ec377d46ca4ea7c86f1b02eaaa1fe08 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:35:06 +0800 Subject: [PATCH 16/59] Update coco_classes.py --- yolox/data/datasets/coco_classes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yolox/data/datasets/coco_classes.py b/yolox/data/datasets/coco_classes.py index 17f5cbe6e..760945eaf 100644 --- a/yolox/data/datasets/coco_classes.py +++ b/yolox/data/datasets/coco_classes.py @@ -2,6 +2,10 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. +COCO_CLASSES = ( + "pedestrian", +) +""" COCO_CLASSES = ( "person", "bicycle", @@ -84,3 +88,4 @@ "hair drier", "toothbrush", ) +""" From 2f882f36024c1170a88be2549039cd11ac552504 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:36:17 +0800 Subject: [PATCH 17/59] Update coco.py --- yolox/data/datasets/coco.py | 153 +++++++----------------------------- 1 file changed, 27 insertions(+), 126 deletions(-) diff --git a/yolox/data/datasets/coco.py b/yolox/data/datasets/coco.py index 4fbdf8836..c3381724a 100644 --- a/yolox/data/datasets/coco.py +++ b/yolox/data/datasets/coco.py @@ -2,36 +2,16 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. 
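The changes below re-point `COCODataset` at the single-class pedestrian set declared by the coco_format exp files earlier in this series (`datasets/pedestrian_coco` with `train_annotations.json` and `valid_annotations.json`, images under `train/` and `valid/`). A quick sanity check of such an annotation file with `pycocotools`; the path mirrors those exp settings, but the script itself is a hypothetical add-on, not part of this patch:

```python
from pycocotools.coco import COCO

# Point this at a populated annotation file for the pedestrian data.
ann_file = "datasets/pedestrian_coco/annotations/train_annotations.json"

coco = COCO(ann_file)
print("images     :", len(coco.getImgIds()))
print("categories :", [c["name"] for c in coco.loadCats(coco.getCatIds())])
print("annotations:", len(coco.getAnnIds()))
```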
-import os -from loguru import logger - import cv2 import numpy as np from pycocotools.coco import COCO +import os + from ..dataloading import get_yolox_datadir from .datasets_wrapper import Dataset -def remove_useless_info(coco): - """ - Remove useless info in coco dataset. COCO object is modified inplace. - This function is mainly used for saving memory (save about 30% mem). - """ - if isinstance(coco, COCO): - dataset = coco.dataset - dataset.pop("info", None) - dataset.pop("licenses", None) - for img in dataset["images"]: - img.pop("license", None) - img.pop("coco_url", None) - img.pop("date_captured", None) - img.pop("flickr_url", None) - if "annotations" in coco.dataset: - for anno in coco.dataset["annotations"]: - anno.pop("segmentation", None) - - class COCODataset(Dataset): """ COCO dataset class. @@ -40,11 +20,10 @@ class COCODataset(Dataset): def __init__( self, data_dir=None, - json_file="instances_train2017.json", - name="train2017", + json_file="train_annotations.json", + name="train", img_size=(416, 416), preproc=None, - cache=False, ): """ COCO dataset initialization. Annotation data are read into memory by COCO API. @@ -57,82 +36,26 @@ def __init__( """ super().__init__(img_size) if data_dir is None: - data_dir = os.path.join(get_yolox_datadir(), "COCO") + data_dir = os.path.join(get_yolox_datadir(), "pedestrian_coco") self.data_dir = data_dir self.json_file = json_file self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file)) - remove_useless_info(self.coco) self.ids = self.coco.getImgIds() self.class_ids = sorted(self.coco.getCatIds()) - self.cats = self.coco.loadCats(self.coco.getCatIds()) - self._classes = tuple([c["name"] for c in self.cats]) - self.imgs = None + cats = self.coco.loadCats(self.coco.getCatIds()) + self._classes = tuple([c["name"] for c in cats]) + self.annotations = self._load_coco_annotations() self.name = name self.img_size = img_size self.preproc = preproc - self.annotations = self._load_coco_annotations() - if cache: - self._cache_images() def __len__(self): return len(self.ids) - def __del__(self): - del self.imgs - def _load_coco_annotations(self): return [self.load_anno_from_ids(_ids) for _ids in self.ids] - def _cache_images(self): - logger.warning( - "\n********************************************************************************\n" - "You are using cached images in RAM to accelerate training.\n" - "This requires large system RAM.\n" - "Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n" - "********************************************************************************\n" - ) - max_h = self.img_size[0] - max_w = self.img_size[1] - cache_file = os.path.join(self.data_dir, f"img_resized_cache_{self.name}.array") - if not os.path.exists(cache_file): - logger.info( - "Caching images for the first time. This might take about 20 minutes for COCO" - ) - self.imgs = np.memmap( - cache_file, - shape=(len(self.ids), max_h, max_w, 3), - dtype=np.uint8, - mode="w+", - ) - from tqdm import tqdm - from multiprocessing.pool import ThreadPool - - NUM_THREADs = min(8, os.cpu_count()) - loaded_images = ThreadPool(NUM_THREADs).imap( - lambda x: self.load_resized_img(x), - range(len(self.annotations)), - ) - pbar = tqdm(enumerate(loaded_images), total=len(self.annotations)) - for k, out in pbar: - self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy() - self.imgs.flush() - pbar.close() - else: - logger.warning( - "You are using cached imgs! 
Make sure your dataset is not changed!!\n" - "Everytime the self.input_size is changed in your exp file, you need to delete\n" - "the cached data and re-generate them.\n" - ) - - logger.info("Loading cached imgs...") - self.imgs = np.memmap( - cache_file, - shape=(len(self.ids), max_h, max_w, 3), - dtype=np.uint8, - mode="r+", - ) - def load_anno_from_ids(self, id_): im_ann = self.coco.loadImgs(id_)[0] width = im_ann["width"] @@ -143,8 +66,8 @@ def load_anno_from_ids(self, id_): for obj in annotations: x1 = np.max((0, obj["bbox"][0])) y1 = np.max((0, obj["bbox"][1])) - x2 = np.min((width, x1 + np.max((0, obj["bbox"][2])))) - y2 = np.min((height, y1 + np.max((0, obj["bbox"][3])))) + x2 = np.min((width - 1, x1 + np.max((0, obj["bbox"][2] - 1)))) + y2 = np.min((height - 1, y1 + np.max((0, obj["bbox"][3] - 1)))) if obj["area"] > 0 and x2 >= x1 and y2 >= y1: obj["clean_bbox"] = [x1, y1, x2, y2] objs.append(obj) @@ -158,56 +81,32 @@ def load_anno_from_ids(self, id_): res[ix, 0:4] = obj["clean_bbox"] res[ix, 4] = cls - r = min(self.img_size[0] / height, self.img_size[1] / width) - res[:, :4] *= r - img_info = (height, width) - resized_info = (int(height * r), int(width * r)) - file_name = ( - im_ann["file_name"] - if "file_name" in im_ann - else "{:012}".format(id_) + ".jpg" - ) + file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg" + + del im_ann, annotations - return (res, img_info, resized_info, file_name) + return (res, img_info, file_name) def load_anno(self, index): return self.annotations[index][0] - def load_resized_img(self, index): - img = self.load_image(index) - r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1]) - resized_img = cv2.resize( - img, - (int(img.shape[1] * r), int(img.shape[0] * r)), - interpolation=cv2.INTER_LINEAR, - ).astype(np.uint8) - return resized_img - - def load_image(self, index): - file_name = self.annotations[index][3] - - img_file = os.path.join(self.data_dir, self.name, file_name) - - img = cv2.imread(img_file) - assert img is not None, f"file named {img_file} not found" - - return img - def pull_item(self, index): id_ = self.ids[index] - res, img_info, resized_info, _ = self.annotations[index] - if self.imgs is not None: - pad_img = self.imgs[index] - img = pad_img[: resized_info[0], : resized_info[1], :].copy() - else: - img = self.load_resized_img(index) + res, img_info, file_name = self.annotations[index] + # load image and preprocess + img_file = os.path.join( + self.data_dir, self.name, file_name + ) + + img = cv2.imread(img_file) + assert img is not None - return img, res.copy(), img_info, np.array([id_]) + return img, res, img_info, np.array([id_]) - @Dataset.mosaic_getitem + @Dataset.resize_getitem def __getitem__(self, index): """ One image / label pair for the given index is picked up and pre-processed. @@ -223,8 +122,10 @@ def __getitem__(self, index): class (float): class index. xc, yc (float) : center of bbox whose values range from 0 to 1. w, h (float) : size of bbox whose values range from 0 to 1. - info_img : tuple of h, w. + info_img : tuple of h, w, nh, nw, dx, dy. h, w (int): original shape of the image + nh, nw (int): shape of the resized image without padding + dx, dy (int): pad size img_id (int): same as the input index. Used for evaluation. 
""" img, target, img_info, img_id = self.pull_item(index) From 8087d1028c371017660e4564be67530cd7dede90 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:36:58 +0800 Subject: [PATCH 18/59] Update voc_classes.py --- yolox/data/datasets/voc_classes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yolox/data/datasets/voc_classes.py b/yolox/data/datasets/voc_classes.py index 89354b3fd..438c5b78b 100644 --- a/yolox/data/datasets/voc_classes.py +++ b/yolox/data/datasets/voc_classes.py @@ -3,6 +3,11 @@ # Copyright (c) Megvii, Inc. and its affiliates. # VOC_CLASSES = ( '__background__', # always index 0 + +VOC_CLASSES = ( + "pedestrian", +) +""" VOC_CLASSES = ( "aeroplane", "bicycle", @@ -25,3 +30,4 @@ "train", "tvmonitor", ) +""" From 716a82f7dd0d8649305948c7f855b4166074a1e2 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Jul 2022 22:37:57 +0800 Subject: [PATCH 19/59] Update voc.py --- yolox/data/datasets/voc.py | 170 +++++++++---------------------------- 1 file changed, 42 insertions(+), 128 deletions(-) diff --git a/yolox/data/datasets/voc.py b/yolox/data/datasets/voc.py index 56675a297..465664aad 100644 --- a/yolox/data/datasets/voc.py +++ b/yolox/data/datasets/voc.py @@ -6,17 +6,16 @@ # Copyright (c) Ellis Brown, Max deGroot. # Copyright (c) Megvii, Inc. and its affiliates. -import os -import os.path -import pickle -import xml.etree.ElementTree as ET -from loguru import logger - import cv2 import numpy as np from yolox.evaluators.voc_eval import voc_eval +import os +import os.path +import pickle +import xml.etree.ElementTree as ET + from .datasets_wrapper import Dataset from .voc_classes import VOC_CLASSES @@ -36,9 +35,7 @@ class AnnotationTransform(object): """ def __init__(self, class_to_ind=None, keep_difficult=True): - self.class_to_ind = class_to_ind or dict( - zip(VOC_CLASSES, range(len(VOC_CLASSES))) - ) + self.class_to_ind = class_to_ind or dict(zip(VOC_CLASSES, range(len(VOC_CLASSES)))) self.keep_difficult = keep_difficult def __call__(self, target): @@ -51,20 +48,16 @@ def __call__(self, target): """ res = np.empty((0, 5)) for obj in target.iter("object"): - difficult = obj.find("difficult") - if difficult is not None: - difficult = int(difficult.text) == 1 - else: - difficult = False + difficult = int(obj.find("difficult").text) == 1 if not self.keep_difficult and difficult: continue - name = obj.find("name").text.strip() + name = obj.find("name").text.lower().strip() bbox = obj.find("bndbox") pts = ["xmin", "ymin", "xmax", "ymax"] bndbox = [] for i, pt in enumerate(pts): - cur_pt = int(float(bbox.find(pt).text)) - 1 + cur_pt = int(bbox.find(pt).text) - 1 # scale height or width # cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height bndbox.append(cur_pt) @@ -73,11 +66,7 @@ def __call__(self, target): res = np.vstack((res, bndbox)) # [xmin, ymin, xmax, ymax, label_ind] # img_id = target.find('filename').text[:-4] - width = int(target.find("size").find("width").text) - height = int(target.find("size").find("height").text) - img_info = (height, width) - - return res, img_info + return res # [[xmin, ymin, xmax, ymax, label_ind], ... 
] class VOCDetection(Dataset): @@ -102,12 +91,11 @@ class VOCDetection(Dataset): def __init__( self, data_dir, - image_sets=[("2007", "trainval"), ("2012", "trainval")], + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], img_size=(416, 416), preproc=None, target_transform=AnnotationTransform(), dataset_name="VOC0712", - cache=False, ): super().__init__(img_size) self.root = data_dir @@ -120,6 +108,13 @@ def __init__( self._imgpath = os.path.join("%s", "JPEGImages", "%s.jpg") self._classes = VOC_CLASSES self.ids = list() + for name in image_sets: + rootpath = self.root + for line in open( + os.path.join(rootpath, "ImageSets", "Main", name + ".txt") + ): + self.ids.append((rootpath, line.strip())) + """ for (year, name) in image_sets: self._year = year rootpath = os.path.join(self.root, "VOC" + year) @@ -127,101 +122,18 @@ def __init__( os.path.join(rootpath, "ImageSets", "Main", name + ".txt") ): self.ids.append((rootpath, line.strip())) - - self.annotations = self._load_coco_annotations() - self.imgs = None - if cache: - self._cache_images() + """ def __len__(self): return len(self.ids) - def _load_coco_annotations(self): - return [self.load_anno_from_ids(_ids) for _ids in range(len(self.ids))] - - def _cache_images(self): - logger.warning( - "\n********************************************************************************\n" - "You are using cached images in RAM to accelerate training.\n" - "This requires large system RAM.\n" - "Make sure you have 60G+ RAM and 19G available disk space for training VOC.\n" - "********************************************************************************\n" - ) - max_h = self.img_size[0] - max_w = self.img_size[1] - cache_file = os.path.join(self.root, f"img_resized_cache_{self.name}.array") - if not os.path.exists(cache_file): - logger.info( - "Caching images for the first time. This might take about 3 minutes for VOC" - ) - self.imgs = np.memmap( - cache_file, - shape=(len(self.ids), max_h, max_w, 3), - dtype=np.uint8, - mode="w+", - ) - from tqdm import tqdm - from multiprocessing.pool import ThreadPool - - NUM_THREADs = min(8, os.cpu_count()) - loaded_images = ThreadPool(NUM_THREADs).imap( - lambda x: self.load_resized_img(x), - range(len(self.annotations)), - ) - pbar = tqdm(enumerate(loaded_images), total=len(self.annotations)) - for k, out in pbar: - self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy() - self.imgs.flush() - pbar.close() - else: - logger.warning( - "You are using cached imgs! 
Make sure your dataset is not changed!!\n" - "Everytime the self.input_size is changed in your exp file, you need to delete\n" - "the cached data and re-generate them.\n" - ) - - logger.info("Loading cached imgs...") - self.imgs = np.memmap( - cache_file, - shape=(len(self.ids), max_h, max_w, 3), - dtype=np.uint8, - mode="r+", - ) - - def load_anno_from_ids(self, index): - img_id = self.ids[index] - target = ET.parse(self._annopath % img_id).getroot() - - assert self.target_transform is not None - res, img_info = self.target_transform(target) - height, width = img_info - - r = min(self.img_size[0] / height, self.img_size[1] / width) - res[:, :4] *= r - resized_info = (int(height * r), int(width * r)) - - return (res, img_info, resized_info) - def load_anno(self, index): - return self.annotations[index][0] - - def load_resized_img(self, index): - img = self.load_image(index) - r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1]) - resized_img = cv2.resize( - img, - (int(img.shape[1] * r), int(img.shape[0] * r)), - interpolation=cv2.INTER_LINEAR, - ).astype(np.uint8) - - return resized_img - - def load_image(self, index): img_id = self.ids[index] - img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) - assert img is not None, f"file named {self._imgpath % img_id} not found" + target = ET.parse(self._annopath % img_id).getroot() + if self.target_transform is not None: + target = self.target_transform(target) - return img + return target def pull_item(self, index): """Returns the original image and target at an index for mixup @@ -234,17 +146,17 @@ def pull_item(self, index): Return: img, target """ - if self.imgs is not None: - target, img_info, resized_info = self.annotations[index] - pad_img = self.imgs[index] - img = pad_img[: resized_info[0], : resized_info[1], :].copy() - else: - img = self.load_resized_img(index) - target, img_info, _ = self.annotations[index] + img_id = self.ids[index] + img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) + height, width, _ = img.shape + + target = self.load_anno(index) + + img_info = (height, width) return img, target, img_info, index - @Dataset.mosaic_getitem + @Dataset.resize_getitem def __getitem__(self, index): img, target, img_info, img_id = self.pull_item(index) @@ -263,9 +175,7 @@ def evaluate_detections(self, all_boxes, output_dir=None): all_boxes[class][image] = [] or np.array of shape #dets x 5 """ self._write_voc_results_file(all_boxes) - IouTh = np.linspace( - 0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True - ) + IouTh = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) mAPs = [] for iou in IouTh: mAP = self._do_python_eval(output_dir, iou) @@ -279,7 +189,8 @@ def evaluate_detections(self, all_boxes, output_dir=None): def _get_voc_results_file_template(self): filename = "comp4_det_test" + "_{:s}.txt" - filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") + #filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") + filedir = os.path.join(self.root, "results") if not os.path.exists(filedir): os.makedirs(filedir) path = os.path.join(filedir, filename) @@ -311,18 +222,21 @@ def _write_voc_results_file(self, all_boxes): ) def _do_python_eval(self, output_dir="output", iou=0.5): - rootpath = os.path.join(self.root, "VOC" + self._year) - name = self.image_set[0][1] - annopath = os.path.join(rootpath, "Annotations", "{:s}.xml") + #rootpath = os.path.join(self.root, "VOC" + self._year) + rootpath = self.root + name = self.image_set[0] 
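With the flattened layout used here, `Annotations/`, `JPEGImages/`, and `ImageSets/Main/` sit directly under the dataset root rather than under per-year `VOC2007`/`VOC2012` folders, and each listed id maps to one Pascal VOC XML file. A minimal standalone reader in the spirit of the `AnnotationTransform` defined earlier in this file; the `read_voc_boxes` helper and the example path are hypothetical:

```python
import xml.etree.ElementTree as ET


def read_voc_boxes(xml_path, class_to_ind, keep_difficult=True):
    """Return [xmin, ymin, xmax, ymax, label_ind] per object from a Pascal VOC
    XML file, with coordinates shifted to 0-based as AnnotationTransform does."""
    boxes = []
    root = ET.parse(xml_path).getroot()
    for obj in root.iter("object"):
        difficult = obj.find("difficult")
        if not keep_difficult and difficult is not None and int(difficult.text) == 1:
            continue
        name = obj.find("name").text.lower().strip()
        bb = obj.find("bndbox")
        coords = [int(float(bb.find(k).text)) - 1 for k in ("xmin", "ymin", "xmax", "ymax")]
        boxes.append(coords + [class_to_ind[name]])
    return boxes


# e.g. read_voc_boxes("datasets/pedestrian_voc/Annotations/0001.xml", {"pedestrian": 0})
```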
+ annopath = os.path.join(rootpath, "Annotations", "{}.xml") imagesetfile = os.path.join(rootpath, "ImageSets", "Main", name + ".txt") cachedir = os.path.join( - self.root, "annotations_cache", "VOC" + self._year, name + #self.root, "annotations_cache", "VOC" + self._year, name + self.root, "annotations_cache" ) if not os.path.exists(cachedir): os.makedirs(cachedir) aps = [] # The PASCAL VOC metric changed in 2010 - use_07_metric = True if int(self._year) < 2010 else False + use_07_metric = True + #use_07_metric = True if int(self._year) < 2010 else False print("Eval IoU : {:.2f}".format(iou)) if output_dir is not None and not os.path.isdir(output_dir): os.mkdir(output_dir) From b274e8b84488444b3c552fcb0210afd6fe0268d1 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 11:49:09 +0800 Subject: [PATCH 20/59] Update yolox_base.py --- yolox/exp/yolox_base.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 6e52e6eac..5029f42e7 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -24,8 +24,9 @@ def __init__(self): # factor of model width self.width = 1.00 # activation name. For example, if using "relu", then "silu" will be replaced to "relu". + """ self.act = "silu" - + """ # ---------------- dataloader config ---------------- # # set worker to 4 for shorter dataloader init time # If your training process cost many memory, reduce this value. @@ -33,19 +34,25 @@ def __init__(self): self.input_size = (640, 640) # (height, width) # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32]. # To disable multiscale training, set the value to 0. + """ self.multiscale_range = 5 + """ # You can uncomment this line to specify a multiscale range # self.random_size = (14, 26) # dir of dataset images, if data_dir is None, this project will use `datasets` dir + """ self.data_dir = None + """ # name of annotation file for training self.train_ann = "instances_train2017.json" # name of annotation file for evaluation self.val_ann = "instances_val2017.json" # name of annotation file for testing + """ self.test_ann = "instances_test2017.json" - + """ # --------------- transform config ----------------- # + """ # prob of applying mosaic aug self.mosaic_prob = 1.0 # prob of applying mixup aug @@ -55,6 +62,7 @@ def __init__(self): # prob of applying flip aug self.flip_prob = 0.5 # rotation angle range, for example, if set to 2, the true range is (-2, 2) + """ self.degrees = 10.0 # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1) self.translate = 0.1 @@ -78,7 +86,7 @@ def __init__(self): # name of LRScheduler self.scheduler = "yoloxwarmcos" # last #epoch to close augmention like mosaic - self.no_aug_epochs = 15 + self.no_aug_epochs = 0 # apply EMA during training self.ema = True From 2891d235617e34e4f45ce3f5b0798aabf657d08e Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 11:53:38 +0800 Subject: [PATCH 21/59] Update visualize.py --- yolox/utils/visualize.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index e714a3ee7..b31741f3d 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -9,6 +9,11 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): + class_count = {} + class_AP = {} + for j in class_names: + class_count[j] = 0 + class_AP[j] = 0 for i in 
range(len(boxes)): box = boxes[i] @@ -37,8 +42,18 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): txt_bk_color, -1 ) + class_count[class_names[cls_id]] = class_count[class_names[cls_id]]+1 + class_AP[class_names[cls_id]] = class_AP[class_names[cls_id]]+float('{:.1f}'.format(score * 100)) cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) - + line = 0 + for k in class_count: + cv2.putText(img, str(k)+": "+str(class_count[k]), (15,25+line), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (15,50+line), font, 0.8, (0, 255, 255), thickness=2) + line = line+50 return img From 1d8f8dfc9ad85c679b6be828d86fedf0791c05fe Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:13:04 +0800 Subject: [PATCH 22/59] Add files via upload --- datasets/pedestrian_coco/annotations/train_annotations.json | 0 datasets/pedestrian_coco/annotations/valid_annotations.json | 0 datasets/pedestrian_voc/ImageSets/Main/train.txt | 0 datasets/pedestrian_voc/ImageSets/Main/valid.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 datasets/pedestrian_coco/annotations/train_annotations.json create mode 100644 datasets/pedestrian_coco/annotations/valid_annotations.json create mode 100644 datasets/pedestrian_voc/ImageSets/Main/train.txt create mode 100644 datasets/pedestrian_voc/ImageSets/Main/valid.txt diff --git a/datasets/pedestrian_coco/annotations/train_annotations.json b/datasets/pedestrian_coco/annotations/train_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_coco/annotations/valid_annotations.json b/datasets/pedestrian_coco/annotations/valid_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/ImageSets/Main/train.txt b/datasets/pedestrian_voc/ImageSets/Main/train.txt new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/ImageSets/Main/valid.txt b/datasets/pedestrian_voc/ImageSets/Main/valid.txt new file mode 100644 index 000000000..e69de29bb From bf8e247b350e62d0bc2cc2faf62e2b04aaec762d Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:13:37 +0800 Subject: [PATCH 23/59] Delete datasets directory --- datasets/README.md | 24 ------------------- .../annotations/train_annotations.json | 0 .../annotations/valid_annotations.json | 0 .../pedestrian_voc/ImageSets/Main/train.txt | 0 .../pedestrian_voc/ImageSets/Main/valid.txt | 0 5 files changed, 24 deletions(-) delete mode 100644 datasets/README.md delete mode 100644 datasets/pedestrian_coco/annotations/train_annotations.json delete mode 100644 datasets/pedestrian_coco/annotations/valid_annotations.json delete mode 100644 datasets/pedestrian_voc/ImageSets/Main/train.txt delete mode 100644 datasets/pedestrian_voc/ImageSets/Main/valid.txt diff --git a/datasets/README.md b/datasets/README.md deleted file mode 100644 index 76f7c310c..000000000 --- a/datasets/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Prepare datasets - -If you have a dataset directory, you could use os environment variable named `YOLOX_DATADIR`. Under this directory, YOLOX will look for datasets in the structure described below, if needed. 
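The overlay added to `visualize.py` above counts detections per class and averages their confidences; the on-screen label "AP" is therefore a mean score, not a true average precision. A standalone re-expression of that bookkeeping (the `per_class_summary` helper is illustrative, not a function from the repository):

```python
from collections import defaultdict


def per_class_summary(cls_ids, scores, class_names):
    """Count detections per class and average their confidences.
    Note: this is mean confidence, not COCO/VOC average precision."""
    count = defaultdict(int)
    conf_sum = defaultdict(float)
    for cid, score in zip(cls_ids, scores):
        name = class_names[int(cid)]
        count[name] += 1
        conf_sum[name] += float(score)
    return {name: (count[name], conf_sum[name] / count[name]) for name in count}


print(per_class_summary([0, 0, 0], [0.91, 0.85, 0.78], ("pedestrian",)))
# {'pedestrian': (3, 0.8466666666666667)}
```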
-``` -$YOLOX_DATADIR/ - COCO/ -``` -You can set the location for builtin datasets by -```shell -export YOLOX_DATADIR=/path/to/your/datasets -``` -If `YOLOX_DATADIR` is not set, the default value of dataset directory is `./datasets` relative to your current working directory. - -## Expected dataset structure for [COCO detection](https://cocodataset.org/#download): - -``` -COCO/ - annotations/ - instances_{train,val}2017.json - {train,val}2017/ - # image files that are mentioned in the corresponding json -``` - -You can use the 2014 version of the dataset as well. diff --git a/datasets/pedestrian_coco/annotations/train_annotations.json b/datasets/pedestrian_coco/annotations/train_annotations.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/datasets/pedestrian_coco/annotations/valid_annotations.json b/datasets/pedestrian_coco/annotations/valid_annotations.json deleted file mode 100644 index e69de29bb..000000000 diff --git a/datasets/pedestrian_voc/ImageSets/Main/train.txt b/datasets/pedestrian_voc/ImageSets/Main/train.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/datasets/pedestrian_voc/ImageSets/Main/valid.txt b/datasets/pedestrian_voc/ImageSets/Main/valid.txt deleted file mode 100644 index e69de29bb..000000000 From 83a313d0f240f3381c134d2dbf55a71961917999 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:19:34 +0800 Subject: [PATCH 24/59] Add files via upload --- datasets/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 datasets/README.md diff --git a/datasets/README.md b/datasets/README.md new file mode 100644 index 000000000..76f7c310c --- /dev/null +++ b/datasets/README.md @@ -0,0 +1,24 @@ +# Prepare datasets + +If you have a dataset directory, you could use os environment variable named `YOLOX_DATADIR`. Under this directory, YOLOX will look for datasets in the structure described below, if needed. +``` +$YOLOX_DATADIR/ + COCO/ +``` +You can set the location for builtin datasets by +```shell +export YOLOX_DATADIR=/path/to/your/datasets +``` +If `YOLOX_DATADIR` is not set, the default value of dataset directory is `./datasets` relative to your current working directory. + +## Expected dataset structure for [COCO detection](https://cocodataset.org/#download): + +``` +COCO/ + annotations/ + instances_{train,val}2017.json + {train,val}2017/ + # image files that are mentioned in the corresponding json +``` + +You can use the 2014 version of the dataset as well. 
From 2231daa7a3a5a9b684a9c0cea9f9fedba9d8299c Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:23:24 +0800 Subject: [PATCH 25/59] Add files via upload --- datasets/pedestrian_coco/annotations/train_annotations.json | 0 datasets/pedestrian_coco/annotations/valid_annotations.json | 0 datasets/pedestrian_coco/train/README.md | 1 + datasets/pedestrian_coco/valid/README.md | 1 + datasets/pedestrian_voc/Annotations/README.md | 1 + datasets/pedestrian_voc/ImageSets/Main/train.txt | 0 datasets/pedestrian_voc/ImageSets/Main/valid.txt | 0 datasets/pedestrian_voc/JPEGImages/README.md | 1 + 8 files changed, 4 insertions(+) create mode 100644 datasets/pedestrian_coco/annotations/train_annotations.json create mode 100644 datasets/pedestrian_coco/annotations/valid_annotations.json create mode 100644 datasets/pedestrian_coco/train/README.md create mode 100644 datasets/pedestrian_coco/valid/README.md create mode 100644 datasets/pedestrian_voc/Annotations/README.md create mode 100644 datasets/pedestrian_voc/ImageSets/Main/train.txt create mode 100644 datasets/pedestrian_voc/ImageSets/Main/valid.txt create mode 100644 datasets/pedestrian_voc/JPEGImages/README.md diff --git a/datasets/pedestrian_coco/annotations/train_annotations.json b/datasets/pedestrian_coco/annotations/train_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_coco/annotations/valid_annotations.json b/datasets/pedestrian_coco/annotations/valid_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_coco/train/README.md b/datasets/pedestrian_coco/train/README.md new file mode 100644 index 000000000..7b9800885 --- /dev/null +++ b/datasets/pedestrian_coco/train/README.md @@ -0,0 +1 @@ +put the train images \ No newline at end of file diff --git a/datasets/pedestrian_coco/valid/README.md b/datasets/pedestrian_coco/valid/README.md new file mode 100644 index 000000000..657f11b45 --- /dev/null +++ b/datasets/pedestrian_coco/valid/README.md @@ -0,0 +1 @@ +put the valid images \ No newline at end of file diff --git a/datasets/pedestrian_voc/Annotations/README.md b/datasets/pedestrian_voc/Annotations/README.md new file mode 100644 index 000000000..2b9cfa086 --- /dev/null +++ b/datasets/pedestrian_voc/Annotations/README.md @@ -0,0 +1 @@ +put the train and valid annotations \ No newline at end of file diff --git a/datasets/pedestrian_voc/ImageSets/Main/train.txt b/datasets/pedestrian_voc/ImageSets/Main/train.txt new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/ImageSets/Main/valid.txt b/datasets/pedestrian_voc/ImageSets/Main/valid.txt new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/JPEGImages/README.md b/datasets/pedestrian_voc/JPEGImages/README.md new file mode 100644 index 000000000..62ac03c99 --- /dev/null +++ b/datasets/pedestrian_voc/JPEGImages/README.md @@ -0,0 +1 @@ +put the train and valid images \ No newline at end of file From 44be20eb94838b09c1ed27b3fb16814fe8b7e11c Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 12:42:49 +0800 Subject: [PATCH 26/59] Update yolox_voc_s.py --- .../voc_format/yolox_voc_s/yolox_voc_s.py | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py b/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py index 5f108b42b..e5cdb6103 100644 --- 
a/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py +++ b/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py @@ -1,12 +1,11 @@ # encoding: utf-8 import os -import random + import torch -import torch.nn as nn import torch.distributed as dist -from yolox.exp import Exp as MyExp from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): @@ -15,9 +14,17 @@ def __init__(self): self.num_classes = 20 self.depth = 0.33 self.width = 0.50 + self.warmup_epochs = 1 + + # ---------- transform config ------------ # + self.mosaic_prob = 1.0 + self.mixup_prob = 1.0 + self.hsv_prob = 1.0 + self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -25,34 +32,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -68,27 +83,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), image_sets=[('2007', 'test')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -109,10 +124,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - 
def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From 8fd95f668f0196d459194708c58fa9e9356759b0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:09:57 +0800 Subject: [PATCH 27/59] Update yolox_nano.py From 1ea66fd02e8897032c85bd55126de2cb9399a9f0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:10:43 +0800 Subject: [PATCH 28/59] Update yolox_s.py From 6823fda0529c21294a8a007c56361e0b252421d7 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:29:04 +0800 Subject: [PATCH 29/59] Update yolox_voc_nano.py --- .../yolox_voc_nano/yolox_voc_nano.py | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py index 0bba25ffa..0410e9664 100644 --- a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py +++ b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py @@ -2,15 +2,13 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. -from yolox.data import get_yolox_datadir -from yolox.exp import Exp as MyExp import os -import random -import torch.nn as nn + import torch import torch.distributed as dist -import sys -sys.path.append(r'D:/YOLOX') + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): @@ -19,8 +17,18 @@ def __init__(self): self.num_classes = 20 self.depth = 0.33 self.width = 0.25 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + self.mosaic_prob = 1.0 self.enable_mixup = False + #self.mixup_prob = 1.0 + self.hsv_prob = 1.0 + self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): @@ -41,7 +49,7 @@ def init_yolo(M): self.model.head.initialize_biases(1e-2) return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -49,34 +57,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + 
img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -92,27 +108,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), image_sets=[('2007', 'test')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -133,10 +149,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From feed57ddd8090cb228b7a3b9177f97449947bc82 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:38:39 +0800 Subject: [PATCH 30/59] Update yolox_voc_nano.py --- .../custom/voc_format/yolox_voc_nano/yolox_voc_nano.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py index 0410e9664..5e4fb127f 100644 --- a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py +++ b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py @@ -23,11 +23,11 @@ def __init__(self): self.test_size = (416, 416) self.warmup_epochs = 1 # ---------- transform config ------------ # - self.mosaic_prob = 1.0 + #self.mosaic_prob = 1.0 self.enable_mixup = False #self.mixup_prob = 1.0 - self.hsv_prob = 1.0 - self.flip_prob = 0.5 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): From ce9d0e163d4d9e37339b46a0e551e007afc33bca Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:40:09 
+0800 Subject: [PATCH 31/59] Update yolox_voc_tiny.py --- .../yolox_voc_tiny/yolox_voc_tiny.py | 79 +++++++++++-------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py index 499b2a59a..1ffee8079 100644 --- a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py +++ b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py @@ -3,21 +3,31 @@ # Copyright (c) Megvii, Inc. and its affiliates. import os -import random -import torch.nn as nn + import torch import torch.distributed as dist -import sys -sys.path.append(r'D:/YOLOX') -from yolox.exp import Exp as MyExp + from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): def __init__(self): super(Exp, self).__init__() + self.num_classes = 20 self.depth = 0.33 - self.width = 0.375 + self.width = 0.75 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): @@ -27,7 +37,6 @@ def init_yolo(M): if isinstance(m, nn.BatchNorm2d): m.eps = 1e-3 m.momentum = 0.03 - if "model" not in self.__dict__: from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead in_channels = [256, 512, 1024] @@ -40,7 +49,7 @@ def init_yolo(M): self.model.head.initialize_biases(1e-2) return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -48,34 +57,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -91,27 +108,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, 
) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), image_sets=[('2007', 'test')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -132,10 +149,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From 0eaf0b88bc6edce2f094201fd147ac21f7b818dc Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 13:48:26 +0800 Subject: [PATCH 32/59] Update yolox_voc_tiny.py --- .../yolox_voc_tiny/yolox_voc_tiny.py | 25 ++----------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py index 1ffee8079..d8ffbdac9 100644 --- a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py +++ b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py @@ -14,11 +14,9 @@ class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 20 self.depth = 0.33 - self.width = 0.75 - self.input_size = (416, 416) - self.mosaic_scale = (0.5, 1.5) + self.width = 0.375 + self.scale = (0.5, 1.5) self.random_size = (10, 20) self.test_size = (416, 416) self.warmup_epochs = 1 @@ -30,25 +28,6 @@ def __init__(self): #self.flip_prob = 0.5 self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - def get_model(self, sublinear=False): - - def init_yolo(M): - for m in M.modules(): - if isinstance(m, nn.BatchNorm2d): - m.eps = 1e-3 - m.momentum = 0.03 - if "model" not in self.__dict__: - from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead - in_channels = [256, 512, 1024] - # NANO model use depthwise = True, which is main difference. 
- backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) - self.model = YOLOX(backbone, head) - - self.model.apply(init_yolo) - self.model.head.initialize_biases(1e-2) - return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, From 96de204cb4a8fb9fce7fb76324f35e2ceea01d98 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:02:00 +0800 Subject: [PATCH 33/59] Update yolox_voc_nano_adam.py --- .../yolox_voc_nano_adam.py | 83 +++++++++++-------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py b/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py index 1663c88f9..11b82f646 100644 --- a/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py +++ b/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py @@ -3,27 +3,33 @@ # Copyright (c) Megvii, Inc. and its affiliates. import os -import random -import torch.nn as nn + import torch import torch.distributed as dist -import sys -sys.path.append(r'D:/YOLOX') -from yolox.exp import Exp as MyExp + from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 1 + self.num_classes = 20 self.depth = 0.33 self.width = 0.25 - self.scale = (0.5, 1.5) + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) self.random_size = (10, 20) + self.test_size = (416, 416) self.eps = 1e-8 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): @@ -43,7 +49,7 @@ def init_yolo(M): self.model.apply(init_yolo) self.model.head.initialize_biases(1e-2) return self.model - + def get_optimizer(self, batch_size): if "optimizer" not in self.__dict__: if self.warmup_epochs > 0: @@ -69,10 +75,9 @@ def get_optimizer(self, batch_size): ) # add pg1 with weight_decay optimizer.add_param_group({"params": pg2}) self.optimizer = optimizer - return self.optimizer - - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -80,34 +85,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + 
preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -123,27 +136,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), image_sets=[('2007', 'test')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -164,10 +177,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From 9a858cd1a50b6708af24d5cb57935caa91d126d0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:08:36 +0800 Subject: [PATCH 34/59] Update demo.py --- tools/demo.py | 87 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/tools/demo.py b/tools/demo.py index 1e505a3aa..0e966900c 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -2,21 +2,20 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. 
+import argparse +import os +import time from loguru import logger import cv2 import torch -from yolox.data.data_augment import preproc -from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES +from yolox.data.data_augment import ValTransform +from yolox.data.datasets import COCO_CLASSES from yolox.exp import get_exp from yolox.utils import fuse_model, get_model_info, postprocess, vis -import argparse -import os -import time - IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] @@ -44,7 +43,7 @@ def make_parser(): "--exp_file", default=None, type=str, - help="pls input your expriment description file", + help="please input your experiment description file", ) parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") parser.add_argument( @@ -53,8 +52,8 @@ def make_parser(): type=str, help="device to run our model, can either be cpu or gpu", ) - parser.add_argument("--conf", default=None, type=float, help="test conf") - parser.add_argument("--nms", default=None, type=float, help="test nms threshold") + parser.add_argument("--conf", default=0.3, type=float, help="test conf") + parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") parser.add_argument("--tsize", default=None, type=int, help="test img size") parser.add_argument( "--fp16", @@ -63,6 +62,13 @@ def make_parser(): action="store_true", help="Adopting mix precision evaluating.", ) + parser.add_argument( + "--legacy", + dest="legacy", + default=False, + action="store_true", + help="To be compatible with older versions", + ) parser.add_argument( "--fuse", dest="fuse", @@ -96,10 +102,12 @@ def __init__( self, model, exp, - cls_names=VOC_CLASSES, + cls_names=COCO_CLASSES, trt_file=None, decoder=None, device="cpu", + fp16=False, + legacy=False, ): self.model = model self.cls_names = cls_names @@ -109,6 +117,8 @@ def __init__( self.nmsthre = exp.nmsthre self.test_size = exp.test_size self.device = device + self.fp16 = fp16 + self.preproc = ValTransform(legacy=legacy) if trt_file is not None: from torch2trt import TRTModule @@ -118,8 +128,6 @@ def __init__( x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() self.model(x) self.model = model_trt - self.rgb_means = (0.485, 0.456, 0.406) - self.std = (0.229, 0.224, 0.225) def inference(self, img): img_info = {"id": 0} @@ -134,23 +142,25 @@ def inference(self, img): img_info["width"] = width img_info["raw_img"] = img - img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) + ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) img_info["ratio"] = ratio + + img, _ = self.preproc(img, None, self.test_size) img = torch.from_numpy(img).unsqueeze(0) + img = img.float() if self.device == "gpu": img = img.cuda() + if self.fp16: + img = img.half() # to FP16 with torch.no_grad(): t0 = time.time() outputs = self.model(img) - #print(type(outputs)) # torch.Tensor - print(len(outputs)) # 1 - print(outputs.shape) # (1,8400,6) - print(outputs.tolist()) # print complete list. 
if self.decoder is not None: outputs = self.decoder(outputs, dtype=outputs.type()) outputs = postprocess( - outputs, self.num_classes, self.confthre, self.nmsthre + outputs, self.num_classes, self.confthre, + self.nmsthre, class_agnostic=True ) logger.info("Infer time: {:.4f}s".format(time.time() - t0)) return outputs, img_info @@ -183,7 +193,6 @@ def visual(self, output, img_info, cls_conf=0.35): vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) return vis_res - def image_demo(predictor, vis_folder, path, current_time, save_result): if os.path.isdir(path): files = get_image_list(path) @@ -211,18 +220,19 @@ def imageflow_demo(predictor, vis_folder, current_time, args): width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float fps = cap.get(cv2.CAP_PROP_FPS) - save_folder = os.path.join( - vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) - ) - os.makedirs(save_folder, exist_ok=True) - if args.demo == "video": - save_path = os.path.join(save_folder, args.path.split("/")[-1]) - else: - save_path = os.path.join(save_folder, "camera.mp4") - logger.info(f"video save_path is {save_path}") - vid_writer = cv2.VideoWriter( - save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) - ) + if args.save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, os.path.basename(args.path)) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) while True: ret_val, frame = cap.read() if ret_val: @@ -230,7 +240,10 @@ def imageflow_demo(predictor, vis_folder, current_time, args): result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) if args.save_result: vid_writer.write(result_frame) - ch = cv2.waitKey(1) + else: + cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) + cv2.imshow("yolox", result_frame) + ch = cv2.waitKey(1000) if ch == 27 or ch == ord("q") or ch == ord("Q"): break else: @@ -244,6 +257,7 @@ def main(exp, args): file_name = os.path.join(exp.output_dir, args.experiment_name) os.makedirs(file_name, exist_ok=True) + vis_folder = None if args.save_result: vis_folder = os.path.join(file_name, "vis_res") os.makedirs(vis_folder, exist_ok=True) @@ -265,11 +279,13 @@ def main(exp, args): if args.device == "gpu": model.cuda() + if args.fp16: + model.half() # to FP16 model.eval() if not args.trt: if args.ckpt is None: - ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + ckpt_file = os.path.join(file_name, "best_ckpt.pth") else: ckpt_file = args.ckpt logger.info("loading checkpoint") @@ -295,7 +311,10 @@ def main(exp, args): trt_file = None decoder = None - predictor = Predictor(model, exp, VOC_CLASSES, trt_file, decoder, args.device) + predictor = Predictor( + model, exp, COCO_CLASSES, trt_file, decoder, + args.device, args.fp16, args.legacy, + ) current_time = time.localtime() if args.demo == "image": image_demo(predictor, vis_folder, args.path, current_time, args.save_result) From 4a1acd4f687adc3e05f344f878697ae7b6e33e38 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:20:23 +0800 Subject: [PATCH 35/59] Update coco.py --- yolox/data/datasets/coco.py | 147 ++++++++++++++++++++++++++++++------ 1 file changed, 123 
insertions(+), 24 deletions(-) diff --git a/yolox/data/datasets/coco.py b/yolox/data/datasets/coco.py index c3381724a..5ead905e4 100644 --- a/yolox/data/datasets/coco.py +++ b/yolox/data/datasets/coco.py @@ -2,16 +2,36 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. +import os +from loguru import logger + import cv2 import numpy as np from pycocotools.coco import COCO -import os - from ..dataloading import get_yolox_datadir from .datasets_wrapper import Dataset +def remove_useless_info(coco): + """ + Remove useless info in coco dataset. COCO object is modified inplace. + This function is mainly used for saving memory (save about 30% mem). + """ + if isinstance(coco, COCO): + dataset = coco.dataset + dataset.pop("info", None) + dataset.pop("licenses", None) + for img in dataset["images"]: + img.pop("license", None) + img.pop("coco_url", None) + img.pop("date_captured", None) + img.pop("flickr_url", None) + if "annotations" in coco.dataset: + for anno in coco.dataset["annotations"]: + anno.pop("segmentation", None) + + class COCODataset(Dataset): """ COCO dataset class. @@ -24,6 +44,7 @@ def __init__( name="train", img_size=(416, 416), preproc=None, + cache=False, ): """ COCO dataset initialization. Annotation data are read into memory by COCO API. @@ -41,21 +62,77 @@ def __init__( self.json_file = json_file self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file)) + remove_useless_info(self.coco) self.ids = self.coco.getImgIds() self.class_ids = sorted(self.coco.getCatIds()) - cats = self.coco.loadCats(self.coco.getCatIds()) - self._classes = tuple([c["name"] for c in cats]) - self.annotations = self._load_coco_annotations() + self.cats = self.coco.loadCats(self.coco.getCatIds()) + self._classes = tuple([c["name"] for c in self.cats]) + self.imgs = None self.name = name self.img_size = img_size self.preproc = preproc + self.annotations = self._load_coco_annotations() + if cache: + self._cache_images() def __len__(self): return len(self.ids) + def __del__(self): + del self.imgs + def _load_coco_annotations(self): return [self.load_anno_from_ids(_ids) for _ids in self.ids] + def _cache_images(self): + logger.warning( + "\n********************************************************************************\n" + "You are using cached images in RAM to accelerate training.\n" + "This requires large system RAM.\n" + "Make sure you have 200G+ RAM and 136G available disk space for training COCO.\n" + "********************************************************************************\n" + ) + max_h = self.img_size[0] + max_w = self.img_size[1] + cache_file = os.path.join(self.data_dir, f"img_resized_cache_{self.name}.array") + if not os.path.exists(cache_file): + logger.info( + "Caching images for the first time. This might take about 20 minutes for COCO" + ) + self.imgs = np.memmap( + cache_file, + shape=(len(self.ids), max_h, max_w, 3), + dtype=np.uint8, + mode="w+", + ) + from tqdm import tqdm + from multiprocessing.pool import ThreadPool + + NUM_THREADs = min(8, os.cpu_count()) + loaded_images = ThreadPool(NUM_THREADs).imap( + lambda x: self.load_resized_img(x), + range(len(self.annotations)), + ) + pbar = tqdm(enumerate(loaded_images), total=len(self.annotations)) + for k, out in pbar: + self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy() + self.imgs.flush() + pbar.close() + else: + logger.warning( + "You are using cached imgs! 
Make sure your dataset is not changed!!\n" + "Everytime the self.input_size is changed in your exp file, you need to delete\n" + "the cached data and re-generate them.\n" + ) + + logger.info("Loading cached imgs...") + self.imgs = np.memmap( + cache_file, + shape=(len(self.ids), max_h, max_w, 3), + dtype=np.uint8, + mode="r+", + ) + def load_anno_from_ids(self, id_): im_ann = self.coco.loadImgs(id_)[0] width = im_ann["width"] @@ -66,8 +143,8 @@ def load_anno_from_ids(self, id_): for obj in annotations: x1 = np.max((0, obj["bbox"][0])) y1 = np.max((0, obj["bbox"][1])) - x2 = np.min((width - 1, x1 + np.max((0, obj["bbox"][2] - 1)))) - y2 = np.min((height - 1, y1 + np.max((0, obj["bbox"][3] - 1)))) + x2 = np.min((width, x1 + np.max((0, obj["bbox"][2])))) + y2 = np.min((height, y1 + np.max((0, obj["bbox"][3])))) if obj["area"] > 0 and x2 >= x1 and y2 >= y1: obj["clean_bbox"] = [x1, y1, x2, y2] objs.append(obj) @@ -81,32 +158,56 @@ def load_anno_from_ids(self, id_): res[ix, 0:4] = obj["clean_bbox"] res[ix, 4] = cls - img_info = (height, width) + r = min(self.img_size[0] / height, self.img_size[1] / width) + res[:, :4] *= r - file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg" + img_info = (height, width) + resized_info = (int(height * r), int(width * r)) - del im_ann, annotations + file_name = ( + im_ann["file_name"] + if "file_name" in im_ann + else "{:012}".format(id_) + ".jpg" + ) - return (res, img_info, file_name) + return (res, img_info, resized_info, file_name) def load_anno(self, index): return self.annotations[index][0] - def pull_item(self, index): - id_ = self.ids[index] + def load_resized_img(self, index): + img = self.load_image(index) + r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + return resized_img - res, img_info, file_name = self.annotations[index] - # load image and preprocess - img_file = os.path.join( - self.data_dir, self.name, file_name - ) + def load_image(self, index): + file_name = self.annotations[index][3] + + img_file = os.path.join(self.data_dir, self.name, file_name) img = cv2.imread(img_file) - assert img is not None + assert img is not None, f"file named {img_file} not found" + + return img + + def pull_item(self, index): + id_ = self.ids[index] + + res, img_info, resized_info, _ = self.annotations[index] + if self.imgs is not None: + pad_img = self.imgs[index] + img = pad_img[: resized_info[0], : resized_info[1], :].copy() + else: + img = self.load_resized_img(index) - return img, res, img_info, np.array([id_]) + return img, res.copy(), img_info, np.array([id_]) - @Dataset.resize_getitem + @Dataset.mosaic_getitem def __getitem__(self, index): """ One image / label pair for the given index is picked up and pre-processed. @@ -122,10 +223,8 @@ def __getitem__(self, index): class (float): class index. xc, yc (float) : center of bbox whose values range from 0 to 1. w, h (float) : size of bbox whose values range from 0 to 1. - info_img : tuple of h, w, nh, nw, dx, dy. + info_img : tuple of h, w. h, w (int): original shape of the image - nh, nw (int): shape of the resized image without padding - dx, dy (int): pad size img_id (int): same as the input index. Used for evaluation. 
""" img, target, img_info, img_id = self.pull_item(index) From 843ce1bf50365998e4eb0827e17fb665c417eba8 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:27:15 +0800 Subject: [PATCH 36/59] Update voc.py From 7643143fa00d8e6900200b9bf71f78f2d69b1cc8 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:33:18 +0800 Subject: [PATCH 37/59] Update voc.py --- yolox/data/datasets/voc.py | 153 ++++++++++++++++++++++++++++++------- 1 file changed, 125 insertions(+), 28 deletions(-) diff --git a/yolox/data/datasets/voc.py b/yolox/data/datasets/voc.py index 465664aad..09e9833de 100644 --- a/yolox/data/datasets/voc.py +++ b/yolox/data/datasets/voc.py @@ -6,15 +6,16 @@ # Copyright (c) Ellis Brown, Max deGroot. # Copyright (c) Megvii, Inc. and its affiliates. -import cv2 -import numpy as np - -from yolox.evaluators.voc_eval import voc_eval - import os import os.path import pickle import xml.etree.ElementTree as ET +from loguru import logger + +import cv2 +import numpy as np + +from yolox.evaluators.voc_eval import voc_eval from .datasets_wrapper import Dataset from .voc_classes import VOC_CLASSES @@ -35,7 +36,9 @@ class AnnotationTransform(object): """ def __init__(self, class_to_ind=None, keep_difficult=True): - self.class_to_ind = class_to_ind or dict(zip(VOC_CLASSES, range(len(VOC_CLASSES)))) + self.class_to_ind = class_to_ind or dict( + zip(VOC_CLASSES, range(len(VOC_CLASSES))) + ) self.keep_difficult = keep_difficult def __call__(self, target): @@ -48,16 +51,20 @@ def __call__(self, target): """ res = np.empty((0, 5)) for obj in target.iter("object"): - difficult = int(obj.find("difficult").text) == 1 + difficult = obj.find("difficult") + if difficult is not None: + difficult = int(difficult.text) == 1 + else: + difficult = False if not self.keep_difficult and difficult: continue - name = obj.find("name").text.lower().strip() + name = obj.find("name").text.strip() bbox = obj.find("bndbox") pts = ["xmin", "ymin", "xmax", "ymax"] bndbox = [] for i, pt in enumerate(pts): - cur_pt = int(bbox.find(pt).text) - 1 + cur_pt = int(float(bbox.find(pt).text)) - 1 # scale height or width # cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height bndbox.append(cur_pt) @@ -66,7 +73,11 @@ def __call__(self, target): res = np.vstack((res, bndbox)) # [xmin, ymin, xmax, ymax, label_ind] # img_id = target.find('filename').text[:-4] - return res # [[xmin, ymin, xmax, ymax, label_ind], ... 
] + width = int(target.find("size").find("width").text) + height = int(target.find("size").find("height").text) + img_info = (height, width) + + return res, img_info class VOCDetection(Dataset): @@ -91,11 +102,12 @@ class VOCDetection(Dataset): def __init__( self, data_dir, - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + image_sets=[("2007", "trainval"), ("2012", "trainval")], img_size=(416, 416), preproc=None, target_transform=AnnotationTransform(), dataset_name="VOC0712", + cache=False, ): super().__init__(img_size) self.root = data_dir @@ -123,17 +135,100 @@ def __init__( ): self.ids.append((rootpath, line.strip())) """ + self.annotations = self._load_coco_annotations() + self.imgs = None + if cache: + self._cache_images() def __len__(self): return len(self.ids) - def load_anno(self, index): + def _load_coco_annotations(self): + return [self.load_anno_from_ids(_ids) for _ids in range(len(self.ids))] + + def _cache_images(self): + logger.warning( + "\n********************************************************************************\n" + "You are using cached images in RAM to accelerate training.\n" + "This requires large system RAM.\n" + "Make sure you have 60G+ RAM and 19G available disk space for training VOC.\n" + "********************************************************************************\n" + ) + max_h = self.img_size[0] + max_w = self.img_size[1] + cache_file = os.path.join(self.root, f"img_resized_cache_{self.name}.array") + if not os.path.exists(cache_file): + logger.info( + "Caching images for the first time. This might take about 3 minutes for VOC" + ) + self.imgs = np.memmap( + cache_file, + shape=(len(self.ids), max_h, max_w, 3), + dtype=np.uint8, + mode="w+", + ) + from tqdm import tqdm + from multiprocessing.pool import ThreadPool + + NUM_THREADs = min(8, os.cpu_count()) + loaded_images = ThreadPool(NUM_THREADs).imap( + lambda x: self.load_resized_img(x), + range(len(self.annotations)), + ) + pbar = tqdm(enumerate(loaded_images), total=len(self.annotations)) + for k, out in pbar: + self.imgs[k][: out.shape[0], : out.shape[1], :] = out.copy() + self.imgs.flush() + pbar.close() + else: + logger.warning( + "You are using cached imgs! 
Make sure your dataset is not changed!!\n" + "Everytime the self.input_size is changed in your exp file, you need to delete\n" + "the cached data and re-generate them.\n" + ) + + logger.info("Loading cached imgs...") + self.imgs = np.memmap( + cache_file, + shape=(len(self.ids), max_h, max_w, 3), + dtype=np.uint8, + mode="r+", + ) + + def load_anno_from_ids(self, index): img_id = self.ids[index] target = ET.parse(self._annopath % img_id).getroot() - if self.target_transform is not None: - target = self.target_transform(target) - return target + assert self.target_transform is not None + res, img_info = self.target_transform(target) + height, width = img_info + + r = min(self.img_size[0] / height, self.img_size[1] / width) + res[:, :4] *= r + resized_info = (int(height * r), int(width * r)) + + return (res, img_info, resized_info) + + def load_anno(self, index): + return self.annotations[index][0] + + def load_resized_img(self, index): + img = self.load_image(index) + r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + return resized_img + + def load_image(self, index): + img_id = self.ids[index] + img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) + assert img is not None, f"file named {self._imgpath % img_id} not found" + + return img def pull_item(self, index): """Returns the original image and target at an index for mixup @@ -146,17 +241,17 @@ def pull_item(self, index): Return: img, target """ - img_id = self.ids[index] - img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) - height, width, _ = img.shape - - target = self.load_anno(index) - - img_info = (height, width) + if self.imgs is not None: + target, img_info, resized_info = self.annotations[index] + pad_img = self.imgs[index] + img = pad_img[: resized_info[0], : resized_info[1], :].copy() + else: + img = self.load_resized_img(index) + target, img_info, _ = self.annotations[index] return img, target, img_info, index - @Dataset.resize_getitem + @Dataset.mosaic_getitem def __getitem__(self, index): img, target, img_info, img_id = self.pull_item(index) @@ -175,7 +270,9 @@ def evaluate_detections(self, all_boxes, output_dir=None): all_boxes[class][image] = [] or np.array of shape #dets x 5 """ self._write_voc_results_file(all_boxes) - IouTh = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + IouTh = np.linspace( + 0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True + ) mAPs = [] for iou in IouTh: mAP = self._do_python_eval(output_dir, iou) @@ -191,6 +288,7 @@ def _get_voc_results_file_template(self): filename = "comp4_det_test" + "_{:s}.txt" #filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") filedir = os.path.join(self.root, "results") + #filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") if not os.path.exists(filedir): os.makedirs(filedir) path = os.path.join(filedir, filename) @@ -224,8 +322,8 @@ def _write_voc_results_file(self, all_boxes): def _do_python_eval(self, output_dir="output", iou=0.5): #rootpath = os.path.join(self.root, "VOC" + self._year) rootpath = self.root - name = self.image_set[0] - annopath = os.path.join(rootpath, "Annotations", "{}.xml") + name = self.image_set[0][1] + annopath = os.path.join(rootpath, "Annotations", "{:s}.xml") imagesetfile = os.path.join(rootpath, "ImageSets", "Main", name + ".txt") cachedir = os.path.join( #self.root, 
"annotations_cache", "VOC" + self._year, name @@ -235,8 +333,7 @@ def _do_python_eval(self, output_dir="output", iou=0.5): os.makedirs(cachedir) aps = [] # The PASCAL VOC metric changed in 2010 - use_07_metric = True - #use_07_metric = True if int(self._year) < 2010 else False + use_07_metric = True if int(self._year) < 2010 else False print("Eval IoU : {:.2f}".format(iou)) if output_dir is not None and not os.path.isdir(output_dir): os.mkdir(output_dir) From 82e70983f58b3850b65eb6892a21d9ab1570f821 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:34:42 +0800 Subject: [PATCH 38/59] Update visualize.py --- yolox/utils/visualize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index b31741f3d..4be7d616c 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -27,7 +27,7 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): y1 = int(box[3]) color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() - text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) + text = '{:.1f}%'.format(score * 100)#'{}:{:.1f}%'.format(class_names[cls_id], score * 100) txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) font = cv2.FONT_HERSHEY_SIMPLEX From ea7a361350586dcd23d8800b5d22200b497e39c4 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:37:23 +0800 Subject: [PATCH 39/59] Update yolox_base.py --- yolox/exp/yolox_base.py | 191 ++++++++++++---------------------------- 1 file changed, 55 insertions(+), 136 deletions(-) diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 5029f42e7..31308a077 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# Copyright (c) Megvii Inc. All rights reserved. - -import os -import random +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. import torch import torch.distributed as dist import torch.nn as nn +import os +import random + from .base_exp import BaseExp @@ -17,106 +17,50 @@ def __init__(self): super().__init__() # ---------------- model config ---------------- # - # detect classes number of model - self.num_classes = 80 - # factor of model depth + self.num_classes = 1 self.depth = 1.00 - # factor of model width self.width = 1.00 - # activation name. For example, if using "relu", then "silu" will be replaced to "relu". - """ - self.act = "silu" - """ + # ---------------- dataloader config ---------------- # # set worker to 4 for shorter dataloader init time - # If your training process cost many memory, reduce this value. self.data_num_workers = 4 - self.input_size = (640, 640) # (height, width) - # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32]. - # To disable multiscale training, set the value to 0. 
- """ - self.multiscale_range = 5 - """ - # You can uncomment this line to specify a multiscale range - # self.random_size = (14, 26) - # dir of dataset images, if data_dir is None, this project will use `datasets` dir - """ - self.data_dir = None - """ - # name of annotation file for training + self.input_size = (640, 640) + self.random_size = (14, 26) self.train_ann = "instances_train2017.json" - # name of annotation file for evaluation self.val_ann = "instances_val2017.json" - # name of annotation file for testing - """ - self.test_ann = "instances_test2017.json" - """ + # --------------- transform config ----------------- # - """ - # prob of applying mosaic aug - self.mosaic_prob = 1.0 - # prob of applying mixup aug - self.mixup_prob = 1.0 - # prob of applying hsv aug - self.hsv_prob = 1.0 - # prob of applying flip aug - self.flip_prob = 0.5 - # rotation angle range, for example, if set to 2, the true range is (-2, 2) - """ self.degrees = 10.0 - # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1) self.translate = 0.1 - self.mosaic_scale = (0.1, 2) - # apply mixup aug or not - self.enable_mixup = True - self.mixup_scale = (0.5, 1.5) - # shear angle range, for example, if set to 2, the true range is (-2, 2) + self.scale = (0.1, 2) + self.mscale = (0.8, 1.6) self.shear = 2.0 + self.perspective = 0.0 + self.enable_mixup = True - # -------------- training config --------------------- # - # epoch number used for warmup + # -------------- training config --------------------- #s self.warmup_epochs = 5 - # max training epoch - self.max_epoch = 300 - # minimum learning rate during warmup + self.max_epoch = 500 self.warmup_lr = 0 - self.min_lr_ratio = 0.05 - # learning rate for one image. During training, lr will multiply batchsize. self.basic_lr_per_img = 0.01 / 64.0 - # name of LRScheduler self.scheduler = "yoloxwarmcos" - # last #epoch to close augmention like mosaic - self.no_aug_epochs = 0 - # apply EMA during training + self.no_aug_epochs = 0#15 + self.min_lr_ratio = 0.05 self.ema = True - # weight decay of optimizer self.weight_decay = 5e-4 - # momentum of optimizer self.momentum = 0.9 - # log period in iter, for example, - # if set to 1, user could see log every iteration. self.print_interval = 10 - # eval period in epoch, for example, - # if set to 1, model will be evaluate after every epoch. self.eval_interval = 10 - # save history checkpoint or not. - # If set to False, yolox will only save latest and best ckpt. 
- self.save_history_ckpt = True - # name of experiment self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] # ----------------- testing config ------------------ # - # output image size during evaluation/test self.test_size = (640, 640) - # confidence threshold during evaluation/test, - # boxes whose scores are less than test_conf will be filtered self.test_conf = 0.01 - # nms threshold self.nmsthre = 0.65 def get_model(self): - from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead def init_yolo(M): for m in M.modules(): @@ -126,55 +70,50 @@ def init_yolo(M): if getattr(self, "model", None) is None: in_channels = [256, 512, 1024] - backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act) + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels) self.model = YOLOX(backbone, head) self.model.apply(init_yolo) self.model.head.initialize_biases(1e-2) - self.model.train() return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False): from yolox.data import ( COCODataset, - TrainTransform, - YoloBatchSampler, DataLoader, InfiniteSampler, MosaicDetection, - worker_init_reset_seed, + TrainTransform, + YoloBatchSampler + ) + + dataset = COCODataset( + data_dir=None, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), ) - from yolox.utils import wait_for_the_master - - with wait_for_the_master(): - dataset = COCODataset( - data_dir=self.data_dir, - json_file=self.train_ann, - img_size=self.input_size, - preproc=TrainTransform( - max_labels=50, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob), - cache=cache_img, - ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), max_labels=120, - flip_prob=self.flip_prob, - hsv_prob=self.hsv_prob), + ), degrees=self.degrees, translate=self.translate, - mosaic_scale=self.mosaic_scale, - mixup_scale=self.mixup_scale, + scale=self.scale, shear=self.shear, + perspective=self.perspective, enable_mixup=self.enable_mixup, - mosaic_prob=self.mosaic_prob, - mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -188,16 +127,12 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=Fa sampler=sampler, batch_size=batch_size, drop_last=False, + input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler - - # Make sure each process has different random seed, especially for 'fork' method. - # Check https://github.com/pytorch/pytorch/issues/63311 for more details. 
- dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed - train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader @@ -207,10 +142,6 @@ def random_resize(self, data_loader, epoch, rank, is_distributed): if rank == 0: size_factor = self.input_size[1] * 1.0 / self.input_size[0] - if not hasattr(self, 'random_size'): - min_size = int(self.input_size[0] / 32) - self.multiscale_range - max_size = int(self.input_size[0] / 32) + self.multiscale_range - self.random_size = (min_size, max_size) size = random.randint(*self.random_size) size = (int(32 * size), 32 * int(size * size_factor)) tensor[0] = size[0] @@ -220,20 +151,11 @@ def random_resize(self, data_loader, epoch, rank, is_distributed): dist.barrier() dist.broadcast(tensor, 0) - input_size = (tensor[0].item(), tensor[1].item()) + input_size = data_loader.change_input_dim( + multiple=(tensor[0].item(), tensor[1].item()), random_range=None + ) return input_size - def preprocess(self, inputs, targets, tsize): - scale_y = tsize[0] / self.input_size[0] - scale_x = tsize[1] / self.input_size[1] - if scale_x != 1 or scale_y != 1: - inputs = nn.functional.interpolate( - inputs, size=tsize, mode="bilinear", align_corners=False - ) - targets[..., 1::2] = targets[..., 1::2] * scale_x - targets[..., 2::2] = targets[..., 2::2] * scale_y - return inputs, targets - def get_optimizer(self, batch_size): if "optimizer" not in self.__dict__: if self.warmup_epochs > 0: @@ -277,15 +199,18 @@ def get_lr_scheduler(self, lr, iters_per_epoch): ) return scheduler - def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False): from yolox.data import COCODataset, ValTransform valdataset = COCODataset( - data_dir=self.data_dir, - json_file=self.val_ann if not testdev else self.test_ann, - name="val2017" if not testdev else "test2017", + data_dir=None, + json_file=self.val_ann if not testdev else "image_info_test-dev2017.json", + #name="val2017" if not testdev else "test2017", + name="valid" if not testdev else "test", img_size=self.test_size, - preproc=ValTransform(legacy=legacy), + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) + ), ) if is_distributed: @@ -306,10 +231,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=Fals return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False): from yolox.evaluators import COCOEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) evaluator = COCOEvaluator( dataloader=val_loader, img_size=self.test_size, @@ -320,11 +245,5 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False) ) return evaluator - def get_trainer(self, args): - from yolox.core import Trainer - trainer = Trainer(self, args) - # NOTE: trainer shouldn't be an attribute of exp object - return trainer - - def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False): - return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs) + def eval(self, model, evaluator, is_distributed, half=False): + return evaluator.evaluate(model, is_distributed, half) From fdc26eb6f8a01af264e8ac8266d9e17f18233f93 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: 
Sun, 10 Jul 2022 14:41:59 +0800 Subject: [PATCH 40/59] Update yolox_base.py --- yolox/exp/yolox_base.py | 176 ++++++++++++++++++++++++++++------------ 1 file changed, 125 insertions(+), 51 deletions(-) diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 31308a077..c96e195d0 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +# Copyright (c) Megvii Inc. All rights reserved. + +import os +import random import torch import torch.distributed as dist import torch.nn as nn -import os -import random - from .base_exp import BaseExp @@ -17,50 +17,98 @@ def __init__(self): super().__init__() # ---------------- model config ---------------- # - self.num_classes = 1 + # detect classes number of model + self.num_classes = 80 + # factor of model depth self.depth = 1.00 + # factor of model width self.width = 1.00 + # activation name. For example, if using "relu", then "silu" will be replaced to "relu". + self.act = "silu" # ---------------- dataloader config ---------------- # # set worker to 4 for shorter dataloader init time + # If your training process cost many memory, reduce this value. self.data_num_workers = 4 - self.input_size = (640, 640) - self.random_size = (14, 26) + self.input_size = (640, 640) # (height, width) + # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32]. + # To disable multiscale training, set the value to 0. + self.multiscale_range = 5 + # You can uncomment this line to specify a multiscale range + # self.random_size = (14, 26) + # dir of dataset images, if data_dir is None, this project will use `datasets` dir + self.data_dir = None + # name of annotation file for training self.train_ann = "instances_train2017.json" + # name of annotation file for evaluation self.val_ann = "instances_val2017.json" + # name of annotation file for testing + self.test_ann = "instances_test2017.json" # --------------- transform config ----------------- # + # prob of applying mosaic aug + self.mosaic_prob = 1.0 + # prob of applying mixup aug + self.mixup_prob = 1.0 + # prob of applying hsv aug + self.hsv_prob = 1.0 + # prob of applying flip aug + self.flip_prob = 0.5 + # rotation angle range, for example, if set to 2, the true range is (-2, 2) self.degrees = 10.0 + # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1) self.translate = 0.1 - self.scale = (0.1, 2) - self.mscale = (0.8, 1.6) - self.shear = 2.0 - self.perspective = 0.0 + self.mosaic_scale = (0.1, 2) + # apply mixup aug or not self.enable_mixup = True + self.mixup_scale = (0.5, 1.5) + # shear angle range, for example, if set to 2, the true range is (-2, 2) + self.shear = 2.0 - # -------------- training config --------------------- #s + # -------------- training config --------------------- # + # epoch number used for warmup self.warmup_epochs = 5 - self.max_epoch = 500 + # max training epoch + self.max_epoch = 300 + # minimum learning rate during warmup self.warmup_lr = 0 + self.min_lr_ratio = 0.05 + # learning rate for one image. During training, lr will multiply batchsize. 
self.basic_lr_per_img = 0.01 / 64.0 + # name of LRScheduler self.scheduler = "yoloxwarmcos" - self.no_aug_epochs = 0#15 - self.min_lr_ratio = 0.05 + # last #epoch to close augmention like mosaic + self.no_aug_epochs = 15 + # apply EMA during training self.ema = True + # weight decay of optimizer self.weight_decay = 5e-4 + # momentum of optimizer self.momentum = 0.9 + # log period in iter, for example, + # if set to 1, user could see log every iteration. self.print_interval = 10 + # eval period in epoch, for example, + # if set to 1, model will be evaluate after every epoch. self.eval_interval = 10 + # save history checkpoint or not. + # If set to False, yolox will only save latest and best ckpt. + self.save_history_ckpt = True + # name of experiment self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] # ----------------- testing config ------------------ # + # output image size during evaluation/test self.test_size = (640, 640) + # confidence threshold during evaluation/test, + # boxes whose scores are less than test_conf will be filtered self.test_conf = 0.01 + # nms threshold self.nmsthre = 0.65 def get_model(self): - from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead def init_yolo(M): for m in M.modules(): @@ -70,50 +118,55 @@ def init_yolo(M): if getattr(self, "model", None) is None: in_channels = [256, 512, 1024] - backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) - head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels) + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act) self.model = YOLOX(backbone, head) self.model.apply(init_yolo) self.model.head.initialize_biases(1e-2) + self.model.train() return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( COCODataset, + TrainTransform, + YoloBatchSampler, DataLoader, InfiniteSampler, MosaicDetection, - TrainTransform, - YoloBatchSampler - ) - - dataset = COCODataset( - data_dir=None, - json_file=self.train_ann, - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + worker_init_reset_seed, ) + from yolox.utils import wait_for_the_master + + with wait_for_the_master(): + dataset = COCODataset( + data_dir=self.data_dir, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -127,12 +180,16 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": 
self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader @@ -142,6 +199,10 @@ def random_resize(self, data_loader, epoch, rank, is_distributed): if rank == 0: size_factor = self.input_size[1] * 1.0 / self.input_size[0] + if not hasattr(self, 'random_size'): + min_size = int(self.input_size[0] / 32) - self.multiscale_range + max_size = int(self.input_size[0] / 32) + self.multiscale_range + self.random_size = (min_size, max_size) size = random.randint(*self.random_size) size = (int(32 * size), 32 * int(size * size_factor)) tensor[0] = size[0] @@ -151,11 +212,20 @@ def random_resize(self, data_loader, epoch, rank, is_distributed): dist.barrier() dist.broadcast(tensor, 0) - input_size = data_loader.change_input_dim( - multiple=(tensor[0].item(), tensor[1].item()), random_range=None - ) + input_size = (tensor[0].item(), tensor[1].item()) return input_size + def preprocess(self, inputs, targets, tsize): + scale_y = tsize[0] / self.input_size[0] + scale_x = tsize[1] / self.input_size[1] + if scale_x != 1 or scale_y != 1: + inputs = nn.functional.interpolate( + inputs, size=tsize, mode="bilinear", align_corners=False + ) + targets[..., 1::2] = targets[..., 1::2] * scale_x + targets[..., 2::2] = targets[..., 2::2] * scale_y + return inputs, targets + def get_optimizer(self, batch_size): if "optimizer" not in self.__dict__: if self.warmup_epochs > 0: @@ -199,18 +269,16 @@ def get_lr_scheduler(self, lr, iters_per_epoch): ) return scheduler - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import COCODataset, ValTransform valdataset = COCODataset( - data_dir=None, - json_file=self.val_ann if not testdev else "image_info_test-dev2017.json", + data_dir=self.data_dir, + json_file=self.val_ann if not testdev else self.test_ann, #name="val2017" if not testdev else "test2017", name="valid" if not testdev else "test", img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -231,10 +299,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import COCOEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = COCOEvaluator( dataloader=val_loader, img_size=self.test_size, @@ -245,5 +313,11 @@ def get_evaluator(self, batch_size, is_distributed, testdev=False): ) return evaluator - def eval(self, model, evaluator, is_distributed, half=False): - return evaluator.evaluate(model, is_distributed, half) + def get_trainer(self, args): + from yolox.core import Trainer + trainer = Trainer(self, args) + # NOTE: trainer shouldn't be an attribute of exp object + return trainer + + def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False): + return evaluator.evaluate(model, 
is_distributed, half, return_outputs=return_outputs) From 18a6d9e5b031b78cf8dc44fe8828c40d9d9ad7f6 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:54:16 +0800 Subject: [PATCH 41/59] Update yolox_voc_nano.py --- .../custom/voc_format/yolox_voc_nano/yolox_voc_nano.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py index 5e4fb127f..0fb62da78 100644 --- a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py +++ b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py @@ -67,8 +67,8 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=Fa with wait_for_the_master(local_rank): dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], img_size=self.input_size, preproc=TrainTransform( max_labels=50, @@ -125,8 +125,8 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=Fals from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), - image_sets=[('2007', 'test')], + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], img_size=self.test_size, preproc=ValTransform(legacy=legacy), ) From 2adbb068406a25f9540ccf5bc3ad9f8f441aee36 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:57:49 +0800 Subject: [PATCH 42/59] Update yolox_voc_nano.py --- .../voc_format/yolox_voc_nano.py | 82 +++++++++++-------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py index a3d02e62a..0fb62da78 100644 --- a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py @@ -2,23 +2,33 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. 
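# --- Editorial note (sketch, not part of the patch): in the image_sets values used
# --- above and below, ('train') and ('valid') are not one-element tuples -- a bare
# --- parenthesized string is just the string itself -- so the loader effectively
# --- receives image_sets=['train'] / ['valid'] instead of the stock VOCdevkit
# --- (year, split) pairs such as ('2007', 'trainval'). This presumably relies on a
# --- VOCDetection variant that accepts a plain split name for the custom
# --- pedestrian_voc layout.
# For example:
image_sets = [('train')]
assert image_sets == ['train']   # ('train') is just the string 'train'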
-from yolox.data import get_yolox_datadir -from yolox.exp import Exp as MyExp import os -import random -import torch.nn as nn + import torch import torch.distributed as dist +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 1 + self.num_classes = 20 self.depth = 0.33 self.width = 0.25 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): @@ -39,7 +49,7 @@ def init_yolo(M): self.model.head.initialize_biases(1e-2) return self.model - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -47,40 +57,48 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), - image_sets=[('train')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset if is_distributed: - batch_size = batch_size // dist.get_world_size() + batch_size = batch_size // dist.get_world_size() sampler = InfiniteSampler( len(self.dataset), seed=self.seed if self.seed else 0 @@ -90,31 +108,31 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import 
VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), image_sets=[('valid')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: - batch_size = batch_size // dist.get_world_size() + batch_size = batch_size // dist.get_world_size() sampler = torch.utils.data.distributed.DistributedSampler( valdataset, shuffle=False ) @@ -131,10 +149,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From f3a902bfea311e2972aa03fd20cf45162f01ba2e Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 14:58:26 +0800 Subject: [PATCH 43/59] Update yolox_voc_nano.py --- exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py index 0fb62da78..c38679cca 100644 --- a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py @@ -14,13 +14,13 @@ class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 20 + self.num_classes = 1 self.depth = 0.33 self.width = 0.25 - self.input_size = (416, 416) + #self.input_size = (416, 416) self.mosaic_scale = (0.5, 1.5) self.random_size = (10, 20) - self.test_size = (416, 416) + #self.test_size = (416, 416) self.warmup_epochs = 1 # ---------- transform config ------------ # #self.mosaic_prob = 1.0 From 58dd583b129b35f7698b2bd6ad57ce761cc674a9 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 15:02:00 +0800 Subject: [PATCH 44/59] Update yolox_voc_s.py --- .../voc_format/yolox_voc_s.py | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py index fa27310ab..4801f559d 100644 --- a/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py @@ -1,12 +1,11 @@ # encoding: utf-8 import os -import random + import torch -import torch.nn as nn import torch.distributed as dist -from yolox.exp import Exp as MyExp from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): @@ -15,9 +14,17 @@ def __init__(self): self.num_classes = 1 self.depth = 0.33 self.width = 0.50 + self.warmup_epochs = 1 + + # ---------- transform config ------------ # + self.mosaic_prob = 1.0 + self.mixup_prob = 1.0 + self.hsv_prob = 1.0 + self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ 
-25,34 +32,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), - image_sets=[('train')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -68,27 +83,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), image_sets=[('valid')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -109,10 +124,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From 8dd855bd1ef6d651457d681480d45f56b82678ba Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 15:04:03 +0800 Subject: [PATCH 45/59] Update yolox_voc_nano_adam.py --- .../voc_format/yolox_voc_nano_adam.py | 81 +++++++++++-------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py index 8e05e67ff..e35d8bd2f 100644 --- 
a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py @@ -3,25 +3,33 @@ # Copyright (c) Megvii, Inc. and its affiliates. import os -import random -import torch.nn as nn + import torch import torch.distributed as dist -from yolox.exp import Exp as MyExp + from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 1 + self.num_classes = 20 self.depth = 0.33 self.width = 0.25 - self.scale = (0.5, 1.5) + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) self.random_size = (10, 20) + self.test_size = (416, 416) self.eps = 1e-8 - self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] def get_model(self, sublinear=False): @@ -41,7 +49,7 @@ def init_yolo(M): self.model.apply(init_yolo) self.model.head.initialize_biases(1e-2) return self.model - + def get_optimizer(self, batch_size): if "optimizer" not in self.__dict__: if self.warmup_epochs > 0: @@ -67,10 +75,9 @@ def get_optimizer(self, batch_size): ) # add pg1 with weight_decay optimizer.add_param_group({"params": pg2}) self.optimizer = optimizer - return self.optimizer - - def get_data_loader(self, batch_size, is_distributed, no_aug=False): + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): from yolox.data import ( VOCDetection, TrainTransform, @@ -78,34 +85,42 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): DataLoader, InfiniteSampler, MosaicDetection, + worker_init_reset_seed, ) - - dataset = VOCDetection( - data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), - image_sets=[('train')], - img_size=self.input_size, - preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - max_labels=50, - ), + from yolox.utils import ( + wait_for_the_master, + get_local_rank, ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) dataset = MosaicDetection( dataset, mosaic=not no_aug, img_size=self.input_size, preproc=TrainTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), max_labels=120, - ), + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), degrees=self.degrees, translate=self.translate, - scale=self.scale, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, shear=self.shear, - perspective=self.perspective, enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, ) self.dataset = dataset @@ -121,27 +136,27 @@ def get_data_loader(self, batch_size, is_distributed, no_aug=False): sampler=sampler, batch_size=batch_size, drop_last=False, - input_dimension=self.input_size, mosaic=not no_aug, ) dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + 
dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + train_loader = DataLoader(self.dataset, **dataloader_kwargs) return train_loader - def get_eval_loader(self, batch_size, is_distributed, testdev=False): + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.data import VOCDetection, ValTransform valdataset = VOCDetection( data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), image_sets=[('valid')], img_size=self.test_size, - preproc=ValTransform( - rgb_means=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - ), + preproc=ValTransform(legacy=legacy), ) if is_distributed: @@ -162,10 +177,10 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False): return val_loader - def get_evaluator(self, batch_size, is_distributed, testdev=False): + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): from yolox.evaluators import VOCEvaluator - val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) evaluator = VOCEvaluator( dataloader=val_loader, img_size=self.test_size, From 79c33158e364543c6897da51e07e07b8dc583f3a Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 15:05:16 +0800 Subject: [PATCH 46/59] Update yolox_voc_nano_adam.py --- exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py index e35d8bd2f..fb5cb1e60 100644 --- a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py @@ -14,7 +14,7 @@ class Exp(MyExp): def __init__(self): super(Exp, self).__init__() - self.num_classes = 20 + self.num_classes = 1 self.depth = 0.33 self.width = 0.25 self.input_size = (416, 416) From 33954ea25eed0d0e4ee15394cb436cf3af442b76 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 15:59:41 +0800 Subject: [PATCH 47/59] Update demo.py --- tools/demo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/demo.py b/tools/demo.py index 0e966900c..1730d82ae 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -118,7 +118,7 @@ def __init__( self.test_size = exp.test_size self.device = device self.fp16 = fp16 - self.preproc = ValTransform(legacy=legacy) + self.preproc = ValTransform(legacy)#=legacy) if trt_file is not None: from torch2trt import TRTModule @@ -160,7 +160,7 @@ def inference(self, img): outputs = self.decoder(outputs, dtype=outputs.type()) outputs = postprocess( outputs, self.num_classes, self.confthre, - self.nmsthre, class_agnostic=True + self.nmsthre#, class_agnostic=True ) logger.info("Infer time: {:.4f}s".format(time.time() - t0)) return outputs, img_info From f983881681a63f5fbae9ddafdd2f5215762582d4 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:22:40 +0800 Subject: [PATCH 48/59] Update requirements.txt --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7227f09b4..46efe646f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,6 @@ tabulate # verified versions # pycocotools corresponds to https://github.com/ppwwyyxx/cocoapi 
pycocotools>=2.0.2 -onnx==1.8.1 -onnxruntime==1.8.0 +onnx>=1.8.1 +onnxruntime>=1.8.0 onnx-simplifier==0.3.5 From 577c62f6572ea54635aa6715e4f667bd2926ae76 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Sun, 10 Jul 2022 19:21:38 +0800 Subject: [PATCH 49/59] Update demo.py --- tools/demo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/demo.py b/tools/demo.py index 1730d82ae..8588b1e31 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -118,7 +118,7 @@ def __init__( self.test_size = exp.test_size self.device = device self.fp16 = fp16 - self.preproc = ValTransform(legacy)#=legacy) + self.preproc = ValTransform(legacy=legacy) if trt_file is not None: from torch2trt import TRTModule @@ -160,7 +160,7 @@ def inference(self, img): outputs = self.decoder(outputs, dtype=outputs.type()) outputs = postprocess( outputs, self.num_classes, self.confthre, - self.nmsthre#, class_agnostic=True + self.nmsthre, class_agnostic=True ) logger.info("Infer time: {:.4f}s".format(time.time() - t0)) return outputs, img_info @@ -190,9 +190,11 @@ def visual(self, output, img_info, cls_conf=0.35): cls = output[:, 6] scores = output[:, 4] * output[:, 5] + vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) return vis_res + def image_demo(predictor, vis_folder, path, current_time, save_result): if os.path.isdir(path): files = get_image_list(path) @@ -243,7 +245,7 @@ def imageflow_demo(predictor, vis_folder, current_time, args): else: cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) cv2.imshow("yolox", result_frame) - ch = cv2.waitKey(1000) + ch = cv2.waitKey(1) if ch == 27 or ch == ord("q") or ch == ord("Q"): break else: From 16eb42a8cfcf1693063588dba4aae7b8391638d3 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:33:15 +0800 Subject: [PATCH 50/59] Update data_augment.py --- yolox/data/data_augment.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py index 21cd7b56d..f4da7580b 100644 --- a/yolox/data/data_augment.py +++ b/yolox/data/data_augment.py @@ -157,6 +157,13 @@ def preproc(img, input_size, swap=(2, 0, 1)): padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) return padded_img, r +def sliding_window(image, YstepSize, XstepSize, windowSize): + # slide a window across the image + for y in range(0, image.shape[0], YstepSize): + for x in range(0, image.shape[1], XstepSize): + # yield the current window + yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]]) + class TrainTransform: def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0): From 8c9bba339eee2ab76f1683b74cd0709e682e49d0 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Mon, 11 Jul 2022 22:52:30 +0800 Subject: [PATCH 51/59] Update visualize.py --- yolox/utils/visualize.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index 4be7d616c..72eba4d65 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -45,15 +45,24 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): class_count[class_names[cls_id]] = class_count[class_names[cls_id]]+1 class_AP[class_names[cls_id]] = class_AP[class_names[cls_id]]+float('{:.1f}'.format(score * 100)) cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) - line = 0 - for k in class_count: - cv2.putText(img, 
str(k)+": "+str(class_count[k]), (15,25+line), font, 0.8, (0, 255, 255), thickness=2) - if class_count[k] !=0: - class_AP[k]=class_AP[k]/class_count[k] - else: - class_AP[k]=0.0 - cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (15,50+line), font, 0.8, (0, 255, 255), thickness=2) - line = line+50 + + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) return img From a73cef19633d66c7d1a40ff549fce8e749ffe398 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Mon, 11 Jul 2022 23:03:27 +0800 Subject: [PATCH 52/59] Update demo.py --- tools/demo.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/demo.py b/tools/demo.py index 8588b1e31..dc065ca20 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -12,7 +12,7 @@ import torch from yolox.data.data_augment import ValTransform -from yolox.data.datasets import COCO_CLASSES +from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES from yolox.exp import get_exp from yolox.utils import fuse_model, get_model_info, postprocess, vis @@ -175,11 +175,23 @@ def visual(self, output, img_info, cls_conf=0.35): for i in self.cls_names: class_count[i] = 0 class_AP[i] = 0.0 - line = 0 - for k in class_count: - cv2.putText(img, str(k)+": "+str(class_count[k]), (15,25+line), font, 0.8, (0, 255, 255), thickness=2) - cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (15,50+line), font, 0.8, (0, 255, 255), thickness=2) - line = line+50 + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) return img output = output.cpu() From fefd852015c53571905778a3645c7f53e7f699f9 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Mon, 11 Jul 2022 23:06:56 +0800 Subject: [PATCH 53/59] Add files via upload --- tools/demo_sliding_window.py | 387 +++++++++++++++++++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 tools/demo_sliding_window.py diff --git a/tools/demo_sliding_window.py b/tools/demo_sliding_window.py new file mode 100644 index 000000000..29846e4aa --- /dev/null +++ b/tools/demo_sliding_window.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import argparse +import os +import time +from loguru import logger + +import cv2 + +import torch + +from yolox.data.data_augment import ValTransform, sliding_window +from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES +from yolox.exp import get_exp +from yolox.utils import fuse_model, get_model_info, postprocess, vis + +IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Demo!") + parser.add_argument( + "demo", default="image", help="demo type, eg. image, video and webcam" + ) + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + parser.add_argument( + "--path", default="./assets/dog.jpg", help="path to images or video" + ) + parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") + parser.add_argument( + "--save_result", + action="store_true", + help="whether to save the inference result of image/video", + ) + + # exp file + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="please input your experiment description file", + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument( + "--device", + default="cpu", + type=str, + help="device to run our model, can either be cpu or gpu", + ) + parser.add_argument("--conf", default=0.3, type=float, help="test conf") + parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--legacy", + dest="legacy", + default=False, + action="store_true", + help="To be compatible with older versions", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + return parser + + +def get_image_list(path): + image_names = [] + for maindir, subdir, file_name_list in os.walk(path): + for filename in file_name_list: + apath = os.path.join(maindir, filename) + ext = os.path.splitext(apath)[1] + if ext in IMAGE_EXT: + image_names.append(apath) + return image_names + + +class Predictor(object): + def __init__( + self, + model, + exp, + cls_names=COCO_CLASSES, + trt_file=None, + decoder=None, + device="cpu", + fp16=False, + legacy=False, + ): + self.model = model + self.cls_names = cls_names + self.decoder = decoder + self.num_classes = exp.num_classes + self.confthre = exp.test_conf + self.nmsthre = exp.nmsthre + self.test_size = exp.test_size + self.device = device + self.fp16 = fp16 + self.preproc = ValTransform(legacy=legacy) + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() + self.model(x) + self.model = model_trt + + def inference(self, img): + img_info = {"id": 0} + if isinstance(img, str): + img_info["file_name"] = os.path.basename(img) + img = cv2.imread(img) + else: + img_info["file_name"] = None + + height, width = img.shape[:2] + img_info["height"] = height + img_info["width"] = width + img_info["raw_img"] = img + + #ratio = 
min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) + #img_info["ratio"] = ratio + (winW, winH) = (exp.test_size[1], exp.test_size[0]) + (imgW, imgH)= (img.shape[1],img.shape[0]) + if (imgH%winH): + y_stepSize = winH-(winH*(imgH//winH+1)-imgH)//(imgH//winH) + if(imgW%winW): + x_stepSize = winW-(winW*(imgW//winW+1)-imgW)//(imgW//winW) + else: + x_stepSize = winW + else: + y_stepSize = winH + if(imgW%winW): + x_stepSize = winW-(winW*(imgW//winW+1)-imgW)//(imgW//winW) + else: + x_stepSize = winW + numW = 0 + for (x, y, window) in sliding_window(img, YstepSize=y_stepSize, XstepSize=x_stepSize, windowSize=(winW, winH)): + # if the window does not meet our desired window size, ignore it + if window.shape[0] != winH or window.shape[1] != winW: + continue + + Wimg, _ = self.preproc(window, None, self.test_size) + Wimg = torch.from_numpy(Wimg).unsqueeze(0) + Wimg = Wimg.float() + if self.device == "gpu": + Wimg = Wimg.cuda() + if self.fp16: + Wimg = Wimg.half() # to FP16 + + with torch.no_grad(): + t0 = time.time() + Woutputs = self.model(Wimg) + if numW != 0: + Woutputs[:, :, 0] = torch.add(Woutputs[:, :,0], x) + Woutputs[:, :, 1] = torch.add(Woutputs[:, :,1], y) + outputs = torch.cat((outputs, Woutputs), 1) + else: + outputs = Woutputs + numW=numW+1 + + if self.decoder is not None: + outputs = self.decoder(outputs, dtype=outputs.type()) + outputs = postprocess( + outputs, self.num_classes, self.confthre, + self.nmsthre, class_agnostic=True + ) + + if outputs[0] is None: + pass + elif len(outputs[0]) == 2: + li_outputs = [] + temp = torch.empty(1, 7) + temp[0][0] = torch.min(outputs[0][0, 0], outputs[0][1, 0]) + temp[0][1] = torch.min(outputs[0][0, 1], outputs[0][1, 1]) + temp[0][2] = torch.max(outputs[0][0, 2], outputs[0][1, 2]) + temp[0][3] = torch.max(outputs[0][0, 3], outputs[0][1, 3]) + temp[0][4] = torch.add(outputs[0][0, 4], outputs[0][1, 4]) / 2 + temp[0][5] = torch.add(outputs[0][0, 5], outputs[0][1, 5]) / 2 + temp[0][6] = torch.add(outputs[0][0, 6], outputs[0][1, 6]) / 2 + li_outputs.append(temp) + outputs = li_outputs + + + logger.info("Infer time: {:.4f}s".format(time.time() - t0)) + return outputs, img_info + + def visual(self, output, img_info, cls_conf=0.35): + #ratio = img_info["ratio"] + img = img_info["raw_img"] + if output is None: + font = cv2.FONT_HERSHEY_SIMPLEX + class_count = {} + class_AP = {} + for i in self.cls_names: + class_count[i] = 0 + class_AP[i] = 0.0 + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + return img + output = output.cpu() + + bboxes = output[:, 0:4] + + # preprocessing: resize + #bboxes /= ratio + + cls = output[:, 6] + scores = output[:, 4] * output[:, 5] + + vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) + return vis_res + + +def image_demo(predictor, vis_folder, path, current_time, save_result): + if os.path.isdir(path): + files = get_image_list(path) + else: + files = [path] + files.sort() + for image_name in files: + outputs, img_info = predictor.inference(image_name) + result_image = predictor.visual(outputs[0], img_info, predictor.confthre) + if save_result: + save_folder = os.path.join( + 
vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + save_file_name = os.path.join(save_folder, os.path.basename(image_name)) + logger.info("Saving detection result in {}".format(save_file_name)) + cv2.imwrite(save_file_name, result_image) + ch = cv2.waitKey(0) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + + +def imageflow_demo(predictor, vis_folder, current_time, args): + cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + if args.save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, os.path.basename(args.path)) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + while True: + ret_val, frame = cap.read() + if ret_val: + outputs, img_info = predictor.inference(frame) + result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) + if args.save_result: + vid_writer.write(result_frame) + else: + cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) + cv2.imshow("yolox", result_frame) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + else: + break + + +def main(exp, args): + if not args.experiment_name: + args.experiment_name = exp.exp_name + + file_name = os.path.join(exp.output_dir, args.experiment_name) + os.makedirs(file_name, exist_ok=True) + + vis_folder = None + if args.save_result: + vis_folder = os.path.join(file_name, "vis_res") + os.makedirs(vis_folder, exist_ok=True) + + if args.trt: + args.device = "gpu" + + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + + if args.device == "gpu": + model.cuda() + if args.fp16: + model.half() # to FP16 + model.eval() + + if not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + ckpt = torch.load(ckpt_file, map_location="cpu") + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert not args.fuse, "TensorRT model is not support model fusing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" 
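# --- Editorial sketch (assumption, not part of the patch): the new script mirrors
# --- tools/demo.py, so a typical invocation would look something like the command
# --- below; the exp file, checkpoint path and thresholds are illustrative only.
#
#   python tools/demo_sliding_window.py image \
#       -f exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py \
#       -c YOLOX_outputs/yolox_voc_nano/best_ckpt.pth \
#       --path assets/dog.jpg --conf 0.3 --nms 0.45 --tsize 416 --save_result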
+ model.head.decode_in_inference = False + decoder = model.head.decode_outputs + logger.info("Using TensorRT to inference") + else: + trt_file = None + decoder = None + + predictor = Predictor( + model, exp, COCO_CLASSES, trt_file, decoder, + args.device, args.fp16, args.legacy, + ) + current_time = time.localtime() + if args.demo == "image": + image_demo(predictor, vis_folder, args.path, current_time, args.save_result) + elif args.demo == "video" or args.demo == "webcam": + imageflow_demo(predictor, vis_folder, current_time, args) + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + + main(exp, args) \ No newline at end of file From a3532ca79589e24f048d6d31ebde909f77a555cc Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Tue, 12 Jul 2022 08:08:20 +0800 Subject: [PATCH 54/59] Update visualize.py --- yolox/utils/visualize.py | 540 ++++++++++++++++++++++++++++----------- 1 file changed, 396 insertions(+), 144 deletions(-) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index 72eba4d65..e733a5dec 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -1,152 +1,404 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# Copyright (c) Megvii Inc. All rights reserved. +# Copyright (c) Megvii, Inc. and its affiliates. + +import argparse +import os +import time +from loguru import logger import cv2 -import numpy as np - -__all__ = ["vis"] - - -def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): - class_count = {} - class_AP = {} - for j in class_names: - class_count[j] = 0 - class_AP[j] = 0 - - for i in range(len(boxes)): - box = boxes[i] - cls_id = int(cls_ids[i]) - score = scores[i] - if score < conf: - continue - x0 = int(box[0]) - y0 = int(box[1]) - x1 = int(box[2]) - y1 = int(box[3]) - - color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() - text = '{:.1f}%'.format(score * 100)#'{}:{:.1f}%'.format(class_names[cls_id], score * 100) - txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) - font = cv2.FONT_HERSHEY_SIMPLEX - - txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] - cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) - - txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() - cv2.rectangle( - img, - (x0, y0 + 1), - (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), - txt_bk_color, - -1 + +import torch + +from yolox.data.data_augment import ValTransform, sliding_window +from yolox.data.datasets import COCO_CLASSES,VOC_CLASSES +from yolox.exp import get_exp +from yolox.utils import fuse_model, get_model_info, postprocess, vis + +IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Demo!") + parser.add_argument( + "demo", default="image", help="demo type, eg. 
image, video and webcam" + ) + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + parser.add_argument( + "--path", default="./assets/dog.jpg", help="path to images or video" + ) + parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") + parser.add_argument( + "--save_result", + action="store_true", + help="whether to save the inference result of image/video", + ) + + # exp file + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="please input your experiment description file", + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument( + "--device", + default="cpu", + type=str, + help="device to run our model, can either be cpu or gpu", + ) + parser.add_argument("--conf", default=0.3, type=float, help="test conf") + parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--legacy", + dest="legacy", + default=False, + action="store_true", + help="To be compatible with older versions", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + return parser + + +def get_image_list(path): + image_names = [] + for maindir, subdir, file_name_list in os.walk(path): + for filename in file_name_list: + apath = os.path.join(maindir, filename) + ext = os.path.splitext(apath)[1] + if ext in IMAGE_EXT: + image_names.append(apath) + return image_names + + +class Predictor(object): + def __init__( + self, + model, + exp, + cls_names=COCO_CLASSES, + trt_file=None, + decoder=None, + device="cpu", + fp16=False, + legacy=False, + ): + self.model = model + self.cls_names = cls_names + self.decoder = decoder + self.num_classes = exp.num_classes + self.confthre = exp.test_conf + self.nmsthre = exp.nmsthre + self.test_size = exp.test_size + self.device = device + self.fp16 = fp16 + self.preproc = ValTransform(legacy=legacy) + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() + self.model(x) + self.model = model_trt + + def inference(self, img): + img_info = {"id": 0} + if isinstance(img, str): + img_info["file_name"] = os.path.basename(img) + img = cv2.imread(img) + else: + img_info["file_name"] = None + + height, width = img.shape[:2] + img_info["height"] = height + img_info["width"] = width + img_info["raw_img"] = img + + #ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) + #img_info["ratio"] = ratio + # initial + (imgW, imgH)= (img.shape[1],img.shape[0]) + (winW, winH) = (exp.test_size[1], exp.test_size[0]) + # deciding window size + if (imgH=img.shape[0]): - x0 = x0+200 - y0 = 25 + logger.info(outputs) + """ + # + if outputs[0] is None: + pass + elif len(outputs[0]) == 2: + li_outputs = [] + temp = torch.empty(1, 7) + temp[0][0] = torch.min(outputs[0][0, 0], outputs[0][1, 0]) + temp[0][1] = torch.min(outputs[0][0, 1], outputs[0][1, 1]) + 
temp[0][2] = torch.max(outputs[0][0, 2], outputs[0][1, 2]) + temp[0][3] = torch.max(outputs[0][0, 3], outputs[0][1, 3]) + temp[0][4] = torch.add(outputs[0][0, 4], outputs[0][1, 4]) / 2 + temp[0][5] = torch.add(outputs[0][0, 5], outputs[0][1, 5]) / 2 + temp[0][6] = torch.add(outputs[0][0, 6], outputs[0][1, 6]) / 2 + li_outputs.append(temp) + outputs = li_outputs + """ + + logger.info("Infer time: {:.4f}s".format(time.time() - t0)) + return outputs, img_info + + def visual(self, output, img_info, cls_conf=0.35): + #ratio = img_info["ratio"] + img = img_info["raw_img"] + if output is None: + font = cv2.FONT_HERSHEY_SIMPLEX + class_count = {} + class_AP = {} + for i in self.cls_names: + class_count[i] = 0 + class_AP[i] = 0.0 + x0 = 15 + y0 = 0 row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + return img + output = output.cpu() + + bboxes = output[:, 0:4] + + # preprocessing: resize + #bboxes /= ratio + + cls = output[:, 6] + scores = output[:, 4] * output[:, 5] + + vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) + return vis_res + + +def image_demo(predictor, vis_folder, path, current_time, save_result): + if os.path.isdir(path): + files = get_image_list(path) + else: + files = [path] + files.sort() + for image_name in files: + outputs, img_info = predictor.inference(image_name) + result_image = predictor.visual(outputs[0], img_info, predictor.confthre) + if save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + save_file_name = os.path.join(save_folder, os.path.basename(image_name)) + logger.info("Saving detection result in {}".format(save_file_name)) + cv2.imwrite(save_file_name, result_image) + ch = cv2.waitKey(0) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + + +def imageflow_demo(predictor, vis_folder, current_time, args): + cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + if args.save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, os.path.basename(args.path)) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + while True: + ret_val, frame = cap.read() + if ret_val: + outputs, img_info = predictor.inference(frame) + result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) + if args.save_result: + vid_writer.write(result_frame) + else: + cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) + cv2.imshow("yolox", result_frame) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break else: - row = row+25 - cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) - if class_count[k] !=0: - 
class_AP[k]=class_AP[k]/class_count[k] + break + + +def main(exp, args): + if not args.experiment_name: + args.experiment_name = exp.exp_name + + file_name = os.path.join(exp.output_dir, args.experiment_name) + os.makedirs(file_name, exist_ok=True) + + vis_folder = None + if args.save_result: + vis_folder = os.path.join(file_name, "vis_res") + os.makedirs(vis_folder, exist_ok=True) + + if args.trt: + args.device = "gpu" + + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + + if args.device == "gpu": + model.cuda() + if args.fp16: + model.half() # to FP16 + model.eval() + + if not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth") else: - class_AP[k]=0.0 - row = row+25 - cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) - return img - - -_COLORS = np.array( - [ - 0.000, 0.447, 0.741, - 0.850, 0.325, 0.098, - 0.929, 0.694, 0.125, - 0.494, 0.184, 0.556, - 0.466, 0.674, 0.188, - 0.301, 0.745, 0.933, - 0.635, 0.078, 0.184, - 0.300, 0.300, 0.300, - 0.600, 0.600, 0.600, - 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, - 0.749, 0.749, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 1.000, - 0.667, 0.000, 1.000, - 0.333, 0.333, 0.000, - 0.333, 0.667, 0.000, - 0.333, 1.000, 0.000, - 0.667, 0.333, 0.000, - 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, - 1.000, 0.333, 0.000, - 1.000, 0.667, 0.000, - 1.000, 1.000, 0.000, - 0.000, 0.333, 0.500, - 0.000, 0.667, 0.500, - 0.000, 1.000, 0.500, - 0.333, 0.000, 0.500, - 0.333, 0.333, 0.500, - 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, - 0.667, 0.000, 0.500, - 0.667, 0.333, 0.500, - 0.667, 0.667, 0.500, - 0.667, 1.000, 0.500, - 1.000, 0.000, 0.500, - 1.000, 0.333, 0.500, - 1.000, 0.667, 0.500, - 1.000, 1.000, 0.500, - 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, - 0.000, 1.000, 1.000, - 0.333, 0.000, 1.000, - 0.333, 0.333, 1.000, - 0.333, 0.667, 1.000, - 0.333, 1.000, 1.000, - 0.667, 0.000, 1.000, - 0.667, 0.333, 1.000, - 0.667, 0.667, 1.000, - 0.667, 1.000, 1.000, - 1.000, 0.000, 1.000, - 1.000, 0.333, 1.000, - 1.000, 0.667, 1.000, - 0.333, 0.000, 0.000, - 0.500, 0.000, 0.000, - 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, - 1.000, 0.000, 0.000, - 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, - 0.000, 0.500, 0.000, - 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 0.167, - 0.000, 0.000, 0.333, - 0.000, 0.000, 0.500, - 0.000, 0.000, 0.667, - 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, - 0.000, 0.000, 0.000, - 0.143, 0.143, 0.143, - 0.286, 0.286, 0.286, - 0.429, 0.429, 0.429, - 0.571, 0.571, 0.571, - 0.714, 0.714, 0.714, - 0.857, 0.857, 0.857, - 0.000, 0.447, 0.741, - 0.314, 0.717, 0.741, - 0.50, 0.5, 0 - ] -).astype(np.float32).reshape(-1, 3) + ckpt_file = args.ckpt + logger.info("loading checkpoint") + ckpt = torch.load(ckpt_file, map_location="cpu") + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert not args.fuse, "TensorRT model is not support model fusing!" 
+ trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" + model.head.decode_in_inference = False + decoder = model.head.decode_outputs + logger.info("Using TensorRT to inference") + else: + trt_file = None + decoder = None + + predictor = Predictor( + model, exp, COCO_CLASSES, trt_file, decoder, + args.device, args.fp16, args.legacy, + ) + current_time = time.localtime() + if args.demo == "image": + image_demo(predictor, vis_folder, args.path, current_time, args.save_result) + elif args.demo == "video" or args.demo == "webcam": + imageflow_demo(predictor, vis_folder, current_time, args) + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + + main(exp, args) From e261097aa533dbf0c1959ac24c7923c2be6a5127 Mon Sep 17 00:00:00 2001 From: lujulia <39236354+lujulia@users.noreply.github.com> Date: Fri, 15 Jul 2022 17:23:53 +0800 Subject: [PATCH 55/59] Update visualize.py --- yolox/utils/visualize.py | 541 +++++++++++---------------------------- 1 file changed, 145 insertions(+), 396 deletions(-) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index e733a5dec..16aa9dee5 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -1,404 +1,153 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# Copyright (c) Megvii, Inc. and its affiliates. - -import argparse -import os -import time -from loguru import logger +# Copyright (c) Megvii Inc. All rights reserved. import cv2 - -import torch - -from yolox.data.data_augment import ValTransform, sliding_window -from yolox.data.datasets import COCO_CLASSES,VOC_CLASSES -from yolox.exp import get_exp -from yolox.utils import fuse_model, get_model_info, postprocess, vis - -IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] - - -def make_parser(): - parser = argparse.ArgumentParser("YOLOX Demo!") - parser.add_argument( - "demo", default="image", help="demo type, eg. 
image, video and webcam" - ) - parser.add_argument("-expn", "--experiment-name", type=str, default=None) - parser.add_argument("-n", "--name", type=str, default=None, help="model name") - - parser.add_argument( - "--path", default="./assets/dog.jpg", help="path to images or video" - ) - parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") - parser.add_argument( - "--save_result", - action="store_true", - help="whether to save the inference result of image/video", - ) - - # exp file - parser.add_argument( - "-f", - "--exp_file", - default=None, - type=str, - help="please input your experiment description file", - ) - parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") - parser.add_argument( - "--device", - default="cpu", - type=str, - help="device to run our model, can either be cpu or gpu", - ) - parser.add_argument("--conf", default=0.3, type=float, help="test conf") - parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") - parser.add_argument("--tsize", default=None, type=int, help="test img size") - parser.add_argument( - "--fp16", - dest="fp16", - default=False, - action="store_true", - help="Adopting mix precision evaluating.", - ) - parser.add_argument( - "--legacy", - dest="legacy", - default=False, - action="store_true", - help="To be compatible with older versions", - ) - parser.add_argument( - "--fuse", - dest="fuse", - default=False, - action="store_true", - help="Fuse conv and bn for testing.", - ) - parser.add_argument( - "--trt", - dest="trt", - default=False, - action="store_true", - help="Using TensorRT model for testing.", - ) - return parser - - -def get_image_list(path): - image_names = [] - for maindir, subdir, file_name_list in os.walk(path): - for filename in file_name_list: - apath = os.path.join(maindir, filename) - ext = os.path.splitext(apath)[1] - if ext in IMAGE_EXT: - image_names.append(apath) - return image_names - - -class Predictor(object): - def __init__( - self, - model, - exp, - cls_names=COCO_CLASSES, - trt_file=None, - decoder=None, - device="cpu", - fp16=False, - legacy=False, - ): - self.model = model - self.cls_names = cls_names - self.decoder = decoder - self.num_classes = exp.num_classes - self.confthre = exp.test_conf - self.nmsthre = exp.nmsthre - self.test_size = exp.test_size - self.device = device - self.fp16 = fp16 - self.preproc = ValTransform(legacy=legacy) - if trt_file is not None: - from torch2trt import TRTModule - - model_trt = TRTModule() - model_trt.load_state_dict(torch.load(trt_file)) - - x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() - self.model(x) - self.model = model_trt - - def inference(self, img): - img_info = {"id": 0} - if isinstance(img, str): - img_info["file_name"] = os.path.basename(img) - img = cv2.imread(img) - else: - img_info["file_name"] = None - - height, width = img.shape[:2] - img_info["height"] = height - img_info["width"] = width - img_info["raw_img"] = img - - #ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) - #img_info["ratio"] = ratio - # initial - (imgW, imgH)= (img.shape[1],img.shape[0]) - (winW, winH) = (exp.test_size[1], exp.test_size[0]) - # deciding window size - if (imgH 0.5 else (255, 255, 255) + font = cv2.FONT_HERSHEY_SIMPLEX + + txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] + cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) + + txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() + cv2.rectangle( + img, + (x0, y0 + 1), + (x0 + txt_size[0] + 
1, y0 + int(1.5*txt_size[1])), + txt_bk_color, + -1 ) - logger.info(outputs) - """ - # - if outputs[0] is None: - pass - elif len(outputs[0]) == 2: - li_outputs = [] - temp = torch.empty(1, 7) - temp[0][0] = torch.min(outputs[0][0, 0], outputs[0][1, 0]) - temp[0][1] = torch.min(outputs[0][0, 1], outputs[0][1, 1]) - temp[0][2] = torch.max(outputs[0][0, 2], outputs[0][1, 2]) - temp[0][3] = torch.max(outputs[0][0, 3], outputs[0][1, 3]) - temp[0][4] = torch.add(outputs[0][0, 4], outputs[0][1, 4]) / 2 - temp[0][5] = torch.add(outputs[0][0, 5], outputs[0][1, 5]) / 2 - temp[0][6] = torch.add(outputs[0][0, 6], outputs[0][1, 6]) / 2 - li_outputs.append(temp) - outputs = li_outputs - """ - - logger.info("Infer time: {:.4f}s".format(time.time() - t0)) - return outputs, img_info - - def visual(self, output, img_info, cls_conf=0.35): - #ratio = img_info["ratio"] - img = img_info["raw_img"] - if output is None: - font = cv2.FONT_HERSHEY_SIMPLEX - class_count = {} - class_AP = {} - for i in self.cls_names: - class_count[i] = 0 - class_AP[i] = 0.0 - x0 = 15 - y0 = 0 + class_count[class_names[cls_id]] = class_count[class_names[cls_id]]+1 + class_AP[class_names[cls_id]] = class_AP[class_names[cls_id]]+float('{:.1f}'.format(score * 100)) + cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) + + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 row = 0 - for k in class_count: - if((y0+row+50)>=img.shape[0]): - x0 = x0+200 - y0 = 25 - row = 0 - else: - row = row+25 - cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) - if class_count[k] !=0: - class_AP[k]=class_AP[k]/class_count[k] - else: - class_AP[k]=0.0 - row = row+25 - cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) - return img - output = output.cpu() - - bboxes = output[:, 0:4] - - # preprocessing: resize - #bboxes /= ratio - - cls = output[:, 6] - scores = output[:, 4] * output[:, 5] - - vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) - return vis_res - - -def image_demo(predictor, vis_folder, path, current_time, save_result): - if os.path.isdir(path): - files = get_image_list(path) - else: - files = [path] - files.sort() - for image_name in files: - outputs, img_info = predictor.inference(image_name) - result_image = predictor.visual(outputs[0], img_info, predictor.confthre) - if save_result: - save_folder = os.path.join( - vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) - ) - os.makedirs(save_folder, exist_ok=True) - save_file_name = os.path.join(save_folder, os.path.basename(image_name)) - logger.info("Saving detection result in {}".format(save_file_name)) - cv2.imwrite(save_file_name, result_image) - ch = cv2.waitKey(0) - if ch == 27 or ch == ord("q") or ch == ord("Q"): - break - - -def imageflow_demo(predictor, vis_folder, current_time, args): - cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) - width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float - height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float - fps = cap.get(cv2.CAP_PROP_FPS) - if args.save_result: - save_folder = os.path.join( - vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) - ) - os.makedirs(save_folder, exist_ok=True) - if args.demo == "video": - save_path = os.path.join(save_folder, os.path.basename(args.path)) - else: - save_path = os.path.join(save_folder, "camera.mp4") - logger.info(f"video save_path is {save_path}") - 
vid_writer = cv2.VideoWriter( - save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) - ) - while True: - ret_val, frame = cap.read() - if ret_val: - outputs, img_info = predictor.inference(frame) - result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) - if args.save_result: - vid_writer.write(result_frame) - else: - cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) - cv2.imshow("yolox", result_frame) - ch = cv2.waitKey(1) - if ch == 27 or ch == ord("q") or ch == ord("Q"): - break else: - break - - -def main(exp, args): - if not args.experiment_name: - args.experiment_name = exp.exp_name - - file_name = os.path.join(exp.output_dir, args.experiment_name) - os.makedirs(file_name, exist_ok=True) - - vis_folder = None - if args.save_result: - vis_folder = os.path.join(file_name, "vis_res") - os.makedirs(vis_folder, exist_ok=True) - - if args.trt: - args.device = "gpu" - - logger.info("Args: {}".format(args)) - - if args.conf is not None: - exp.test_conf = args.conf - if args.nms is not None: - exp.nmsthre = args.nms - if args.tsize is not None: - exp.test_size = (args.tsize, args.tsize) - - model = exp.get_model() - logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) - - if args.device == "gpu": - model.cuda() - if args.fp16: - model.half() # to FP16 - model.eval() - - if not args.trt: - if args.ckpt is None: - ckpt_file = os.path.join(file_name, "best_ckpt.pth") + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] else: - ckpt_file = args.ckpt - logger.info("loading checkpoint") - ckpt = torch.load(ckpt_file, map_location="cpu") - # load the model state dict - model.load_state_dict(ckpt["model"]) - logger.info("loaded checkpoint done.") - - if args.fuse: - logger.info("\tFusing model...") - model = fuse_model(model) - - if args.trt: - assert not args.fuse, "TensorRT model is not support model fusing!" - trt_file = os.path.join(file_name, "model_trt.pth") - assert os.path.exists( - trt_file - ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" 
- model.head.decode_in_inference = False - decoder = model.head.decode_outputs - logger.info("Using TensorRT to inference") - else: - trt_file = None - decoder = None - - predictor = Predictor( - model, exp, COCO_CLASSES, trt_file, decoder, - args.device, args.fp16, args.legacy, - ) - current_time = time.localtime() - if args.demo == "image": - image_demo(predictor, vis_folder, args.path, current_time, args.save_result) - elif args.demo == "video" or args.demo == "webcam": - imageflow_demo(predictor, vis_folder, current_time, args) - - -if __name__ == "__main__": - args = make_parser().parse_args() - exp = get_exp(args.exp_file, args.name) - - main(exp, args) + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + + return img + + +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 0.000, 0.447, 0.741, + 0.314, 0.717, 0.741, + 0.50, 0.5, 0 + ] +).astype(np.float32).reshape(-1, 3) From cf8685e46cf2465b01811b22c5f31e0998d096a2 Mon Sep 17 00:00:00 2001 From: "I-CHEN,LU" <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Sep 2023 09:27:52 +0800 Subject: [PATCH 56/59] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f64852ca..7d78808a2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@
+
+ + ## Introduction YOLOX is an anchor-free version of YOLO, with a simpler design but better performance! It aims to bridge the gap between research and industrial communities. For more details, please refer to our [report on Arxiv](https://arxiv.org/abs/2107.08430). @@ -246,4 +249,4 @@ It is hoped that every AI practitioner in the world will stick to the concept of
没有孙剑博士的指导,YOLOX也不会问世并开源给社区使用。 孙剑博士的离去是CV领域的一大损失,我们在此特别添加了这个部分来表达对我们的“船长”孙老师的纪念和哀思。 -希望世界上的每个AI从业者秉持着“持续创新拓展认知边界,非凡科技成就产品价值”的观念,一路向前。 \ No newline at end of file +希望世界上的每个AI从业者秉持着“持续创新拓展认知边界,非凡科技成就产品价值”的观念,一路向前。 From ff2a91c73bd9d16df300c6dac5b3f1441ebbbd7a Mon Sep 17 00:00:00 2001 From: "I-CHEN,LU" <39236354+lujulia@users.noreply.github.com> Date: Sat, 9 Sep 2023 09:28:10 +0800 Subject: [PATCH 57/59] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7d78808a2..9cd3e3272 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +
From 8fb0fbe3dc75aa9b90ddcb664232c48a659947f5 Mon Sep 17 00:00:00 2001 From: "I-CHEN,LU" <39236354+lujulia@users.noreply.github.com> Date: Wed, 27 Sep 2023 22:36:00 +0800 Subject: [PATCH 58/59] Update demo_sliding_window.py --- tools/demo_sliding_window.py | 57 +++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/demo_sliding_window.py b/tools/demo_sliding_window.py index 29846e4aa..7a0e20f00 100644 --- a/tools/demo_sliding_window.py +++ b/tools/demo_sliding_window.py @@ -11,7 +11,7 @@ import torch -from yolox.data.data_augment import ValTransform, sliding_window +from yolox.data.data_augment import preproc, sliding_window from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES from yolox.exp import get_exp from yolox.utils import fuse_model, get_model_info, postprocess, vis @@ -62,6 +62,7 @@ def make_parser(): action="store_true", help="Adopting mix precision evaluating.", ) + """ parser.add_argument( "--legacy", dest="legacy", @@ -69,6 +70,7 @@ def make_parser(): action="store_true", help="To be compatible with older versions", ) + """ parser.add_argument( "--fuse", dest="fuse", @@ -106,8 +108,8 @@ def __init__( trt_file=None, decoder=None, device="cpu", - fp16=False, - legacy=False, + #fp16=False, + #legacy=False, ): self.model = model self.cls_names = cls_names @@ -117,8 +119,8 @@ def __init__( self.nmsthre = exp.nmsthre self.test_size = exp.test_size self.device = device - self.fp16 = fp16 - self.preproc = ValTransform(legacy=legacy) + #self.fp16 = fp16 + #self.preproc = ValTransform(legacy=legacy) if trt_file is not None: from torch2trt import TRTModule @@ -128,6 +130,9 @@ def __init__( x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() self.model(x) self.model = model_trt + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) + def inference(self, img): img_info = {"id": 0} @@ -141,9 +146,33 @@ def inference(self, img): img_info["height"] = height img_info["width"] = width img_info["raw_img"] = img - + + #img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,value=(0,0,0)) #ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) #img_info["ratio"] = ratio + """ + if (img.shape[0]>exp.test_size[0]): + h_r = (img.shape[0]//exp.test_size[0]+1)*exp.test_size[0]-img.shape[0] + elif(img.shape[0]exp.test_size[1]): + w_r = (img.shape[1]//exp.test_size[1]+1)*exp.test_size[1]-img.shape[1] + elif(img.shape[1] Date: Wed, 27 Sep 2023 22:36:32 +0800 Subject: [PATCH 59/59] Update demo_sliding_window.py
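
The last two patches rework tools/demo_sliding_window.py around the raw `preproc` transform and a `sliding_window` helper imported from `yolox.data.data_augment`; the helper itself is not part of this diff. As a rough illustration only — the names below (`tile_image`, `detect_tiled`, `detect_fn`, the window and stride sizes) are hypothetical, not this fork's actual API — sliding-window inference generally means cropping fixed-size windows, running the detector on each crop, and shifting the resulting boxes back by the window offset:

```python
import numpy as np


def tile_image(img, win, stride):
    """Yield (x_off, y_off, crop) windows that cover the image."""
    h, w = img.shape[:2]
    for y in range(0, max(h - win[0], 0) + 1, stride[0]):
        for x in range(0, max(w - win[1], 0) + 1, stride[1]):
            yield x, y, img[y:y + win[0], x:x + win[1]]


def detect_tiled(img, detect_fn, win=(416, 416), stride=(208, 208)):
    """Run detect_fn on every window and shift its boxes back to image coordinates."""
    merged = []
    for x_off, y_off, crop in tile_image(img, win, stride):
        dets = detect_fn(crop)              # expected shape (N, 6): x1, y1, x2, y2, score, cls
        if dets is None or len(dets) == 0:
            continue
        dets = np.asarray(dets, dtype=np.float32).copy()
        dets[:, [0, 2]] += x_off            # shift x1 and x2 by the window's x offset
        dets[:, [1, 3]] += y_off            # shift y1 and y2 by the window's y offset
        merged.append(dets)
    return np.concatenate(merged, axis=0) if merged else np.zeros((0, 6), dtype=np.float32)
```

The commented-out `h_r`/`w_r` arithmetic and `cv2.copyMakeBorder` call in patch 58 follow the same idea: they appear to pad the frame so its height and width become multiples of `test_size`, keeping every window fully inside the padded image. Overlapping windows produce duplicate boxes, so a final NMS pass (the demo already imports `postprocess`) is typically applied to the merged array.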
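
Earlier in the series, the `visual()` overlay accumulates a per-class box count plus a running sum of `score * 100`, then divides by the count before drawing. The number printed next to "AP" is therefore the mean confidence of the drawn boxes, not a COCO-style average precision. A minimal, self-contained sketch of that bookkeeping (the `summarize_detections` helper and its signature are illustrative, not code from this repository):

```python
from collections import defaultdict

import numpy as np


def summarize_detections(scores, cls_ids, class_names, conf_thr=0.35):
    """Return {class_name: (count, mean_score_in_percent)} for boxes above conf_thr."""
    counts = defaultdict(int)
    totals = defaultdict(float)
    for score, cls_id in zip(np.asarray(scores), np.asarray(cls_ids)):
        if score < conf_thr:
            continue
        name = class_names[int(cls_id)]
        counts[name] += 1
        totals[name] += float(score) * 100.0
    # Classes with no boxes keep a count of 0 and a mean of 0.0, matching the overlay.
    return {
        name: (counts[name], totals[name] / counts[name] if counts[name] else 0.0)
        for name in class_names
    }
```

With a postprocessed `output` tensor this would be called with `scores = output[:, 4] * output[:, 5]` and `cls_ids = output[:, 6]`, which is how `visual()` derives them before drawing the per-class rows down the left edge of the frame.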