1- """Optimizing AUROC loss on imbalanced dataset**
1+ """02_Optimizing_AUROC_with_ResNet20_on_Imbalanced_CIFAR10.ipynb
22
3- Author: Zhuoning Yuan
3+ ** Author** : Zhuoning Yuan
44
5- If you find this tutorial helpful in your work, please acknowledge our library and cite the following paper:
5+ **Introduction**
6+ In this tutorial, you will learn how to quickly train a ResNet20 model by optimizing **AUROC** using our novel [AUCMLoss](https://arxiv.org/abs/2012.03173) and `PESG` optimizer on a binary image classification task on Cifar10. After completion of this tutorial, you should be able to use LibAUC to train your own models on your own datasets.
7+
8+ **Useful Resources**:
9+ * Website: https://libauc.org
10+ * Github: https://github.com/Optimization-AI/LibAUC
611
12+ **Reference**:
13+ If you find this tutorial helpful in your work, please acknowledge our library and cite the following paper:
 @inproceedings{yuan2021large,
   title={Large-scale robust deep auc maximization: A new surrogate loss and empirical studies on medical image classification},
   author={Yuan, Zhuoning and Yan, Yan and Sonka, Milan and Yang, Tianbao},
   booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
   pages={3040--3049},
   year={2021}
 }
-
-@misc{libauc2022,
-  title={LibAUC: A Deep Learning Library for X-Risk Optimization.},
-  author={Zhuoning Yuan, Zi-Hao Qiu, Gang Li, Dixian Zhu, Zhishuai Guo, Quanqi Hu, Bokun Wang, Qi Qi, Yongjian Zhong, Tianbao Yang},
-  year={2022}
-}
 """
 
+
 from libauc.losses import AUCMLoss
 from libauc.optimizers import PESG
 from libauc.models import resnet20 as ResNet20
 from libauc.datasets import CIFAR10
 from libauc.utils import ImbalancedDataGenerator
 from libauc.sampler import DualSampler
+from libauc.metrics import auc_roc_score
 
 import torch
 from PIL import Image
 import numpy as np
 from torch.utils.data import Dataset
 from sklearn.metrics import roc_auc_score
 
-
 def set_all_seeds(SEED):
     # REPRODUCIBILITY
     torch.manual_seed(SEED)
@@ -71,82 +73,122 @@ def __getitem__(self, idx):
         return image, target
 
 
-# paramaters
+# HyperParameters
 SEED = 123
 BATCH_SIZE = 128
 imratio = 0.1 # for demo
+total_epochs = 100
+decay_epochs = [50, 75]
+
 lr = 0.1
-gamma = 500
-weight_decay = 1e-4
 margin = 1.0
+epoch_decay = 0.003 # referred to as gamma in the paper
+weight_decay = 0.0001
 
+# oversample the minority class; you can tune sampling_rate in (0, 0.5]
+# e.g., sampling_rate=0.2 means the number of positive samples per mini-batch
+# is about sampling_rate*BATCH_SIZE = 0.2*128 ~= 26
+sampling_rate = 0.2
 
-# dataloader
+# load data as numpy arrays
 train_data, train_targets = CIFAR10(root='./data', train=True)
 test_data, test_targets = CIFAR10(root='./data', train=False)
 
+# generate imbalanced data
 generator = ImbalancedDataGenerator(verbose=True, random_seed=0)
 (train_images, train_labels) = generator.transform(train_data, train_targets, imratio=imratio)
 (test_images, test_labels) = generator.transform(test_data, test_targets, imratio=0.5)
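+
+# Optional sanity check (a minimal sketch; it assumes the generator returns
+# numpy arrays with 0/1 labels, 1 being the minority/positive class): the
+# positive fraction should match the imratio values passed above.
+print('positive fraction (train): %.3f' % (train_labels == 1).mean())  # ~0.1
+print('positive fraction (test):  %.3f' % (test_labels == 1).mean())   # ~0.5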
 
-trainloader = torch.utils.data.DataLoader(ImageDataset(train_images, train_labels), batch_size=BATCH_SIZE, shuffle=True, num_workers=1, pin_memory=True, drop_last=True)
-testloader = torch.utils.data.DataLoader(ImageDataset(test_images, test_labels, mode='test'), batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)
+# datasets (training mode applies data augmentation; 'test' mode does not)
+trainSet = ImageDataset(train_images, train_labels)
+trainSet_eval = ImageDataset(train_images, train_labels, mode='test')
+testSet = ImageDataset(test_images, test_labels, mode='test')
 
+# dataloaders
+sampler = DualSampler(trainSet, BATCH_SIZE, sampling_rate=sampling_rate)
+trainloader = torch.utils.data.DataLoader(trainSet, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2)
+trainloader_eval = torch.utils.data.DataLoader(trainSet_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
+testloader = torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
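+
+# Optional: peek at one mini-batch (a quick check, not part of training). With
+# sampling_rate=0.2 and BATCH_SIZE=128, DualSampler should pack roughly 26
+# positives into each batch of 128.
+_, first_targets = next(iter(trainloader))
+print('positives in first mini-batch: %d' % int(first_targets.sum().item()))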
 
+"""# **Creating models & AUC Optimizer**"""
+# You can include sigmoid/l2 activations on the model's outputs before computing the loss
 model = ResNet20(pretrained=False, last_activation=None, num_classes=1)
 model = model.cuda()
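+
+# Note: num_classes=1 yields a single score per image, and last_activation=None
+# leaves it un-squashed, so torch.sigmoid is applied manually in the training
+# loop below before the loss is computed.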
 
-Loss = AUCMLoss()
+# You can also pass Loss.a, Loss.b, Loss.alpha to the optimizer (for users of older versions)
+loss_fn = AUCMLoss()
 optimizer = PESG(model,
-                 a=Loss.a,
-                 b=Loss.b,
-                 alpha=Loss.alpha,
+                 loss_fn=loss_fn,
                  lr=lr,
-                 gamma=gamma,
+                 momentum=0.9,
                  margin=margin,
+                 epoch_decay=epoch_decay,
                  weight_decay=weight_decay)
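+
+# For reference, the equivalent legacy construction on older LibAUC releases
+# passed the loss variables and gamma explicitly (kept commented out here):
+# optimizer = PESG(model, a=loss_fn.a, b=loss_fn.b, alpha=loss_fn.alpha,
+#                  lr=lr, gamma=500, margin=margin, weight_decay=weight_decay)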
 
 
+"""# **Training**"""
 print('Start Training')
 print('-' * 30)
-for epoch in range(100):
-
-    if epoch == 50 or epoch == 75:
-        # decrease learning rate by 10x & update regularizer
-        optimizer.update_regularizer(decay_factor=10)
+
+train_log = []
+test_log = []
+for epoch in range(total_epochs):
+    if epoch in decay_epochs:
+        optimizer.update_regularizer(decay_factor=10)  # decrease learning rate by 10x & update regularizer
 
-    train_pred = []
-    train_true = []
+    train_loss = []
     model.train()
     for data, targets in trainloader:
         data, targets = data.cuda(), targets.cuda()
         y_pred = model(data)
         y_pred = torch.sigmoid(y_pred)
-        loss = Loss(y_pred, targets)
+        loss = loss_fn(y_pred, targets)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-
-        train_pred.append(y_pred.cpu().detach().numpy())
-        train_true.append(targets.cpu().detach().numpy())
-
-    train_true = np.concatenate(train_true)
-    train_pred = np.concatenate(train_pred)
-    train_auc = roc_auc_score(train_true, train_pred)
-
+        train_loss.append(loss.item())
+
+    # evaluation on train & test sets
     model.eval()
-    test_pred = []
-    test_true = []
-    for j, data in enumerate(testloader):
-        test_data, test_targets = data
+    train_pred_list = []
+    train_true_list = []
+    for train_data, train_targets in trainloader_eval:
+        train_data = train_data.cuda()
+        train_pred = model(train_data)
+        train_pred_list.append(train_pred.cpu().detach().numpy())
+        train_true_list.append(train_targets.numpy())
+    train_true = np.concatenate(train_true_list)
+    train_pred = np.concatenate(train_pred_list)
+    train_auc = auc_roc_score(train_true, train_pred)
+    train_loss = np.mean(train_loss)
+
+    test_pred_list = []
+    test_true_list = []
+    for test_data, test_targets in testloader:
         test_data = test_data.cuda()
-        y_pred = model(test_data)
-        test_pred.append(y_pred.cpu().detach().numpy())
-        test_true.append(test_targets.numpy())
-    test_true = np.concatenate(test_true)
-    test_pred = np.concatenate(test_pred)
-    val_auc = roc_auc_score(test_true, test_pred)
+        test_pred = model(test_data)
+        test_pred_list.append(test_pred.cpu().detach().numpy())
+        test_true_list.append(test_targets.numpy())
+    test_true = np.concatenate(test_true_list)
+    test_pred = np.concatenate(test_pred_list)
+    val_auc = auc_roc_score(test_true, test_pred)
     model.train()
-
+
     # print results
-    print("epoch: {}, train_loss: {:4f}, train_auc:{:4f}, test_auc:{:4f}, lr:{:4f}".format(epoch, loss.item(), train_auc, val_auc, optimizer.lr))
+    print("epoch: %s, train_loss: %.4f, train_auc: %.4f, test_auc: %.4f, lr: %.4f" % (epoch, train_loss, train_auc, val_auc, optimizer.lr))
+    train_log.append(train_auc)
+    test_log.append(val_auc)
+
+
+"""# **Visualization**
+Now, let's look at the learning curves of AUROC on the train and test sets.
+"""
+import matplotlib.pyplot as plt
+plt.rcParams["figure.figsize"] = (9, 5)
+x = np.arange(len(train_log))
+plt.figure()
+plt.plot(x, train_log, linestyle='-', label='Train Set', linewidth=3)
+plt.plot(x, test_log, linestyle='-', label='Test Set', linewidth=3)
+plt.title('AUCMLoss (10% CIFAR10)', fontsize=25)
+plt.legend(fontsize=15)
+plt.ylabel('AUROC', fontsize=25)
+plt.xlabel('Epoch', fontsize=25)
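+
+# When running this file as a plain Python script (rather than in a notebook,
+# where figures render inline), show the figure explicitly:
+plt.show()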