
Commit ff80f90

Add files via upload
1 parent ddb615a commit ff80f90

File tree: 3 files changed, +372 −166 lines changed
Lines changed: 91 additions & 49 deletions
@@ -1,30 +1,33 @@
-"""Optimizing AUROC loss on imbalanced dataset**
+"""02_Optimizing_AUROC_with_ResNet20_on_Imbalanced_CIFAR10.ipynb

-Author: Zhuoning Yuan
+**Author**: Zhuoning Yuan

-If you find this tutorial helpful in your work, please acknowledge our library and cite the following paper:
+**Introduction**
+In this tutorial, you will learn how to quickly train a ResNet20 model by optimizing **AUROC** using our novel [AUCMLoss](https://arxiv.org/abs/2012.03173) and the `PESG` optimizer on a binary image classification task on CIFAR10. After completing this tutorial, you should be able to use LibAUC to train your own models on your own datasets.
+
+**Useful Resources**:
+* Website: https://libauc.org
+* GitHub: https://github.com/Optimization-AI/LibAUC

+**Reference**:
+If you find this tutorial helpful in your work, please acknowledge our library and cite the following paper:
 @inproceedings{yuan2021large,
   title={Large-scale robust deep auc maximization: A new surrogate loss and empirical studies on medical image classification},
   author={Yuan, Zhuoning and Yan, Yan and Sonka, Milan and Yang, Tianbao},
   booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
   pages={3040--3049},
   year={2021}
 }
-
-@misc{libauc2022,
-  title={LibAUC: A Deep Learning Library for X-Risk Optimization.},
-  author={Zhuoning Yuan, Zi-Hao Qiu, Gang Li, Dixian Zhu, Zhishuai Guo, Quanqi Hu, Bokun Wang, Qi Qi, Yongjian Zhong, Tianbao Yang},
-  year={2022}
-}
 """

+
 from libauc.losses import AUCMLoss
 from libauc.optimizers import PESG
 from libauc.models import resnet20 as ResNet20
 from libauc.datasets import CIFAR10
 from libauc.utils import ImbalancedDataGenerator
 from libauc.sampler import DualSampler
+from libauc.metrics import auc_roc_score

 import torch
 from PIL import Image
@@ -33,7 +36,6 @@
 from torch.utils.data import Dataset
 from sklearn.metrics import roc_auc_score

-
 def set_all_seeds(SEED):
     # REPRODUCIBILITY
     torch.manual_seed(SEED)
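The diff context cuts set_all_seeds off after its first statement. For readers following along, a typical full seeding helper looks like the sketch below; everything past torch.manual_seed is our assumption about the elided body, not text from this commit.

import random
import numpy as np
import torch

def set_all_seeds_sketch(SEED):
    # hypothetical completion; the original body is truncated by the diff context
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True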
@@ -71,82 +73,122 @@ def __getitem__(self, idx):
         return image, target


-# paramaters
+# HyperParameters
 SEED = 123
 BATCH_SIZE = 128
 imratio = 0.1  # for demo
+total_epochs = 100
+decay_epochs = [50, 75]
+
 lr = 0.1
-gamma = 500
-weight_decay = 1e-4
 margin = 1.0
+epoch_decay = 0.003  # refers to gamma in the paper
+weight_decay = 0.0001

+# oversampling the minority class; you can tune it in (0, 0.5]
+# e.g., sampling_rate=0.2 means the number of positive samples in a mini-batch is sampling_rate*batch_size = 0.2*128 ≈ 25
+sampling_rate = 0.2
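To make the oversampling comment concrete, here is the arithmetic as plain Python (illustrative only; the exact rounding is up to DualSampler):

# with BATCH_SIZE = 128 and sampling_rate = 0.2
pos_per_batch = int(0.2 * 128)       # 25 positive images per mini-batch
neg_per_batch = 128 - pos_per_batch  # 103 negative images per mini-batch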

-# dataloader
+# load data as numpy arrays
 train_data, train_targets = CIFAR10(root='./data', train=True)
 test_data, test_targets = CIFAR10(root='./data', train=False)

+# generate imbalanced data
 generator = ImbalancedDataGenerator(verbose=True, random_seed=0)
 (train_images, train_labels) = generator.transform(train_data, train_targets, imratio=imratio)
 (test_images, test_labels) = generator.transform(test_data, test_targets, imratio=0.5)

-trainloader = torch.utils.data.DataLoader(ImageDataset(train_images, train_labels), batch_size=BATCH_SIZE, shuffle=True, num_workers=1, pin_memory=True, drop_last=True)
-testloader = torch.utils.data.DataLoader(ImageDataset(test_images, test_labels, mode='test'), batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)
+# datasets (train-time vs. test-time augmentations)
+trainSet = ImageDataset(train_images, train_labels)
+trainSet_eval = ImageDataset(train_images, train_labels, mode='test')
+testSet = ImageDataset(test_images, test_labels, mode='test')

+# dataloaders
+sampler = DualSampler(trainSet, BATCH_SIZE, sampling_rate=sampling_rate)
+trainloader = torch.utils.data.DataLoader(trainSet, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2)
+trainloader_eval = torch.utils.data.DataLoader(trainSet_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
+testloader = torch.utils.data.DataLoader(testSet, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
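DualSampler is LibAUC's own stratified sampler. As a rough mental model only (this is not the library's implementation, and StratifiedSampler is a name we made up), an index sampler that packs a fixed share of positives into every batch can be sketched as:

import numpy as np
from torch.utils.data import Sampler

class StratifiedSampler(Sampler):
    """Yield indices so each mini-batch holds ~sampling_rate positives (illustrative only)."""
    def __init__(self, labels, batch_size, sampling_rate):
        labels = np.asarray(labels).flatten()
        self.pos = np.where(labels == 1)[0]
        self.neg = np.where(labels == 0)[0]
        self.n_pos = int(batch_size * sampling_rate)
        self.n_neg = batch_size - self.n_pos
        self.num_batches = len(self.neg) // self.n_neg

    def __iter__(self):
        for _ in range(self.num_batches):
            batch = np.concatenate([
                np.random.choice(self.pos, self.n_pos, replace=True),   # oversample minority
                np.random.choice(self.neg, self.n_neg, replace=False)])
            np.random.shuffle(batch)
            yield from batch.tolist()

    def __len__(self):
        return self.num_batches * (self.n_pos + self.n_neg)

Note that a DataLoader constructed with a sampler= argument must leave shuffle at its default of False, which is why the trainloader above does not pass shuffle=True.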

+"""# **Creating models & AUC Optimizer**"""
+# You can include sigmoid/l2 activations on the model's outputs before computing the loss
 model = ResNet20(pretrained=False, last_activation=None, num_classes=1)
 model = model.cuda()

-Loss = AUCMLoss()
+# You can also pass Loss.a, Loss.b, Loss.alpha to the optimizer (for users of older versions)
+loss_fn = AUCMLoss()
 optimizer = PESG(model,
-                 a=Loss.a,
-                 b=Loss.b,
-                 alpha=Loss.alpha,
+                 loss_fn=loss_fn,
                  lr=lr,
-                 gamma=gamma,
+                 momentum=0.9,
                  margin=margin,
+                 epoch_decay=epoch_decay,
                  weight_decay=weight_decay)
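For intuition about what AUCMLoss and PESG optimize jointly: the AUC-margin loss of the cited paper is a min-max objective over the model weights plus learnable scalars a, b (running means of positive/negative scores) and a dual variable alpha. A minimal single-batch sketch using mean estimates follows; the helper name and exact normalization are our assumptions, not the library's code.

import torch

def aucm_loss_sketch(y_pred, y_true, a, b, alpha, margin=1.0):
    # batch estimate of the AUC-margin objective (Yuan et al., 2021):
    #   E[(h(x)-a)^2 | y=1] + E[(h(x)-b)^2 | y=0]
    #   + 2*alpha*(margin - E[h(x)|y=1] + E[h(x)|y=0]) - alpha^2
    y_pred, y_true = y_pred.view(-1), y_true.view(-1)
    pos, neg = (y_true == 1).float(), (y_true == 0).float()
    pos_mean = (pos * y_pred).sum() / pos.sum()
    neg_mean = (neg * y_pred).sum() / neg.sum()
    return (pos * (y_pred - a) ** 2).sum() / pos.sum() \
         + (neg * (y_pred - b) ** 2).sum() / neg.sum() \
         + 2 * alpha * (margin - pos_mean + neg_mean) \
         - alpha ** 2

PESG then roughly performs gradient descent on the model weights together with a and b while ascending on alpha; epoch_decay plays the role of the gamma regularization parameter from the paper, as the hyperparameter comment above notes.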


+"""# **Training**"""
 print('Start Training')
 print('-'*30)
-for epoch in range(100):
-
-    if epoch == 50 or epoch == 75:
-        # decrease learning rate by 10x & update regularizer
-        optimizer.update_regularizer(decay_factor=10)
+
+train_log = []
+test_log = []
+for epoch in range(total_epochs):
+    if epoch in decay_epochs:
+        optimizer.update_regularizer(decay_factor=10)  # decrease learning rate by 10x & update regularizer

-    train_pred = []
-    train_true = []
+    train_loss = []
     model.train()
     for data, targets in trainloader:
         data, targets = data.cuda(), targets.cuda()
         y_pred = model(data)
         y_pred = torch.sigmoid(y_pred)
-        loss = Loss(y_pred, targets)
+        loss = loss_fn(y_pred, targets)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-        train_pred.append(y_pred.cpu().detach().numpy())
-        train_true.append(targets.cpu().detach().numpy())
-
-    train_true = np.concatenate(train_true)
-    train_pred = np.concatenate(train_pred)
-    train_auc = roc_auc_score(train_true, train_pred)
+        train_loss.append(loss.item())

+    # evaluation on train & test sets
     model.eval()
-    test_pred = []
-    test_true = []
-    for j, data in enumerate(testloader):
-        test_data, test_targets = data
+    train_pred_list = []
+    train_true_list = []
+    for train_data, train_targets in trainloader_eval:
+        train_data = train_data.cuda()
+        train_pred = model(train_data)
+        train_pred_list.append(train_pred.cpu().detach().numpy())
+        train_true_list.append(train_targets.numpy())
+    train_true = np.concatenate(train_true_list)
+    train_pred = np.concatenate(train_pred_list)
+    train_auc = auc_roc_score(train_true, train_pred)
+    train_loss = np.mean(train_loss)
+
+    test_pred_list = []
+    test_true_list = []
+    for test_data, test_targets in testloader:
         test_data = test_data.cuda()
-        y_pred = model(test_data)
-        test_pred.append(y_pred.cpu().detach().numpy())
-        test_true.append(test_targets.numpy())
-    test_true = np.concatenate(test_true)
-    test_pred = np.concatenate(test_pred)
-    val_auc = roc_auc_score(test_true, test_pred)
+        test_pred = model(test_data)
+        test_pred_list.append(test_pred.cpu().detach().numpy())
+        test_true_list.append(test_targets.numpy())
+    test_true = np.concatenate(test_true_list)
+    test_pred = np.concatenate(test_pred_list)
+    val_auc = auc_roc_score(test_true, test_pred)
     model.train()

     # print results
-    print("epoch: {}, train_loss: {:4f}, train_auc:{:4f}, test_auc:{:4f}, lr:{:4f}".format(epoch, loss.item(), train_auc, val_auc, optimizer.lr))
+    print("epoch: %s, train_loss: %.4f, train_auc: %.4f, test_auc: %.4f, lr: %.4f" % (epoch, train_loss, train_auc, val_auc, optimizer.lr))
+    train_log.append(train_auc)
+    test_log.append(val_auc)
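One detail worth noting: the new code switches the metric to libauc.metrics.auc_roc_score, yet the sklearn import survives at the top of the file. As a suggestion of ours rather than part of the commit, it can serve as a cross-check after the test loop (wrapping the evaluation loops in torch.no_grad() would likewise save memory):

# optional cross-check: the two AUROC implementations should agree
# to numerical precision (roc_auc_score is already imported above)
assert abs(val_auc - roc_auc_score(test_true, test_pred)) < 1e-6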
+
+
+"""# **Visualization**
+Now, let's see the learning curve of optimizing AUROC on the train and test sets.
+"""
+import matplotlib.pyplot as plt
+plt.rcParams["figure.figsize"] = (9, 5)
+x = np.arange(len(train_log))
+plt.figure()
+plt.plot(x, train_log, linestyle='-', label='Train Set', linewidth=3)
+plt.plot(x, test_log, linestyle='-', label='Test Set', linewidth=3)
+plt.title('AUCMLoss (10% CIFAR10)', fontsize=25)
+plt.legend(fontsize=15)
+plt.ylabel('AUROC', fontsize=25)
+plt.xlabel('Epoch', fontsize=25)
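Inside the notebook the figure renders inline; if this exported .py file is run as a plain script, one extra line (our addition, not in the commit; the file name is ours) is needed to display or save the plot:

plt.show()  # or: plt.savefig('aucm_auroc_curve.png', bbox_inches='tight')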
