Optimizing One-Way partial AUC on Imbalanced CIFAR10 Dataset (SOPA)
Introduction
In this tutorial, we will learn how to quickly train a ResNet18 model by optimizing the one-way partial AUC (OPAUC) score, using our novel pAUC_CVaR_Loss together with the SOPA optimizer [Ref], on a binary image classification task on CIFAR10. After completing this tutorial, you should be able to use LibAUC to train your own models on your own datasets.
Reference:
If you find this tutorial helpful in your work, please cite our [library paper] and the following papers:
@inproceedings{zhu2022auc,
title={When auc meets dro: Optimizing partial auc for deep learning with non-convex convergence guarantee},
author={Zhu, Dixian and Li, Gang and Wang, Bokun and Wu, Xiaodong and Yang, Tianbao},
booktitle={International Conference on Machine Learning},
pages={27548--27573},
year={2022},
organization={PMLR}
}
Install LibAUC
Let’s start by installing our library. In this tutorial, we will use the latest version of LibAUC, installed with pip install -U.
!pip install -U libauc
Importing LibAUC
Import required libraries to use
from libauc.models import resnet18
from libauc.datasets import CIFAR10
from libauc.utils import ImbalancedDataGenerator
from libauc.losses import pAUC_CVaR_Loss
from libauc.optimizers import SOPA
from libauc.utils import ImbalancedDataGenerator
from libauc.sampler import DualSampler # data resampling (for binary class)
from libauc.metrics import pauc_roc_score
import torch
from PIL import Image
import numpy as np
import torchvision.transforms as transforms
from torch.utils.data import Dataset
Reproducibility
These functions limit the number of sources of random behavior, such as model initialization, data shuffling, etc. However, completely reproducible results are not guaranteed across PyTorch releases [Ref].
def set_all_seeds(SEED):
    """Seed the random number generators used in this tutorial.

    Seeds Python's ``random`` module, NumPy and PyTorch with the same value,
    then puts cuDNN into deterministic mode and disables its benchmark
    auto-tuner.

    Args:
        SEED: Integer seed applied to every generator.
    """
    # Imported locally so the function does not rely on a file-level import.
    import random

    # REPRODUCIBILITY
    # Python's own generator is seeded too: the PyTorch reproducibility notes
    # recommend seeding every generator a program may draw from.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
Image Dataset
Now we define the data input pipeline, including the data augmentations. In this tutorial, we use RandomCrop and RandomHorizontalFlip. The pos_index_map maps the global (dataset) index of each positive sample to a local index; this reduces the memory cost in the loss function, since we only need to track the indices of positive samples.
class ImageDataset(Dataset):
    """Image dataset that also returns the index the loss function needs.

    In ``'train'`` mode each item is augmented (random crop, random horizontal
    flip, resize back to ``image_size``) and the returned index is the sample's
    position among the positive samples, or ``-1`` for any other sample. In
    any other mode the image is only converted and resized, and the dataset
    index is returned unchanged.

    Args:
        images: Array of images; converted to ``uint8`` once at construction.
            Presumably shaped (N, H, W, C) -- confirm against the data source.
        targets: Per-image labels; entries equal to ``1`` are the positives.
        image_size: Side length of the square image returned by both pipelines.
        crop_size: Side length of the random crop used in training.
        mode: ``'train'`` selects the augmenting pipeline and local indices.
    """

    def __init__(self, images, targets, image_size=32, crop_size=30, mode='train'):
        self.images = images.astype(np.uint8)
        self.targets = targets
        self.mode = mode
        self.transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.RandomCrop((crop_size, crop_size), padding=None),
            transforms.RandomHorizontalFlip(),
            transforms.Resize((image_size, image_size)),
        ])
        self.transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((image_size, image_size)),
        ])

        # for loss function
        # np.asarray keeps the comparison element-wise even when `targets`
        # is a plain Python list.
        self.pos_indices = np.flatnonzero(np.asarray(targets) == 1)
        # global dataset index -> position among the positive samples
        self.pos_index_map = {
            int(global_idx): local_idx
            for local_idx, global_idx in enumerate(self.pos_indices)
        }

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def _loss_index(self, idx):
        """Return the positive-local index for ``idx``, or -1 if not positive."""
        # Dictionary lookup instead of scanning the pos_indices array per item.
        return self.pos_index_map.get(idx, -1)

    def __getitem__(self, idx):
        """Return ``(image, target, index)`` for the sample at ``idx``."""
        # self.images is already uint8 (converted in __init__), so no
        # per-item astype copy is needed here.
        image = Image.fromarray(self.images[idx])
        target = self.targets[idx]
        if self.mode == 'train':
            idx = self._loss_index(idx)
            image = self.transform_train(image)
        else:
            image = self.transform_test(image)
        return image, target, idx
HyperParameters
# ---- Hyperparameters ----

# run configuration
SEED = 123
total_epochs = 60
batch_size = 64

# optimizer settings
lr = 1e-3             # learning rate
weight_decay = 5e-4   # regularization weight decay
decay_factor = 10
decay_epochs = [20, 40]

# loss settings
beta = 0.1            # upper bound for FPR
eta = 5.0             # learning rate for controlling the negative-sample weights

# Oversampling of the minority class; tune within (0, 0.5].
# Example: sampling_rate=0.5 puts sampling_rate*batch_size=32 positive
# samples into every mini-batch.
sampling_rate = 0.5
Loading datasets
# Fetch both CIFAR10 splits as numpy arrays
train_data, train_targets = CIFAR10(root='./data', train=True).as_array()
test_data, test_targets = CIFAR10(root='./data', train=False).as_array()

# Derive the imbalanced versions: imratio=0.2 for training, imratio=0.5 for testing
generator = ImbalancedDataGenerator(shuffle=True, verbose=True, random_seed=0)
train_images, train_labels = generator.transform(
    train_data, train_targets, imratio=0.2
)
test_images, test_labels = generator.transform(
    test_data, test_targets, imratio=0.5
)

# Wrap the arrays in datasets (the training one applies the augmentations)
trainDataset = ImageDataset(train_images, train_labels, mode='train')
testDataset = ImageDataset(test_images, test_labels, mode='test')

# Data loaders: training batches are drawn through the DualSampler
sampler = DualSampler(trainDataset, batch_size, sampling_rate=sampling_rate)
trainloader = torch.utils.data.DataLoader(
    trainDataset,
    batch_size=batch_size,
    sampler=sampler,
    shuffle=False,
    num_workers=1,
)
testloader = torch.utils.data.DataLoader(
    testDataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1,
)
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
0%| | 0/170498071 [00:00<?, ?it/s]
Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
#SAMPLES: 31250, CLASS 0.0 COUNT: 25000, CLASS RATIO: 0.8000
#SAMPLES: 31250, CLASS 1.0 COUNT: 6250, CLASS RATIO: 0.2000
#SAMPLES: 10000, CLASS 0 COUNT: 5000, CLASS RATIO: 0.5000
#SAMPLES: 10000, CLASS 1 COUNT: 5000, CLASS RATIO: 0.5000
Creating models & OPAUC Optimizer
# Seed every generator before the model is built
set_all_seeds(SEED)

# The network is created without a final activation (last_activation=None);
# a sigmoid/l2 activation can be applied to its outputs before computing the loss.
model = resnet18(pretrained=False, num_classes=1, last_activation=None).cuda()

# Loss function and optimizer.
# Without a mapping function for the index, data_len should instead be
# the length of the dataset.
loss_fn = pAUC_CVaR_Loss(
    pos_len=sampler.pos_len,
    data_len=sampler.pos_len,
    beta=beta,
    eta=eta,
)
optimizer = SOPA(
    model.parameters(),
    loss_fn=loss_fn,
    mode='adam',
    lr=lr,
    weight_decay=weight_decay,
)
Training
import warnings
warnings.filterwarnings("ignore")

print('Start Training')
print('-'*30)

# Per-epoch pAUC history, consumed later for plotting.
tr_pAUC = []
te_pAUC = []
for epoch in range(total_epochs):
    # Step-wise learning-rate decay at the configured epochs.
    if epoch in decay_epochs:
        optimizer.update_lr(decay_factor=decay_factor)

    # ---- training pass ----
    train_loss = 0
    num_batches = 0
    model.train()
    # Batch variables get their own names so the dataset arrays loaded
    # earlier (train_data, train_labels, ...) are not overwritten.
    for batch_images, batch_labels, index in trainloader:
        batch_images, batch_labels = batch_images.cuda(), batch_labels.cuda()
        y_pred = model(batch_images)
        y_prob = torch.sigmoid(y_pred)
        loss = loss_fn(y_prob, batch_labels, index.cuda())
        # .item() yields a plain Python float for the running sum.
        train_loss = train_loss + loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        num_batches += 1
    # Mean loss per batch; max() guards against an empty loader.
    train_loss = train_loss / max(num_batches, 1)

    # ---- evaluation ----
    model.eval()
    with torch.no_grad():
        # NOTE(review): this pass iterates trainloader again, i.e. the
        # sampler-driven, augmented training batches -- confirm that this is
        # the intended basis for the reported training pAUC.
        train_pred = []
        train_true = []
        for batch_images, batch_labels, _ in trainloader:
            batch_images = batch_images.cuda()
            y_pred = model(batch_images)
            y_prob = torch.sigmoid(y_pred)
            train_pred.append(y_prob.cpu().numpy())
            train_true.append(batch_labels.numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        single_train_auc = pauc_roc_score(train_true, train_pred, max_fpr = 0.3)

        test_pred = []
        test_true = []
        for batch_images, batch_labels, _ in testloader:
            batch_images = batch_images.cuda()
            # Raw model outputs are scored here (no sigmoid), unlike the
            # training pass above. NOTE(review): ROC-based scores depend only
            # on the ranking of the scores, so this should not change the
            # metric -- confirm.
            y_pred = model(batch_images)
            test_pred.append(y_pred.cpu().numpy())
            test_true.append(batch_labels.numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        single_test_auc = pauc_roc_score(test_true, test_pred, max_fpr = 0.3)

        # NOTE: '%.4f' prints learning rates below 5e-5 as 0.0000.
        print('Epoch=%s, Loss=%.4f, Train_pAUC(0.3)=%.4f, Test_pAUC(0.3)=%.4f, lr=%.4f'%(epoch, train_loss, single_train_auc, single_test_auc, optimizer.lr))

        tr_pAUC.append(single_train_auc)
        te_pAUC.append(single_test_auc)
Start Training
------------------------------
Epoch=0, Loss=8.1060, Train_pAUC(0.3)=0.6361, Test_pAUC(0.3)=0.6298, lr=0.0010
Epoch=1, Loss=6.0949, Train_pAUC(0.3)=0.7160, Test_pAUC(0.3)=0.6971, lr=0.0010
Epoch=2, Loss=5.1303, Train_pAUC(0.3)=0.7163, Test_pAUC(0.3)=0.6951, lr=0.0010
Epoch=3, Loss=4.4228, Train_pAUC(0.3)=0.8176, Test_pAUC(0.3)=0.7793, lr=0.0010
Epoch=4, Loss=3.9360, Train_pAUC(0.3)=0.7580, Test_pAUC(0.3)=0.7306, lr=0.0010
Epoch=5, Loss=3.5119, Train_pAUC(0.3)=0.8493, Test_pAUC(0.3)=0.7973, lr=0.0010
Epoch=6, Loss=3.1343, Train_pAUC(0.3)=0.8552, Test_pAUC(0.3)=0.7938, lr=0.0010
Epoch=7, Loss=2.7456, Train_pAUC(0.3)=0.8773, Test_pAUC(0.3)=0.8173, lr=0.0010
Epoch=8, Loss=2.4535, Train_pAUC(0.3)=0.8803, Test_pAUC(0.3)=0.7983, lr=0.0010
Epoch=9, Loss=2.2526, Train_pAUC(0.3)=0.9098, Test_pAUC(0.3)=0.8359, lr=0.0010
Epoch=10, Loss=1.9403, Train_pAUC(0.3)=0.9288, Test_pAUC(0.3)=0.8284, lr=0.0010
Epoch=11, Loss=1.7761, Train_pAUC(0.3)=0.9432, Test_pAUC(0.3)=0.8485, lr=0.0010
Epoch=12, Loss=1.5937, Train_pAUC(0.3)=0.9324, Test_pAUC(0.3)=0.8351, lr=0.0010
Epoch=13, Loss=1.4121, Train_pAUC(0.3)=0.9445, Test_pAUC(0.3)=0.8325, lr=0.0010
Epoch=14, Loss=1.3269, Train_pAUC(0.3)=0.9546, Test_pAUC(0.3)=0.8431, lr=0.0010
Epoch=15, Loss=1.2714, Train_pAUC(0.3)=0.9583, Test_pAUC(0.3)=0.8505, lr=0.0010
Epoch=16, Loss=1.1135, Train_pAUC(0.3)=0.9579, Test_pAUC(0.3)=0.8517, lr=0.0010
Epoch=17, Loss=1.0596, Train_pAUC(0.3)=0.9568, Test_pAUC(0.3)=0.8314, lr=0.0010
Epoch=18, Loss=1.0046, Train_pAUC(0.3)=0.9638, Test_pAUC(0.3)=0.8531, lr=0.0010
Epoch=19, Loss=0.9641, Train_pAUC(0.3)=0.9383, Test_pAUC(0.3)=0.8205, lr=0.0010
Reducing lr to 0.00010 @ T=15620!
Epoch=20, Loss=0.5578, Train_pAUC(0.3)=0.9900, Test_pAUC(0.3)=0.8705, lr=0.0001
Epoch=21, Loss=0.3650, Train_pAUC(0.3)=0.9921, Test_pAUC(0.3)=0.8696, lr=0.0001
Epoch=22, Loss=0.2878, Train_pAUC(0.3)=0.9936, Test_pAUC(0.3)=0.8702, lr=0.0001
Epoch=23, Loss=0.2501, Train_pAUC(0.3)=0.9949, Test_pAUC(0.3)=0.8726, lr=0.0001
Epoch=24, Loss=0.2165, Train_pAUC(0.3)=0.9952, Test_pAUC(0.3)=0.8738, lr=0.0001
Epoch=25, Loss=0.1896, Train_pAUC(0.3)=0.9960, Test_pAUC(0.3)=0.8728, lr=0.0001
Epoch=26, Loss=0.1716, Train_pAUC(0.3)=0.9963, Test_pAUC(0.3)=0.8725, lr=0.0001
Epoch=27, Loss=0.1611, Train_pAUC(0.3)=0.9966, Test_pAUC(0.3)=0.8745, lr=0.0001
Epoch=28, Loss=0.1558, Train_pAUC(0.3)=0.9971, Test_pAUC(0.3)=0.8721, lr=0.0001
Epoch=29, Loss=0.1426, Train_pAUC(0.3)=0.9971, Test_pAUC(0.3)=0.8717, lr=0.0001
Epoch=30, Loss=0.1269, Train_pAUC(0.3)=0.9973, Test_pAUC(0.3)=0.8706, lr=0.0001
Epoch=31, Loss=0.1189, Train_pAUC(0.3)=0.9975, Test_pAUC(0.3)=0.8696, lr=0.0001
Epoch=32, Loss=0.1180, Train_pAUC(0.3)=0.9978, Test_pAUC(0.3)=0.8721, lr=0.0001
Epoch=33, Loss=0.0975, Train_pAUC(0.3)=0.9978, Test_pAUC(0.3)=0.8726, lr=0.0001
Epoch=34, Loss=0.1114, Train_pAUC(0.3)=0.9979, Test_pAUC(0.3)=0.8708, lr=0.0001
Epoch=35, Loss=0.0955, Train_pAUC(0.3)=0.9978, Test_pAUC(0.3)=0.8700, lr=0.0001
Epoch=36, Loss=0.0799, Train_pAUC(0.3)=0.9978, Test_pAUC(0.3)=0.8697, lr=0.0001
Epoch=37, Loss=0.0802, Train_pAUC(0.3)=0.9977, Test_pAUC(0.3)=0.8690, lr=0.0001
Epoch=38, Loss=0.0853, Train_pAUC(0.3)=0.9981, Test_pAUC(0.3)=0.8732, lr=0.0001
Epoch=39, Loss=0.0769, Train_pAUC(0.3)=0.9984, Test_pAUC(0.3)=0.8710, lr=0.0001
Reducing lr to 0.00001 @ T=31240!
Epoch=40, Loss=0.0668, Train_pAUC(0.3)=0.9984, Test_pAUC(0.3)=0.8703, lr=0.0000
Epoch=41, Loss=0.0569, Train_pAUC(0.3)=0.9983, Test_pAUC(0.3)=0.8708, lr=0.0000
Epoch=42, Loss=0.0529, Train_pAUC(0.3)=0.9984, Test_pAUC(0.3)=0.8722, lr=0.0000
Epoch=43, Loss=0.0490, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8709, lr=0.0000
Epoch=44, Loss=0.0457, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8709, lr=0.0000
Epoch=45, Loss=0.0401, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8715, lr=0.0000
Epoch=46, Loss=0.0400, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8695, lr=0.0000
Epoch=47, Loss=0.0422, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8705, lr=0.0000
Epoch=48, Loss=0.0420, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8712, lr=0.0000
Epoch=49, Loss=0.0406, Train_pAUC(0.3)=0.9984, Test_pAUC(0.3)=0.8706, lr=0.0000
Epoch=50, Loss=0.0395, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8687, lr=0.0000
Epoch=51, Loss=0.0396, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8714, lr=0.0000
Epoch=52, Loss=0.0349, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8706, lr=0.0000
Epoch=53, Loss=0.0317, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8696, lr=0.0000
Epoch=54, Loss=0.0340, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8724, lr=0.0000
Epoch=55, Loss=0.0294, Train_pAUC(0.3)=0.9988, Test_pAUC(0.3)=0.8695, lr=0.0000
Epoch=56, Loss=0.0308, Train_pAUC(0.3)=0.9985, Test_pAUC(0.3)=0.8683, lr=0.0000
Epoch=57, Loss=0.0271, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8698, lr=0.0000
Epoch=58, Loss=0.0259, Train_pAUC(0.3)=0.9986, Test_pAUC(0.3)=0.8698, lr=0.0000
Epoch=59, Loss=0.0288, Train_pAUC(0.3)=0.9984, Test_pAUC(0.3)=0.8707, lr=0.0000
Visualization
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (9,5)

# One x position per recorded epoch, so the plot stays valid when the number
# of training epochs is changed (previously hard-coded to 60).
x = np.arange(len(tr_pAUC))

plt.figure()
# Dashed line: training pAUC; solid line: test pAUC.
plt.plot(x, tr_pAUC, linestyle='--', label='SOPA train', linewidth=3)
plt.plot(x, te_pAUC, label='SOPA test', linewidth=3)
plt.title('CIFAR-10 (20% imbalanced)',fontsize=25)
plt.legend(fontsize=15)
plt.ylabel('OPAUC(0.3)',fontsize=25)
plt.xlabel('epochs',fontsize=25)