Optimizing Global Contrastive Loss with Automatic Temperature Individualization (iSogCLR)


Author: Zi-Hao Qiu
Edited by: Zhuoning Yuan, Tianbao Yang

Introduction

In this tutorial, we demonstrate how to apply the iSogCLR algorithm to a typical bimodal contrastive learning task. In the pretraining stage, we train on a subset of the widely used CC3M dataset, which contains about 3,000,000 image-text pairs. We then evaluate the pretrained models via zero-shot image/text retrieval on the MS-COCO dataset.

For ease of reproduction, we provide a subset of CC3M here, which contains 300,000 image-text pairs. We also provide the MS-COCO dataset and its JSON files here. The experiments in this tutorial were conducted on 4 Nvidia 3090 GPUs; you can modify the CUDA_VISIBLE_DEVICES and batch_size_train options based on your hardware.

References

If you find this tutorial helpful in your work, please cite our library paper and the following papers:

@inproceedings{qiu2023isogclr,
     title={Not All Semantics are Created Equal: Contrastive Self-supervised Learning with Automatic Temperature Individualization},
     author={Qiu, Zi-Hao and Hu, Quanqi and Yuan, Zhuoning and Zhou, Denny and Zhang, Lijun and Yang, Tianbao},
     booktitle={International Conference on Machine Learning},
     year={2023},
     organization={PMLR}
   }

Install Latest LibAUC and Other Required Libs

!pip install -U libauc

Here we use the timm library to build the image encoder and the transformers library to build the text encoder.

!pip install timm
!pip install transformers

We compare iSogCLR with CLIP, using the CLIP loss implementation from OpenCLIP.

!pip install open_clip_torch

Import required libs

import os
os.environ["TOKENIZERS_PARALLELISM"] = "true"
os.environ["CUDA_VISIBLE_DEVICES"] = '0' # distributed training: '0,1,2,3'

import re
import argparse
from pathlib import Path
import json
import os
import random
import math
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch import optim
import torchvision
from torchvision import transforms

from torch.utils.data import Dataset, Subset, DataLoader

from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
Image.MAX_IMAGE_PIXELS = None

import cv2
import numpy as np

import timm
from transformers import AutoModel, AutoTokenizer

import open_clip
from open_clip.loss import ClipLoss


import libauc
from libauc.losses.contrastive import GCLoss_v2
from libauc.optimizer import iSogCLR
from libauc.utils import CosineLRScheduler

Arguments for experiments

# path to data folder
data_path = 'cc3m_subset'          # image root handed to train_set
train_file = 'cc3m_subset.json'    # training annotation file

# model config
image_encoder = 'resnet50'                 # model name passed to timm.create_model
text_encoder = 'distilbert-base-uncased'   # model name passed to AutoModel / AutoTokenizer
image_res = 256                            # side length used by the train/test image transforms
vision_width = 768
embed_dim = 256                            # output size of the image/text projection heads
seed = 42

# optimizer and scheduler
opt = 'adamW'
lr = 3e-4
min_lr = 1e-5
warmup = True
warmup_lr = 1e-5
weight_decay = 0.02
decay_rate = 1
epochs = 30
# NOTE: despite the name, this is passed to epoch_train as `warmup_steps`, where it is
# multiplied by a step size of 100 iterations and only applied during epoch 0; it is also
# used as an offset in the per-epoch `lr_scheduler.step(epoch+warmup_epochs+1)` call.
warmup_epochs = 20
cooldown_epochs = 0

# training & test settings
batch_size_train = 256   # per-GPU batch size; the train loader multiplies it by n_gpus
batch_size_test = 512
k_test = 256             # number of top-scoring candidates kept per query in evaluation()

# output path
output_dir = './output/'

# AMP training
use_amp = True

# loss config
temp = 0.01       # the temperature parameter for clip or sogclr
gamma = 0.8       # the parameter for the moving average estimator in sogclr/isogclr
rho = 8.0         # the rho parameter for isogclr
eta = 1e-4        # learning rate for the learnable temperature variables in isogclr
tau_init = 0.01   # the initial value of the learnable temperature variables in isogclr
beta_u = 0.9      # the momentum parameter for the gradients of the learnable temperature variables

# number of GPUs visible to this process (controlled by CUDA_VISIBLE_DEVICES)
n_gpus = torch.cuda.device_count()

# MS-COCO retrieval benchmark: annotation files and image root
val_coco_file = 'coco_val_new.json'
test_coco_file = 'coco_test_new.json'
coco_image_root = 'coco'

# make sure the output directory exists (no error if it is already there)
Path(output_dir).mkdir(parents=True, exist_ok=True)

Define helper functions

# caption normalisation shared by the training and evaluation datasets
def pre_caption(caption, max_words):
    """Normalise a raw caption and cap its length.

    The caption is lower-cased, a fixed set of punctuation characters is
    removed, '-' and '/' become spaces, the literal token '<person>' becomes
    'person', runs of two or more whitespace characters collapse to a single
    space, and trailing newlines / surrounding spaces are stripped.

    Args:
        caption: raw caption string.
        max_words: maximum number of space-separated words to keep.

    Returns:
        The cleaned caption, truncated to at most ``max_words`` words.
    """
    text = re.sub(r"([,.'!?\"()*#:;~])", '', caption.lower())
    for old, new in (('-', ' '), ('/', ' '), ('<person>', 'person')):
        text = text.replace(old, new)

    text = re.sub(r"\s{2,}", ' ', text)
    text = text.rstrip('\n').strip(' ')

    # keep only the leading max_words words
    words = text.split(' ')
    if len(words) <= max_words:
        return text
    return ' '.join(words[:max_words])
class train_set(Dataset):
    """Image-caption training dataset built from one or more JSON annotation files.

    Each annotation record is read for the keys 'image' (path relative to
    ``image_root``), 'caption' and 'image_id'.

    Args:
        ann_file: iterable of paths to JSON files, each holding a list of records.
        transform: callable applied to the loaded RGB PIL image.
        image_root: directory that the 'image' paths are relative to.
        max_words: maximum number of words kept per caption.
    """

    def __init__(self, ann_file, transform, image_root, max_words=30):
        self.ann = []
        for path in ann_file:
            # context manager so the annotation file is closed right after parsing
            with open(path, 'r') as fp:
                self.ann += json.load(fp)
        self.transform = transform
        self.image_root = image_root
        self.max_words = max_words

        # map every distinct image_id to a dense integer id, in first-seen order
        self.img_ids = {}
        for ann in self.ann:
            # len() is evaluated before insertion, so it is the next free id
            self.img_ids.setdefault(ann['image_id'], len(self.img_ids))

    def __len__(self):
        """Return the number of annotation records (image-caption pairs)."""
        return len(self.ann)

    def __getitem__(self, index):
        """Return ``(image, caption, dense_image_id, index)`` for one record."""
        ann = self.ann[index]
        image_path = os.path.join(self.image_root, ann['image'])

        # convert() yields an independent in-memory image, so the file handle
        # can be released as soon as the with-block exits
        with Image.open(image_path) as raw:
            image = raw.convert('RGB')
        image = self.transform(image)

        caption = pre_caption(ann['caption'], self.max_words)

        return image, caption, self.img_ids[ann['image_id']], index



class eval_set(Dataset):
    """Retrieval evaluation dataset: one item per image, captions kept on the side.

    Each annotation record is read for the keys 'image' (path relative to
    ``image_root``) and 'caption' (an iterable of caption strings for that image).

    Attributes built in ``__init__``:
        text: flat list of all pre-processed captions.
        image: list of image paths, one per record.
        img2txt: image index -> list of indices into ``text``.
        txt2img: index into ``text`` -> image index.

    Args:
        ann_file: path to a JSON file holding a list of records.
        transform: callable applied to the loaded RGB PIL image.
        image_root: directory that the 'image' paths are relative to.
        max_words: maximum number of words kept per caption.
    """

    def __init__(self, ann_file, transform, image_root, max_words=30):
        # context manager so the annotation file is closed right after parsing
        with open(ann_file, 'r') as fp:
            self.ann = json.load(fp)
        self.transform = transform
        self.image_root = image_root
        self.max_words = max_words

        self.text = []
        self.image = []
        self.txt2img = {}
        self.img2txt = {}

        txt_id = 0
        for img_id, ann in enumerate(self.ann):
            self.image.append(ann['image'])
            self.img2txt[img_id] = []
            for caption in ann['caption']:
                self.text.append(pre_caption(caption, self.max_words))
                self.img2txt[img_id].append(txt_id)
                self.txt2img[txt_id] = img_id
                txt_id += 1

    def __len__(self):
        """Return the number of images (not the number of captions)."""
        return len(self.image)

    def __getitem__(self, index):
        """Return ``(transformed_image, index)`` for the image at ``index``."""
        image_path = os.path.join(self.image_root, self.ann[index]['image'])
        # convert() yields an independent in-memory image, so the file handle
        # can be released as soon as the with-block exits
        with Image.open(image_path) as raw:
            image = raw.convert('RGB')
        image = self.transform(image)

        return image, index
def add_weight_decay(model, weight_decay=1e-5, skip_list=()):
    """Split a model's trainable parameters into two optimizer groups.

    A parameter goes to the no-decay group when it is one-dimensional, when
    its name ends with ".bias", or when its name appears in ``skip_list``.
    Parameters with ``requires_grad == False`` are left out entirely.

    Args:
        model: object exposing ``named_parameters()``.
        weight_decay: decay value assigned to the decayed group.
        skip_list: container of parameter names that must not be decayed.

    Returns:
        A two-element list of param-group dicts: the no-decay group
        (``weight_decay`` 0.) first, then the decayed group.
    """
    with_decay, without_decay = [], []
    for param_name, tensor in model.named_parameters():
        if tensor.requires_grad:
            exempt = (
                len(tensor.shape) == 1
                or param_name.endswith(".bias")
                or param_name in skip_list
            )
            target = without_decay if exempt else with_decay
            target.append(tensor)

    no_decay_group = {'params': without_decay, 'weight_decay': 0.}
    decay_group = {'params': with_decay, 'weight_decay': weight_decay}
    return [no_decay_group, decay_group]


def create_optimizer(model, opt, weight_decay=1e-5, filter_bias_and_bn=True):
    """Build the iSogCLR optimizer for ``model``.

    When ``weight_decay`` is non-zero and ``filter_bias_and_bn`` is true, the
    parameters are split by ``add_weight_decay`` into decayed / non-decayed
    groups (honouring ``model.no_weight_decay()`` if the model defines it) and
    the optimizer-level weight decay is set to 0 so that only the per-group
    values apply. Otherwise all parameters are passed as-is.

    The learning rate is read from the module-level ``lr`` setting.

    Args:
        model: the network whose parameters are optimized.
        opt: value forwarded to iSogCLR as ``mode``.
        weight_decay: weight-decay coefficient.
        filter_bias_and_bn: whether to use the decayed / non-decayed split.

    Returns:
        The constructed iSogCLR optimizer.
    """
    if not (weight_decay and filter_bias_and_bn):
        parameters = model.parameters()
    else:
        skip = model.no_weight_decay() if hasattr(model, 'no_weight_decay') else {}
        parameters = add_weight_decay(model, weight_decay, skip)
        # decay now lives in the param groups; disable the global value
        weight_decay = 0.

    return iSogCLR(parameters, mode=opt, lr=lr, weight_decay=weight_decay)
def create_scheduler(optimizer):
    """Build the cosine learning-rate scheduler for ``optimizer``.

    All schedule settings come from the module-level configuration
    (``epochs``, ``min_lr``, ``decay_rate``, ``warmup_lr``, ``warmup_epochs``).

    Args:
        optimizer: the optimizer whose learning rate is scheduled.

    Returns:
        A ``CosineLRScheduler`` instance.
    """
    schedule_kwargs = {
        't_initial': epochs,
        't_mul': 1.0,
        'lr_min': min_lr,
        'decay_rate': decay_rate,
        'warmup_lr_init': warmup_lr,
        'warmup_t': warmup_epochs,
        'cycle_limit': 1,
        't_in_epochs': True,
        'noise_range_t': None,
        'noise_pct': 0.67,
        'noise_std': 1.0,
        'noise_seed': 42,
    }
    return CosineLRScheduler(optimizer, **schedule_kwargs)

Fix random seed

The following statements limit the sources of random behavior, such as model initialization and data shuffling.

# fix the seed for reproducibility: seed PyTorch, NumPy and Python's `random` with the same value
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
# NOTE(review): benchmark mode is enabled here, presumably for speed; it may make runs
# non-bit-identical despite the fixed seeds — confirm if strict reproducibility is required
cudnn.benchmark = True

Objectives

Here, we mainly introduce the Robust Global Contrastive Loss (RGCL) for learning representations of bimodal data (e.g., image-text data). For the detailed formulation, please refer to the iSogCLR paper listed in the References section above.

Define the model

# The following class bundles the image encoder, the text encoder, their projection heads
# and the training objective (CLIP loss or iSogCLR loss)
class Model(nn.Module):
    def __init__(self, image_encoder = None, text_encoder = None,
                 embed_dim = 256, init_model = True, bsz = 128,
                 loss_type = 'clip',  # objective type: 'clip' or 'isogclr' (anything else raises NotImplementedError)
                 gamma = 0.9,         # the coefficient for moving average estimator
                 temp = 0.01,         # temperature for clip or sogclr
                 rho = 8.0, eta = 0.01, tau_init = 0.01, beta_u = 0.9,  # params for isogclr
                 use_temp_net = True):    # True if you want to use temperature network for isogclr
        """Build the encoders, projection heads and loss.

        Args:
            image_encoder: model name passed to ``timm.create_model``.
            text_encoder: model name passed to ``AutoModel.from_pretrained``.
            embed_dim: size of the shared embedding space (output of both projections).
            init_model: if True, load pretrained image weights; if False, the image
                encoder is created without pretrained weights and the text encoder
                weights are re-initialised.
            loss_type: 'clip' or 'isogclr'.
            gamma, rho, eta: forwarded to ``GCLoss_v2`` when ``loss_type == 'isogclr'``.
            temp: initial temperature; used to initialise ``logit_scale`` for 'clip'
                and passed as ``tau`` to ``GCLoss_v2`` for 'isogclr'.
            bsz, tau_init, beta_u, use_temp_net: accepted but not used anywhere in
                this class.

        Raises:
            NotImplementedError: if ``loss_type`` is neither 'clip' nor 'isogclr'.
        """
        super().__init__()

        self.temp = temp

        self.visual_encoder = timm.create_model(image_encoder, pretrained=init_model)
        # presumably drops the classification head so the encoder outputs pooled features — confirm against timm docs
        self.visual_encoder.reset_classifier(0)

        self.text_encoder = AutoModel.from_pretrained(text_encoder, local_files_only=False)

        if not init_model:
            # train the text encoder from scratch: discard the loaded pretrained weights
            self.text_encoder.init_weights()

        self.vision_proj = nn.Linear(self.visual_encoder.num_features, embed_dim)
        # NOTE(review): 768 is hard-coded; assumes the text encoder's hidden size is 768 — verify when changing `text_encoder`
        self.text_proj = nn.Linear(768, embed_dim)

        self.loss_type = loss_type

        if self.loss_type == 'clip':
            self.criterion = ClipLoss()        # here we employ the implementation from open-clip
            # learnable log-scale, initialised to log(1 / temp); exponentiated in forward()
            self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / temp))
        elif self.loss_type == 'isogclr':
            # NOTE(review): `temp` (not `tau_init`) is what gets passed as tau here — confirm this is intended
            self.criterion = GCLoss_v2(tau=temp, gamma=gamma, tau_min=0.005, tau_max=0.07,
                                       rho=rho, eta=eta, enable_isogclr=True)
        else:
            raise NotImplementedError

    def forward(self, image, text_ids, text_att_masks, idx, text_idx, epoch):
        """Encode a batch of image-text pairs and compute the training loss.

        Args:
            image: batch of images fed to the visual encoder.
            text_ids: token ids fed to the text encoder.
            text_att_masks: attention mask matching ``text_ids``.
            idx: per-sample indices forwarded to the iSogCLR criterion.
            text_idx: accepted but not used in this method.
            epoch: accepted but not used in this method.

        Returns:
            ``(loss, info)``; ``info`` is None for 'clip' and the second value
            returned by the criterion for 'isogclr'.
        """
        image_embeds = self.visual_encoder(image)
        image_embeds = self.vision_proj(image_embeds)
        # L2-normalise along the feature dimension
        image_feat = F.normalize(image_embeds, dim=-1)

        text_output = self.text_encoder(text_ids, attention_mask=text_att_masks, output_hidden_states=False)
        # use the hidden state at sequence position 0 as the sentence representation
        text_embeds = self.text_proj(text_output.last_hidden_state[:,0,:])
        text_feat = F.normalize(text_embeds, dim=-1)

        if self.loss_type == 'clip':
            loss = self.criterion(image_feat, text_feat, self.logit_scale.exp())
            info = None
        elif self.loss_type == 'isogclr':
            loss, info = self.criterion(image_feat, text_feat, idx)

        return loss, info

Training function

def epoch_train(model, data_loader, optimizer, tokenizer, epoch, max_epoch, warmup_steps, device, scheduler, grad_scaler):
    """Run one training epoch.

    For every batch the captions are tokenized (padded/truncated to 30
    tokens), the model returns ``(loss, info)``, and one optimizer step is
    taken, through ``grad_scaler`` under autocast when a scaler is given.
    During epoch 0 only, the scheduler is stepped every 100 iterations until
    ``warmup_steps * 100`` iterations have elapsed. Progress is printed every
    50 iterations.

    Args:
        model: network returning ``(loss, info)``.
        data_loader: yields ``(image, text, idx, text_idx)`` batches.
        optimizer: optimizer to step.
        tokenizer: callable turning a list of captions into model inputs.
        epoch: current epoch index (0-based).
        max_epoch: accepted but not used.
        warmup_steps: number of 100-iteration warmup scheduler steps in epoch 0.
        device: device the batch is moved to.
        scheduler: learning-rate scheduler stepped during warmup.
        grad_scaler: AMP gradient scaler, or None to train without AMP.

    Returns:
        None.
    """
    model.train()

    log_every = 50
    sched_every = 100
    warmup_limit = warmup_steps * sched_every
    use_amp_path = grad_scaler is not None

    for step, (image, text, idx, text_idx) in enumerate(data_loader):
        optimizer.zero_grad()

        image = image.to(device, non_blocking=True)
        idx = idx.to(device, non_blocking=True)
        text_idx = text_idx.to(device, non_blocking=True)
        tokens = tokenizer(text, padding='max_length', truncation=True, max_length=30, return_tensors="pt").to(device)

        if use_amp_path:
            # mixed-precision forward; backward and step go through the scaler
            with torch.cuda.amp.autocast():
                loss, info = model(image, tokens.input_ids, tokens.attention_mask, idx=idx, text_idx=text_idx, epoch=epoch)
            grad_scaler.scale(loss.mean()).backward()
            grad_scaler.step(optimizer)
            grad_scaler.update()
        else:
            loss, info = model(image, tokens.input_ids, tokens.attention_mask, idx=idx, text_idx=text_idx, epoch=epoch)
            loss.mean().backward()
            optimizer.step()

        # iteration-level warmup, first epoch only
        at_sched_boundary = step % sched_every == 0
        if epoch == 0 and at_sched_boundary and step <= warmup_limit:
            scheduler.step(step // sched_every)

        if step % log_every != 0:
            continue

        current_lr = optimizer.param_groups[0]["lr"]
        print("Epoch:", epoch, "iteration:", step, "lr:", current_lr, "loss:", loss.mean().item())
        if info is not None:
            print("tau_img: %.4f, tau_txt: %.4f" % (info[0].mean(), info[1].mean()))

Evaluation function

def _topk_score_matrix(sims_matrix, k, fill_value=-100.0):
    """Keep the top-k similarities of every row and set all other entries to ``fill_value``."""
    # clamp k so that candidate pools smaller than k do not make topk raise
    k = min(k, sims_matrix.size(1))
    scores = torch.full(tuple(sims_matrix.shape), fill_value, device=sims_matrix.device)
    topk_sim, topk_idx = sims_matrix.topk(k=k, dim=1)
    # scatter_ requires matching dtypes; the score matrix keeps torch.full's default dtype
    scores.scatter_(1, topk_idx, topk_sim.to(scores.dtype))
    return scores


@torch.no_grad()
def evaluation(model, data_loader, tokenizer, device):
    """Compute image-to-text and text-to-image retrieval score matrices.

    All captions in ``data_loader.dataset.text`` and all images yielded by
    ``data_loader`` are embedded and L2-normalised; similarities are their dot
    products. For each query only the top ``k_test`` (module-level setting)
    similarities are kept, every other entry is -100.0. If a side has fewer
    than ``k_test`` candidates, all of them are kept.

    Args:
        model: unwrapped model exposing ``text_encoder``, ``text_proj``,
            ``visual_encoder`` and ``vision_proj``.
        data_loader: loader over an evaluation dataset that exposes ``text``.
        tokenizer: callable turning a list of captions into model inputs.
        device: device on which features are computed.

    Returns:
        ``(score_matrix_i2t, score_matrix_t2i)`` as NumPy arrays of shape
        (num_images, num_texts) and (num_texts, num_images).
    """
    # test
    model.eval()

    print('Computing features for evaluation...')
    texts = data_loader.dataset.text
    text_bs = 256
    text_embeds = []
    for start in range(0, len(texts), text_bs):
        text = texts[start:start + text_bs]  # slicing already clips at the end of the list
        text_input = tokenizer(text, padding='max_length', truncation=True, max_length=30, return_tensors="pt").to(device)
        text_output = model.text_encoder(text_input.input_ids, attention_mask=text_input.attention_mask, output_hidden_states=False)
        text_embed = F.normalize(model.text_proj(text_output.last_hidden_state[:,0,:]), dim=-1)
        text_embeds.append(text_embed)
    text_embeds = torch.cat(text_embeds, dim=0)

    image_embeds = []
    for image, img_id in data_loader:
        image = image.to(device)
        image_feat = model.visual_encoder(image)
        image_embed = F.normalize(model.vision_proj(image_feat), dim=-1)
        image_embeds.append(image_embed)
    image_embeds = torch.cat(image_embeds, dim=0)

    # rows: images, columns: texts
    sims_matrix = image_embeds @ text_embeds.t()
    score_matrix_i2t = _topk_score_matrix(sims_matrix, k_test)
    score_matrix_t2i = _topk_score_matrix(sims_matrix.t(), k_test)

    return score_matrix_i2t.cpu().numpy(), score_matrix_t2i.cpu().numpy()



@torch.no_grad()
def itm_eval(scores_i2t, scores_t2i, txt2img, img2txt):
    """Compute recall@{1,5,10} for image-to-text and text-to-image retrieval.

    Args:
        scores_i2t: array of shape (num_images, num_texts); higher is better.
        scores_t2i: array of shape (num_texts, num_images); higher is better.
        txt2img: mapping text index -> index of its ground-truth image.
        img2txt: mapping image index -> indices of its ground-truth texts.

    Returns:
        Dict with keys 'txt_r1', 'txt_r5', 'txt_r10', 'txt_r_mean',
        'img_r1', 'img_r5', 'img_r10', 'img_r_mean' and 'r_mean'
        (all percentages).
    """
    def recall_at(rank_array, k):
        # percentage of queries whose best ground-truth rank is below k
        hits = np.where(rank_array < k)[0]
        return 100.0 * len(hits) / len(rank_array)

    # Images->Text: rank of the best-placed ground-truth caption per image
    i2t_ranks = np.zeros(scores_i2t.shape[0])
    for img_idx, row in enumerate(scores_i2t):
        order = np.argsort(row)[::-1]
        best = 1e20
        for txt_idx in img2txt[img_idx]:
            position = np.where(order == txt_idx)[0][0]
            if position < best:
                best = position
        i2t_ranks[img_idx] = best

    tr1, tr5, tr10 = (recall_at(i2t_ranks, k) for k in (1, 5, 10))

    # Text->Images: rank of the single ground-truth image per caption
    t2i_ranks = np.zeros(scores_t2i.shape[0])
    for txt_idx, row in enumerate(scores_t2i):
        order = np.argsort(row)[::-1]
        t2i_ranks[txt_idx] = np.where(order == txt2img[txt_idx])[0][0]

    ir1, ir5, ir10 = (recall_at(t2i_ranks, k) for k in (1, 5, 10))

    tr_mean = (tr1 + tr5 + tr10) / 3
    ir_mean = (ir1 + ir5 + ir10) / 3
    r_mean = (tr_mean + ir_mean) / 2

    return {
        'txt_r1': tr1,
        'txt_r5': tr5,
        'txt_r10': tr10,
        'txt_r_mean': tr_mean,
        'img_r1': ir1,
        'img_r5': ir5,
        'img_r10': ir10,
        'img_r_mean': ir_mean,
        'r_mean': r_mean,
    }

Create datasets and dataloaders

# set up the transformation, datasets and dataloaders
# training pipeline: random crop covering 50-100% of the image resized to image_res,
# random horizontal flip, RandAugment, then tensor conversion and normalization
train_transform = transforms.Compose([
        transforms.RandomResizedCrop(image_res, scale=(0.5, 1.0), interpolation=Image.BICUBIC),
        transforms.RandomHorizontalFlip(),
        transforms.RandAugment(),
        transforms.ToTensor(),
        # per-channel mean / std — presumably the CLIP image statistics; TODO confirm
        transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
    ])

# evaluation pipeline: deterministic resize to (image_res, image_res), same normalization as training
test_transform = transforms.Compose([
    transforms.Resize((image_res, image_res), interpolation=Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
    ])

# train_set expects a list of annotation files; eval_set expects a single file
train_dataset = train_set([train_file], train_transform, data_path)
val_coco_dataset = eval_set(val_coco_file, test_transform, coco_image_root)
test_coco_dataset = eval_set(test_coco_file, test_transform, coco_image_root)

print("len of train_dataset:", len(train_dataset))
print("len of coco val/test:", len(val_coco_dataset), len(test_coco_dataset))

# the training batch is batch_size_train per GPU, so the loader batch scales with n_gpus;
# incomplete final batches are dropped for training but kept for evaluation
train_loader = DataLoader(train_dataset, batch_size=batch_size_train * n_gpus, num_workers=16, pin_memory=True,
                         shuffle=True, drop_last=True, prefetch_factor=4)
val_loader = DataLoader(val_coco_dataset, batch_size=batch_size_test, num_workers=16, pin_memory=True,
                       shuffle=False, drop_last=False, prefetch_factor=12)
test_loader = DataLoader(test_coco_dataset, batch_size=batch_size_test, num_workers=16, pin_memory=True,
                       shuffle=False, drop_last=False, prefetch_factor=12)
len of train_dataset: 300000
len of coco val/test: 5000 5000

Launch training and evaluation for CLIP

# create the model (CLIP objective)
tokenizer = AutoTokenizer.from_pretrained(text_encoder, local_files_only=False)
model = Model(image_encoder=image_encoder, text_encoder=text_encoder, embed_dim=embed_dim,
              init_model=True, bsz=batch_size_train, loss_type='clip',
              gamma=gamma, temp=temp, rho=rho, eta=eta, tau_init=tau_init, beta_u=beta_u)

model = model.cuda()
if n_gpus > 1:
    print("Using", n_gpus, "GPUs")
    model = nn.DataParallel(model)

# set up the optimizer and the learning-rate scheduler
optimizer = create_optimizer(model, opt, weight_decay)
lr_scheduler = create_scheduler(optimizer)

# the gradient scaler is only needed for AMP training
grad_scaler = torch.cuda.amp.GradScaler() if use_amp else None

cuda_device = torch.device('cuda')

# training loop
for epoch in range(0, epochs):
    train_stats = epoch_train(model, train_loader, optimizer, tokenizer, epoch, epochs,
                              warmup_epochs, cuda_device, lr_scheduler, grad_scaler)

    # evaluation() accesses the encoders directly, so unwrap the DataParallel container
    # explicitly; a bare try/except here would hide genuine evaluation errors and
    # silently re-run the evaluation
    eval_model = model.module if isinstance(model, nn.DataParallel) else model

    # evaluate the model on ms-coco data
    score_val_i2t_coco, score_val_t2i_coco = evaluation(eval_model, val_loader, tokenizer, cuda_device)
    score_test_i2t_coco, score_test_t2i_coco = evaluation(eval_model, test_loader, tokenizer, cuda_device)

    print("Epoch:", epoch)
    val_result_coco = itm_eval(score_val_i2t_coco, score_val_t2i_coco, val_loader.dataset.txt2img, val_loader.dataset.img2txt)
    print("coco val:", val_result_coco)
    test_result_coco = itm_eval(score_test_i2t_coco, score_test_t2i_coco, test_loader.dataset.txt2img, test_loader.dataset.img2txt)
    print("coco test:", test_result_coco)

    # epoch-level schedule step, offset by the warmup length configured in the scheduler
    lr_scheduler.step(epoch+warmup_epochs+1)
Epoch: 0 iteration: 0 lr: 1e-05 loss: 11.74642562866211
Epoch: 0 iteration: 50 lr: 1e-05 loss: 7.507866859436035
Epoch: 0 iteration: 100 lr: 2.45e-05 loss: 5.759531497955322
Epoch: 0 iteration: 150 lr: 2.45e-05 loss: 4.457749843597412
Epoch: 0 iteration: 200 lr: 3.899999999999999e-05 loss: 3.844197988510132
Epoch: 0 iteration: 250 lr: 3.899999999999999e-05 loss: 3.469355583190918
Computing features for evaluation...
Computing features for evaluation...
Epoch: 0
coco val: {'txt_r1': 3.86, 'txt_r5': 12.8, 'txt_r10': 19.8, 'txt_r_mean': 12.153333333333334, 'img_r1': 1.8872451019592162, 'img_r5': 7.0171931227509, 'img_r10': 12.043182726909237, 'img_r_mean': 6.982540317206451, 'r_mean': 9.567936825269893}
coco test: {'txt_r1': 3.6, 'txt_r5': 12.38, 'txt_r10': 18.84, 'txt_r_mean': 11.606666666666667, 'img_r1': 1.8032786885245902, 'img_r5': 7.005197920831668, 'img_r10': 11.943222710915634, 'img_r_mean': 6.917233106757298, 'r_mean': 9.261949886711982}
Epoch: 1 iteration: 0 lr: 0.0002992056748283996 loss: 3.105051279067993
Epoch: 1 iteration: 50 lr: 0.0002992056748283996 loss: 2.4103074073791504
Epoch: 1 iteration: 100 lr: 0.0002992056748283996 loss: 2.2818379402160645
Epoch: 1 iteration: 150 lr: 0.0002992056748283996 loss: 2.118741989135742
Epoch: 1 iteration: 200 lr: 0.0002992056748283996 loss: 1.9152384996414185
Epoch: 1 iteration: 250 lr: 0.0002992056748283996 loss: 1.8800408840179443
Computing features for evaluation...
Computing features for evaluation...
Epoch: 1
coco val: {'txt_r1': 15.04, 'txt_r5': 33.92, 'txt_r10': 45.58, 'txt_r_mean': 31.513333333333332, 'img_r1': 8.06077568972411, 'img_r5': 22.718912435025988, 'img_r10': 33.10275889644142, 'img_r_mean': 21.29414900706384, 'r_mean': 26.403741170198586}
coco test: {'txt_r1': 14.64, 'txt_r5': 34.1, 'txt_r10': 45.68, 'txt_r_mean': 31.473333333333333, 'img_r1': 7.804878048780488, 'img_r5': 22.82686925229908, 'img_r10': 33.88644542183127, 'img_r_mean': 21.50606424097028, 'r_mean': 26.489698787151806}
Epoch: 2 iteration: 0 lr: 0.0002968314021064018 loss: 1.5531284809112549
Epoch: 2 iteration: 50 lr: 0.0002968314021064018 loss: 1.5267637968063354
Epoch: 2 iteration: 100 lr: 0.0002968314021064018 loss: 1.4859260320663452
Epoch: 2 iteration: 150 lr: 0.0002968314021064018 loss: 1.552567958831787
Epoch: 2 iteration: 200 lr: 0.0002968314021064018 loss: 1.4763367176055908
Epoch: 2 iteration: 250 lr: 0.0002968314021064018 loss: 1.501932978630066
Computing features for evaluation...
Computing features for evaluation...
Epoch: 2
coco val: {'txt_r1': 15.7, 'txt_r5': 36.7, 'txt_r10': 48.82, 'txt_r_mean': 33.74, 'img_r1': 10.163934426229508, 'img_r5': 26.62934826069572, 'img_r10': 37.99280287884846, 'img_r_mean': 24.92869518859123, 'r_mean': 29.334347594295615}
coco test: {'txt_r1': 15.74, 'txt_r5': 36.68, 'txt_r10': 48.24, 'txt_r_mean': 33.553333333333335, 'img_r1': 9.75609756097561, 'img_r5': 26.965213914434226, 'img_r10': 38.74050379848061, 'img_r_mean': 25.153938424630145, 'r_mean': 29.35363587898174}
Epoch: 3 iteration: 0 lr: 0.00029290319486279724 loss: 1.2079691886901855
Epoch: 3 iteration: 50 lr: 0.00029290319486279724 loss: 1.2061635255813599
Epoch: 3 iteration: 100 lr: 0.00029290319486279724 loss: 1.181814432144165
Epoch: 3 iteration: 150 lr: 0.00029290319486279724 loss: 1.235809564590454
Epoch: 3 iteration: 200 lr: 0.00029290319486279724 loss: 1.2041468620300293
Epoch: 3 iteration: 250 lr: 0.00029290319486279724 loss: 1.2037649154663086
Computing features for evaluation...
Computing features for evaluation...
Epoch: 3
coco val: {'txt_r1': 16.04, 'txt_r5': 36.82, 'txt_r10': 49.36, 'txt_r_mean': 34.07333333333333, 'img_r1': 10.44782087165134, 'img_r5': 27.457017193122752, 'img_r10': 38.48460615753699, 'img_r_mean': 25.463148074103696, 'r_mean': 29.76824070371851}
coco test: {'txt_r1': 15.7, 'txt_r5': 36.5, 'txt_r10': 48.58, 'txt_r_mean': 33.593333333333334, 'img_r1': 10.343862455017993, 'img_r5': 27.988804478208717, 'img_r10': 39.36825269892043, 'img_r_mean': 25.900306544049045, 'r_mean': 29.74681993869119}
Epoch: 4 iteration: 0 lr: 0.00028746409135817707 loss: 1.0229318141937256
Epoch: 4 iteration: 50 lr: 0.00028746409135817707 loss: 0.8746964931488037
Epoch: 4 iteration: 100 lr: 0.00028746409135817707 loss: 1.064015507698059
Epoch: 4 iteration: 150 lr: 0.00028746409135817707 loss: 1.0923449993133545
Epoch: 4 iteration: 200 lr: 0.00028746409135817707 loss: 0.979778528213501
Epoch: 4 iteration: 250 lr: 0.00028746409135817707 loss: 1.0288567543029785
Computing features for evaluation...
Computing features for evaluation...
Epoch: 4
coco val: {'txt_r1': 16.2, 'txt_r5': 37.3, 'txt_r10': 49.6, 'txt_r_mean': 34.36666666666667, 'img_r1': 11.407437025189925, 'img_r5': 28.64454218312675, 'img_r10': 40.199920031987205, 'img_r_mean': 26.750633080101295, 'r_mean': 30.55864987338398}
coco test: {'txt_r1': 15.46, 'txt_r5': 37.26, 'txt_r10': 48.44, 'txt_r_mean': 33.72, 'img_r1': 10.71171531387445, 'img_r5': 28.95641743302679, 'img_r10': 40.863654538184726, 'img_r_mean': 26.843929095028653, 'r_mean': 30.281964547514328}
Epoch: 5 iteration: 0 lr: 0.0002805736835487436 loss: 0.748623251914978
Epoch: 5 iteration: 50 lr: 0.0002805736835487436 loss: 0.8048175573348999
Epoch: 5 iteration: 100 lr: 0.0002805736835487436 loss: 0.8324432969093323
Epoch: 5 iteration: 150 lr: 0.0002805736835487436 loss: 0.8187351822853088
Epoch: 5 iteration: 200 lr: 0.0002805736835487436 loss: 0.8561583757400513
Epoch: 5 iteration: 250 lr: 0.0002805736835487436 loss: 0.7616273164749146
Computing features for evaluation...
Computing features for evaluation...
Epoch: 5
coco val: {'txt_r1': 15.6, 'txt_r5': 37.12, 'txt_r10': 49.98, 'txt_r_mean': 34.23333333333333, 'img_r1': 11.463414634146341, 'img_r5': 29.52019192323071, 'img_r10': 41.35545781687325, 'img_r_mean': 27.446354791416766, 'r_mean': 30.839844062375047}
coco test: {'txt_r1': 14.72, 'txt_r5': 35.68, 'txt_r10': 48.34, 'txt_r_mean': 32.913333333333334, 'img_r1': 11.523390643742504, 'img_r5': 30.143942423030786, 'img_r10': 41.67932826869252, 'img_r_mean': 27.78222044515527, 'r_mean': 30.3477768892443}
Epoch: 6 iteration: 0 lr: 0.0002723074641843674 loss: 0.5856387615203857
Epoch: 6 iteration: 50 lr: 0.0002723074641843674 loss: 0.7076289057731628
Epoch: 6 iteration: 100 lr: 0.0002723074641843674 loss: 0.6565060615539551
Epoch: 6 iteration: 150 lr: 0.0002723074641843674 loss: 0.6765242218971252
Epoch: 6 iteration: 200 lr: 0.0002723074641843674 loss: 0.7100015878677368
Epoch: 6 iteration: 250 lr: 0.0002723074641843674 loss: 0.6650581955909729
Computing features for evaluation...
Computing features for evaluation...
Epoch: 6
coco val: {'txt_r1': 15.54, 'txt_r5': 37.94, 'txt_r10': 50.16, 'txt_r_mean': 34.54666666666666, 'img_r1': 11.243502598960417, 'img_r5': 29.432227109156337, 'img_r10': 41.04758096761295, 'img_r_mean': 27.24110355857657, 'r_mean': 30.893885112621614}
coco test: {'txt_r1': 15.78, 'txt_r5': 36.78, 'txt_r10': 49.24, 'txt_r_mean': 33.93333333333334, 'img_r1': 11.379448220711716, 'img_r5': 29.956017592962816, 'img_r10': 41.45541783286685, 'img_r_mean': 27.59696121551379, 'r_mean': 30.765147274423562}
Epoch: 7 iteration: 0 lr: 0.00026275599969422214 loss: 0.5822378396987915
Epoch: 7 iteration: 50 lr: 0.00026275599969422214 loss: 0.5452847480773926
Epoch: 7 iteration: 100 lr: 0.00026275599969422214 loss: 0.5890320539474487
Epoch: 7 iteration: 150 lr: 0.00026275599969422214 loss: 0.558639645576477
Epoch: 7 iteration: 200 lr: 0.00026275599969422214 loss: 0.6335784196853638
Epoch: 7 iteration: 250 lr: 0.00026275599969422214 loss: 0.6401098370552063
Computing features for evaluation...
Computing features for evaluation...
Epoch: 7
coco val: {'txt_r1': 16.54, 'txt_r5': 38.52, 'txt_r10': 51.04, 'txt_r_mean': 35.36666666666667, 'img_r1': 11.795281887245102, 'img_r5': 29.89204318272691, 'img_r10': 41.58736505397841, 'img_r_mean': 27.758230041316807, 'r_mean': 31.562448353991737}
coco test: {'txt_r1': 16.28, 'txt_r5': 37.32, 'txt_r10': 49.64, 'txt_r_mean': 34.413333333333334, 'img_r1': 11.47141143542583, 'img_r5': 30.275889644142342, 'img_r10': 42.147141143542584, 'img_r_mean': 27.964814074370253, 'r_mean': 31.189073703851793}
Epoch: 8 iteration: 0 lr: 0.0002520239379220344 loss: 0.5210278034210205
Epoch: 8 iteration: 50 lr: 0.0002520239379220344 loss: 0.4082544445991516
Epoch: 8 iteration: 100 lr: 0.0002520239379220344 loss: 0.4823477864265442
Epoch: 8 iteration: 150 lr: 0.0002520239379220344 loss: 0.49092692136764526
Epoch: 8 iteration: 200 lr: 0.0002520239379220344 loss: 0.5032364130020142
Epoch: 8 iteration: 250 lr: 0.0002520239379220344 loss: 0.4627079963684082
Computing features for evaluation...
Computing features for evaluation...
Epoch: 8
coco val: {'txt_r1': 15.3, 'txt_r5': 36.62, 'txt_r10': 49.02, 'txt_r_mean': 33.64666666666667, 'img_r1': 11.615353858456617, 'img_r5': 29.78808476609356, 'img_r10': 41.175529788084766, 'img_r_mean': 27.526322804211645, 'r_mean': 30.586494735439157}
coco test: {'txt_r1': 14.7, 'txt_r5': 34.02, 'txt_r10': 47.42, 'txt_r_mean': 32.04666666666667, 'img_r1': 11.411435425829668, 'img_r5': 29.76809276289484, 'img_r10': 41.44342263094762, 'img_r_mean': 27.540983606557376, 'r_mean': 29.79382513661202}
Epoch: 9 iteration: 0 lr: 0.00024022886158240857 loss: 0.34958702325820923
Epoch: 9 iteration: 50 lr: 0.00024022886158240857 loss: 0.4485335350036621
Epoch: 9 iteration: 100 lr: 0.00024022886158240857 loss: 0.41256430745124817
Epoch: 9 iteration: 150 lr: 0.00024022886158240857 loss: 0.3847663998603821
Epoch: 9 iteration: 200 lr: 0.00024022886158240857 loss: 0.434209942817688
Epoch: 9 iteration: 250 lr: 0.00024022886158240857 loss: 0.4179908037185669
Computing features for evaluation...
Computing features for evaluation...
Epoch: 9
coco val: {'txt_r1': 15.56, 'txt_r5': 37.96, 'txt_r10': 50.06, 'txt_r_mean': 34.52666666666667, 'img_r1': 11.611355457816874, 'img_r5': 29.848060775689724, 'img_r10': 41.5953618552579, 'img_r_mean': 27.684926029588166, 'r_mean': 31.10579634812742}
coco test: {'txt_r1': 15.74, 'txt_r5': 36.64, 'txt_r10': 48.5, 'txt_r_mean': 33.626666666666665, 'img_r1': 11.80327868852459, 'img_r5': 29.772091163534586, 'img_r10': 41.583366653338665, 'img_r_mean': 27.719578835132612, 'r_mean': 30.673122750899637}
Epoch: 10 iteration: 0 lr: 0.00022749999999999997 loss: 0.33992326259613037
Epoch: 10 iteration: 50 lr: 0.00022749999999999997 loss: 0.3966507911682129
Epoch: 10 iteration: 100 lr: 0.00022749999999999997 loss: 0.3801310360431671
Epoch: 10 iteration: 150 lr: 0.00022749999999999997 loss: 0.342434823513031
Epoch: 10 iteration: 200 lr: 0.00022749999999999997 loss: 0.3833215832710266
Epoch: 10 iteration: 250 lr: 0.00022749999999999997 loss: 0.43105077743530273
Computing features for evaluation...
Computing features for evaluation...
Epoch: 10
coco val: {'txt_r1': 16.44, 'txt_r5': 39.08, 'txt_r10': 51.28, 'txt_r_mean': 35.6, 'img_r1': 11.915233906437425, 'img_r5': 30.091963214714113, 'img_r10': 41.84326269492203, 'img_r_mean': 27.950153272024522, 'r_mean': 31.775076636012262}
coco test: {'txt_r1': 15.32, 'txt_r5': 37.7, 'txt_r10': 50.18, 'txt_r_mean': 34.4, 'img_r1': 11.859256297481007, 'img_r5': 30.403838464614154, 'img_r10': 41.911235505797684, 'img_r_mean': 28.058110089297617, 'r_mean': 31.22905504464881}
Epoch: 11 iteration: 0 lr: 0.00021397681324599103 loss: 0.31117188930511475
Epoch: 11 iteration: 50 lr: 0.00021397681324599103 loss: 0.33558982610702515
Epoch: 11 iteration: 100 lr: 0.00021397681324599103 loss: 0.36867523193359375
Epoch: 11 iteration: 150 lr: 0.00021397681324599103 loss: 0.28263527154922485
Epoch: 11 iteration: 200 lr: 0.00021397681324599103 loss: 0.3501768112182617
Epoch: 11 iteration: 250 lr: 0.00021397681324599103 loss: 0.36479008197784424
Computing features for evaluation...
Computing features for evaluation...
Epoch: 11
coco val: {'txt_r1': 15.28, 'txt_r5': 37.84, 'txt_r10': 49.2, 'txt_r_mean': 34.10666666666667, 'img_r1': 11.979208316673331, 'img_r5': 30.23190723710516, 'img_r10': 42.05517792882847, 'img_r_mean': 28.08876449420232, 'r_mean': 31.097715580434496}
coco test: {'txt_r1': 15.22, 'txt_r5': 36.18, 'txt_r10': 48.02, 'txt_r_mean': 33.14, 'img_r1': 11.955217912834867, 'img_r5': 30.979608156737307, 'img_r10': 42.60295881647341, 'img_r_mean': 28.512594962015196, 'r_mean': 30.8262974810076}
Epoch: 12 iteration: 0 lr: 0.00019980746418436736 loss: 0.27429062128067017
Epoch: 12 iteration: 50 lr: 0.00019980746418436736 loss: 0.3097416162490845
Epoch: 12 iteration: 100 lr: 0.00019980746418436736 loss: 0.30445027351379395
Epoch: 12 iteration: 150 lr: 0.00019980746418436736 loss: 0.3258894681930542
Epoch: 12 iteration: 200 lr: 0.00019980746418436736 loss: 0.27619031071662903
Epoch: 12 iteration: 250 lr: 0.00019980746418436736 loss: 0.30364763736724854
Computing features for evaluation...
Computing features for evaluation...
Epoch: 12
coco val: {'txt_r1': 16.56, 'txt_r5': 38.14, 'txt_r10': 50.62, 'txt_r_mean': 35.10666666666666, 'img_r1': 12.35905637744902, 'img_r5': 31.139544182327068, 'img_r10': 42.51899240303879, 'img_r_mean': 28.672530987604958, 'r_mean': 31.88959882713581}
coco test: {'txt_r1': 15.18, 'txt_r5': 36.3, 'txt_r10': 49.56, 'txt_r_mean': 33.68, 'img_r1': 12.295081967213115, 'img_r5': 31.211515393842465, 'img_r10': 42.998800479808075, 'img_r_mean': 28.835132613621216, 'r_mean': 31.25756630681061}
Epoch: 13 iteration: 0 lr: 0.00018514719516857505 loss: 0.2100810557603836
Epoch: 13 iteration: 50 lr: 0.00018514719516857505 loss: 0.2885628938674927
Epoch: 13 iteration: 100 lr: 0.00018514719516857505 loss: 0.2615102529525757
Epoch: 13 iteration: 150 lr: 0.00018514719516857505 loss: 0.30048686265945435
Epoch: 13 iteration: 200 lr: 0.00018514719516857505 loss: 0.30662938952445984
Epoch: 13 iteration: 250 lr: 0.00018514719516857505 loss: 0.3095318377017975
Computing features for evaluation...
Computing features for evaluation...
Epoch: 13
coco val: {'txt_r1': 16.22, 'txt_r5': 37.66, 'txt_r10': 50.22, 'txt_r_mean': 34.699999999999996, 'img_r1': 11.511395441823272, 'img_r5': 29.488204718112755, 'img_r10': 40.95961615353858, 'img_r_mean': 27.3197387711582, 'r_mean': 31.009869385579098}
coco test: {'txt_r1': 16.1, 'txt_r5': 37.5, 'txt_r10': 49.88, 'txt_r_mean': 34.49333333333333, 'img_r1': 11.923230707716913, 'img_r5': 30.22391043582567, 'img_r10': 41.63934426229508, 'img_r_mean': 27.928828468612554, 'r_mean': 31.211080900972945}
Epoch: 14 iteration: 0 lr: 0.00017015662717380974 loss: 0.22490891814231873
Epoch: 14 iteration: 50 lr: 0.00017015662717380974 loss: 0.24104690551757812
Epoch: 14 iteration: 100 lr: 0.00017015662717380974 loss: 0.27677229046821594
Epoch: 14 iteration: 150 lr: 0.00017015662717380974 loss: 0.25092434883117676
Epoch: 14 iteration: 200 lr: 0.00017015662717380974 loss: 0.23248010873794556
Epoch: 14 iteration: 250 lr: 0.00017015662717380974 loss: 0.2669617235660553
Computing features for evaluation...
Computing features for evaluation...
Epoch: 14
coco val: {'txt_r1': 15.82, 'txt_r5': 36.82, 'txt_r10': 49.34, 'txt_r_mean': 33.99333333333333, 'img_r1': 12.047181127548981, 'img_r5': 30.635745701719312, 'img_r10': 42.6109556177529, 'img_r_mean': 28.431294149007062, 'r_mean': 31.2123137411702}
coco test: {'txt_r1': 14.78, 'txt_r5': 35.76, 'txt_r10': 48.42, 'txt_r_mean': 32.98666666666667, 'img_r1': 12.243102758896441, 'img_r5': 30.695721711315475, 'img_r10': 42.36705317872851, 'img_r_mean': 28.435292549646807, 'r_mean': 30.71097960815674}
Epoch: 15 iteration: 0 lr: 0.000155 loss: 0.1818775236606598
Epoch: 15 iteration: 50 lr: 0.000155 loss: 0.21323110163211823
Epoch: 15 iteration: 100 lr: 0.000155 loss: 0.2310401201248169
Epoch: 15 iteration: 150 lr: 0.000155 loss: 0.2086959332227707
Epoch: 15 iteration: 200 lr: 0.000155 loss: 0.22357095777988434
Epoch: 15 iteration: 250 lr: 0.000155 loss: 0.24121759831905365
Computing features for evaluation...
Computing features for evaluation...
Epoch: 15
coco val: {'txt_r1': 16.8, 'txt_r5': 38.16, 'txt_r10': 51.14, 'txt_r_mean': 35.36666666666667, 'img_r1': 11.935225909636145, 'img_r5': 30.103958416633347, 'img_r10': 42.33106757297081, 'img_r_mean': 28.123417299746766, 'r_mean': 31.74504198320672}
coco test: {'txt_r1': 15.2, 'txt_r5': 37.24, 'txt_r10': 50.12, 'txt_r_mean': 34.18666666666667, 'img_r1': 11.611355457816874, 'img_r5': 30.403838464614154, 'img_r10': 42.12714914034386, 'img_r_mean': 28.047447687591628, 'r_mean': 31.117057177129148}
Epoch: 16 iteration: 0 lr: 0.00013984337282619026 loss: 0.20621338486671448
Epoch: 16 iteration: 50 lr: 0.00013984337282619026 loss: 0.20322853326797485
Epoch: 16 iteration: 100 lr: 0.00013984337282619026 loss: 0.2034672498703003
Epoch: 16 iteration: 150 lr: 0.00013984337282619026 loss: 0.2079382836818695
Epoch: 16 iteration: 200 lr: 0.00013984337282619026 loss: 0.21095183491706848
Epoch: 16 iteration: 250 lr: 0.00013984337282619026 loss: 0.20369692146778107
Computing features for evaluation...
Computing features for evaluation...
Epoch: 16
coco val: {'txt_r1': 16.8, 'txt_r5': 37.92, 'txt_r10': 50.68, 'txt_r_mean': 35.13333333333333, 'img_r1': 12.231107556977209, 'img_r5': 30.69972011195522, 'img_r10': 42.54698120751699, 'img_r_mean': 28.492602958816473, 'r_mean': 31.8129681460749}
coco test: {'txt_r1': 15.04, 'txt_r5': 36.64, 'txt_r10': 49.7, 'txt_r_mean': 33.79333333333333, 'img_r1': 12.29908036785286, 'img_r5': 30.86765293882447, 'img_r10': 42.5109956017593, 'img_r_mean': 28.559242969478873, 'r_mean': 31.1762881514061}
Epoch: 17 iteration: 0 lr: 0.00012485280483142487 loss: 0.16787829995155334
Epoch: 17 iteration: 50 lr: 0.00012485280483142487 loss: 0.16973815858364105
Epoch: 17 iteration: 100 lr: 0.00012485280483142487 loss: 0.17559704184532166
Epoch: 17 iteration: 150 lr: 0.00012485280483142487 loss: 0.19280369579792023
Epoch: 17 iteration: 200 lr: 0.00012485280483142487 loss: 0.18810811638832092
Epoch: 17 iteration: 250 lr: 0.00012485280483142487 loss: 0.1578725427389145
Computing features for evaluation...
Computing features for evaluation...
Epoch: 17
coco val: {'txt_r1': 17.0, 'txt_r5': 39.04, 'txt_r10': 51.44, 'txt_r_mean': 35.82666666666666, 'img_r1': 12.343062774890043, 'img_r5': 30.735705717712914, 'img_r10': 42.03118752499, 'img_r_mean': 28.369985339197655, 'r_mean': 32.098326002932154}
coco test: {'txt_r1': 15.72, 'txt_r5': 36.94, 'txt_r10': 50.26, 'txt_r_mean': 34.306666666666665, 'img_r1': 12.02718912435026, 'img_r5': 30.567772890843663, 'img_r10': 42.139144342263094, 'img_r_mean': 28.24470211915234, 'r_mean': 31.275684392909504}
Epoch: 18 iteration: 0 lr: 0.00011019253581563262 loss: 0.17534607648849487
Epoch: 18 iteration: 50 lr: 0.00011019253581563262 loss: 0.19806219637393951
Epoch: 18 iteration: 100 lr: 0.00011019253581563262 loss: 0.16321659088134766
Epoch: 18 iteration: 150 lr: 0.00011019253581563262 loss: 0.15023337304592133
Epoch: 18 iteration: 200 lr: 0.00011019253581563262 loss: 0.14350810647010803
Epoch: 18 iteration: 250 lr: 0.00011019253581563262 loss: 0.1909620463848114
Computing features for evaluation...
Computing features for evaluation...
Epoch: 18
coco val: {'txt_r1': 15.72, 'txt_r5': 37.14, 'txt_r10': 50.2, 'txt_r_mean': 34.35333333333333, 'img_r1': 12.00719712115154, 'img_r5': 30.091963214714113, 'img_r10': 41.819272291083564, 'img_r_mean': 27.972810875649742, 'r_mean': 31.163072104491537}
coco test: {'txt_r1': 14.98, 'txt_r5': 35.98, 'txt_r10': 48.94, 'txt_r_mean': 33.3, 'img_r1': 11.603358656537385, 'img_r5': 30.307876849260296, 'img_r10': 41.92323070771691, 'img_r_mean': 27.94482207117153, 'r_mean': 30.622411035585763}
Epoch: 19 iteration: 0 lr: 9.602318675400897e-05 loss: 0.17000208795070648
Epoch: 19 iteration: 50 lr: 9.602318675400897e-05 loss: 0.14290763437747955
Epoch: 19 iteration: 100 lr: 9.602318675400897e-05 loss: 0.1349085569381714
Epoch: 19 iteration: 150 lr: 9.602318675400897e-05 loss: 0.15767492353916168
Epoch: 19 iteration: 200 lr: 9.602318675400897e-05 loss: 0.15036305785179138
Epoch: 19 iteration: 250 lr: 9.602318675400897e-05 loss: 0.17334865033626556
Computing features for evaluation...
Computing features for evaluation...
Epoch: 19
coco val: {'txt_r1': 16.64, 'txt_r5': 38.02, 'txt_r10': 50.84, 'txt_r_mean': 35.166666666666664, 'img_r1': 12.287085165933627, 'img_r5': 30.635745701719312, 'img_r10': 42.091163534586165, 'img_r_mean': 28.3379981340797, 'r_mean': 31.752332400373184}
coco test: {'txt_r1': 15.64, 'txt_r5': 37.7, 'txt_r10': 49.62, 'txt_r_mean': 34.32, 'img_r1': 12.059176329468213, 'img_r5': 30.743702518992404, 'img_r10': 42.21111555377849, 'img_r_mean': 28.3379981340797, 'r_mean': 31.328999067039852}
Epoch: 20 iteration: 0 lr: 8.250000000000001e-05 loss: 0.14072063565254211
Epoch: 20 iteration: 50 lr: 8.250000000000001e-05 loss: 0.12933437526226044
Epoch: 20 iteration: 100 lr: 8.250000000000001e-05 loss: 0.20693959295749664
Epoch: 20 iteration: 150 lr: 8.250000000000001e-05 loss: 0.15231087803840637
Epoch: 20 iteration: 200 lr: 8.250000000000001e-05 loss: 0.15985363721847534
Epoch: 20 iteration: 250 lr: 8.250000000000001e-05 loss: 0.14119693636894226
Computing features for evaluation...
Computing features for evaluation...
Epoch: 20
coco val: {'txt_r1': 16.54, 'txt_r5': 39.04, 'txt_r10': 50.56, 'txt_r_mean': 35.38, 'img_r1': 12.774890043982406, 'img_r5': 31.20751699320272, 'img_r10': 43.06277489004398, 'img_r_mean': 29.015060642409704, 'r_mean': 32.19753032120485}
coco test: {'txt_r1': 15.62, 'txt_r5': 38.28, 'txt_r10': 51.2, 'txt_r_mean': 35.03333333333333, 'img_r1': 12.263094762095163, 'img_r5': 31.47141143542583, 'img_r10': 43.0187924830068, 'img_r_mean': 28.9177662268426, 'r_mean': 31.975549780087967}
Epoch: 21 iteration: 0 lr: 6.97711384175914e-05 loss: 0.12423430383205414
Epoch: 21 iteration: 50 lr: 6.97711384175914e-05 loss: 0.13421592116355896
Epoch: 21 iteration: 100 lr: 6.97711384175914e-05 loss: 0.09904897212982178
Epoch: 21 iteration: 150 lr: 6.97711384175914e-05 loss: 0.11255185306072235
Epoch: 21 iteration: 200 lr: 6.97711384175914e-05 loss: 0.14298436045646667
Epoch: 21 iteration: 250 lr: 6.97711384175914e-05 loss: 0.13077646493911743
Computing features for evaluation...
Computing features for evaluation...
Epoch: 21
coco val: {'txt_r1': 17.24, 'txt_r5': 38.74, 'txt_r10': 50.88, 'txt_r_mean': 35.620000000000005, 'img_r1': 12.558976409436225, 'img_r5': 31.331467413034787, 'img_r10': 42.630947620951616, 'img_r_mean': 28.84046381447421, 'r_mean': 32.230231907237105}
coco test: {'txt_r1': 15.84, 'txt_r5': 38.24, 'txt_r10': 51.44, 'txt_r_mean': 35.17333333333333, 'img_r1': 12.526989204318273, 'img_r5': 31.36745301879248, 'img_r10': 42.998800479808075, 'img_r_mean': 28.964414234306275, 'r_mean': 32.068873783819804}
Epoch: 22 iteration: 0 lr: 5.797606207796559e-05 loss: 0.09781420230865479
Epoch: 22 iteration: 50 lr: 5.797606207796559e-05 loss: 0.10436877608299255
Epoch: 22 iteration: 100 lr: 5.797606207796559e-05 loss: 0.09954556077718735
Epoch: 22 iteration: 150 lr: 5.797606207796559e-05 loss: 0.10239797830581665
Epoch: 22 iteration: 200 lr: 5.797606207796559e-05 loss: 0.15317881107330322
Epoch: 22 iteration: 250 lr: 5.797606207796559e-05 loss: 0.13270767033100128
Computing features for evaluation...
Computing features for evaluation...
Epoch: 22
coco val: {'txt_r1': 17.24, 'txt_r5': 38.46, 'txt_r10': 52.06, 'txt_r_mean': 35.92, 'img_r1': 12.782886845261896, 'img_r5': 31.6953218712515, 'img_r10': 43.02678928428629, 'img_r_mean': 29.168332666933225, 'r_mean': 32.54416633346661}
coco test: {'txt_r1': 15.6, 'txt_r5': 39.1, 'txt_r10': 50.8, 'txt_r_mean': 35.166666666666664, 'img_r1': 12.566973210715714, 'img_r5': 31.62734906037585, 'img_r10': 43.278688524590166, 'img_r_mean': 29.157670265227242, 'r_mean': 32.162168465946955}
Epoch: 23 iteration: 0 lr: 4.724400030577786e-05 loss: 0.09777984768152237
Epoch: 23 iteration: 50 lr: 4.724400030577786e-05 loss: 0.12258177995681763
Epoch: 23 iteration: 100 lr: 4.724400030577786e-05 loss: 0.1060154139995575
Epoch: 23 iteration: 150 lr: 4.724400030577786e-05 loss: 0.13091956079006195
Epoch: 23 iteration: 200 lr: 4.724400030577786e-05 loss: 0.10514585673809052
Epoch: 23 iteration: 250 lr: 4.724400030577786e-05 loss: 0.12769201397895813
Computing features for evaluation...
Computing features for evaluation...
Epoch: 23
coco val: {'txt_r1': 16.48, 'txt_r5': 38.66, 'txt_r10': 51.64, 'txt_r_mean': 35.593333333333334, 'img_r1': 12.686925229908036, 'img_r5': 31.591363454618154, 'img_r10': 43.114754098360656, 'img_r_mean': 29.131014260962285, 'r_mean': 32.36217379714781}
coco test: {'txt_r1': 15.62, 'txt_r5': 38.06, 'txt_r10': 51.1, 'txt_r_mean': 34.92666666666667, 'img_r1': 12.538984406237505, 'img_r5': 31.74330267892843, 'img_r10': 43.442622950819676, 'img_r_mean': 29.241636678661866, 'r_mean': 32.08415167266427}
Epoch: 24 iteration: 0 lr: 3.769253581563263e-05 loss: 0.08650655299425125
Epoch: 24 iteration: 50 lr: 3.769253581563263e-05 loss: 0.10609667003154755
Epoch: 24 iteration: 100 lr: 3.769253581563263e-05 loss: 0.10544316470623016
Epoch: 24 iteration: 150 lr: 3.769253581563263e-05 loss: 0.08425739407539368
Epoch: 24 iteration: 200 lr: 3.769253581563263e-05 loss: 0.11596322059631348
Epoch: 24 iteration: 250 lr: 3.769253581563263e-05 loss: 0.12456141412258148
Computing features for evaluation...
Computing features for evaluation...
Epoch: 24
coco val: {'txt_r1': 16.9, 'txt_r5': 39.34, 'txt_r10': 52.24, 'txt_r_mean': 36.160000000000004, 'img_r1': 12.730907636945222, 'img_r5': 31.463414634146343, 'img_r10': 43.2906837265094, 'img_r_mean': 29.161668665866987, 'r_mean': 32.6608343329335}
coco test: {'txt_r1': 16.0, 'txt_r5': 38.64, 'txt_r10': 51.36, 'txt_r_mean': 35.333333333333336, 'img_r1': 12.566973210715714, 'img_r5': 31.815273890443823, 'img_r10': 43.59456217512995, 'img_r_mean': 29.325603092096497, 'r_mean': 32.329468212714914}
Epoch: 25 iteration: 0 lr: 2.9426316451256386e-05 loss: 0.11349457502365112
Epoch: 25 iteration: 50 lr: 2.9426316451256386e-05 loss: 0.08233440667390823
Epoch: 25 iteration: 100 lr: 2.9426316451256386e-05 loss: 0.09436212480068207
Epoch: 25 iteration: 150 lr: 2.9426316451256386e-05 loss: 0.0920330286026001
Epoch: 25 iteration: 200 lr: 2.9426316451256386e-05 loss: 0.08613620698451996
Epoch: 25 iteration: 250 lr: 2.9426316451256386e-05 loss: 0.0929696261882782
Computing features for evaluation...
Computing features for evaluation...
Epoch: 25
coco val: {'txt_r1': 17.26, 'txt_r5': 39.76, 'txt_r10': 52.34, 'txt_r_mean': 36.45333333333333, 'img_r1': 12.69092363054778, 'img_r5': 31.43142742902839, 'img_r10': 43.04278288684526, 'img_r_mean': 29.055044648807144, 'r_mean': 32.75418899107024}
coco test: {'txt_r1': 15.84, 'txt_r5': 39.3, 'txt_r10': 51.54, 'txt_r_mean': 35.56, 'img_r1': 12.670931627349061, 'img_r5': 31.54338264694122, 'img_r10': 43.006797281087564, 'img_r_mean': 29.073703851792615, 'r_mean': 32.31685192589631}
Epoch: 26 iteration: 0 lr: 2.2535908641822855e-05 loss: 0.07479941099882126
Epoch: 26 iteration: 50 lr: 2.2535908641822855e-05 loss: 0.08746127784252167
Epoch: 26 iteration: 100 lr: 2.2535908641822855e-05 loss: 0.10455113649368286
Epoch: 26 iteration: 150 lr: 2.2535908641822855e-05 loss: 0.09784542769193649
Epoch: 26 iteration: 200 lr: 2.2535908641822855e-05 loss: 0.06572966277599335
Epoch: 26 iteration: 250 lr: 2.2535908641822855e-05 loss: 0.09240047633647919
Computing features for evaluation...
Computing features for evaluation...
Epoch: 26
coco val: {'txt_r1': 16.88, 'txt_r5': 39.52, 'txt_r10': 52.32, 'txt_r_mean': 36.24, 'img_r1': 12.794882047181128, 'img_r5': 31.70731707317073, 'img_r10': 43.29468212714914, 'img_r_mean': 29.265627082500334, 'r_mean': 32.75281354125017}
coco test: {'txt_r1': 15.76, 'txt_r5': 38.96, 'txt_r10': 51.74, 'txt_r_mean': 35.48666666666667, 'img_r1': 12.794882047181128, 'img_r5': 31.851259496201518, 'img_r10': 43.63454618152739, 'img_r_mean': 29.426895908303347, 'r_mean': 32.45678128748501}
Epoch: 27 iteration: 0 lr: 1.7096805137202738e-05 loss: 0.07049550861120224
Epoch: 27 iteration: 50 lr: 1.7096805137202738e-05 loss: 0.08527995645999908
Epoch: 27 iteration: 100 lr: 1.7096805137202738e-05 loss: 0.07916025817394257
Epoch: 27 iteration: 150 lr: 1.7096805137202738e-05 loss: 0.0926615446805954
Epoch: 27 iteration: 200 lr: 1.7096805137202738e-05 loss: 0.062070801854133606
Epoch: 27 iteration: 250 lr: 1.7096805137202738e-05 loss: 0.06778311729431152
Computing features for evaluation...
Computing features for evaluation...
Epoch: 27
coco val: {'txt_r1': 16.62, 'txt_r5': 39.08, 'txt_r10': 51.5, 'txt_r_mean': 35.733333333333334, 'img_r1': 12.854858056777289, 'img_r5': 31.679328268692522, 'img_r10': 43.238704518192726, 'img_r_mean': 29.257630281220845, 'r_mean': 32.49548180727709}
coco test: {'txt_r1': 15.62, 'txt_r5': 38.3, 'txt_r10': 51.6, 'txt_r_mean': 35.17333333333333, 'img_r1': 12.870851659336266, 'img_r5': 31.835265893642543, 'img_r10': 43.57856857257097, 'img_r_mean': 29.428228708516595, 'r_mean': 32.300781020924966}
Epoch: 28 iteration: 0 lr: 1.3168597893598175e-05 loss: 0.08952151238918304
Epoch: 28 iteration: 50 lr: 1.3168597893598175e-05 loss: 0.08497560024261475
Epoch: 28 iteration: 100 lr: 1.3168597893598175e-05 loss: 0.09802306443452835
Epoch: 28 iteration: 150 lr: 1.3168597893598175e-05 loss: 0.10137701034545898
Epoch: 28 iteration: 200 lr: 1.3168597893598175e-05 loss: 0.08434905111789703
Epoch: 28 iteration: 250 lr: 1.3168597893598175e-05 loss: 0.07585834711790085
Computing features for evaluation...
Computing features for evaluation...
Epoch: 28
coco val: {'txt_r1': 17.42, 'txt_r5': 39.7, 'txt_r10': 52.34, 'txt_r_mean': 36.48666666666667, 'img_r1': 13.066773290683727, 'img_r5': 31.999200319872052, 'img_r10': 43.47061175529788, 'img_r_mean': 29.51219512195122, 'r_mean': 32.999430894308944}
coco test: {'txt_r1': 16.02, 'txt_r5': 39.16, 'txt_r10': 52.3, 'txt_r_mean': 35.82666666666666, 'img_r1': 12.938824470211916, 'img_r5': 32.11515393842463, 'img_r10': 43.874450219912035, 'img_r_mean': 29.64280954284953, 'r_mean': 32.734738104758094}
Epoch: 29 iteration: 0 lr: 1.0794325171600358e-05 loss: 0.0904349684715271
Epoch: 29 iteration: 50 lr: 1.0794325171600358e-05 loss: 0.0633661150932312
Epoch: 29 iteration: 100 lr: 1.0794325171600358e-05 loss: 0.06782661378383636
Epoch: 29 iteration: 150 lr: 1.0794325171600358e-05 loss: 0.0833449587225914
Epoch: 29 iteration: 200 lr: 1.0794325171600358e-05 loss: 0.09229975193738937
Epoch: 29 iteration: 250 lr: 1.0794325171600358e-05 loss: 0.08226582407951355
Computing features for evaluation...
Computing features for evaluation...
Epoch: 29
coco val: {'txt_r1': 16.82, 'txt_r5': 39.46, 'txt_r10': 52.16, 'txt_r_mean': 36.14666666666667, 'img_r1': 12.922830867652939, 'img_r5': 31.747301079568174, 'img_r10': 43.45061975209916, 'img_r_mean': 29.373583899773422, 'r_mean': 32.76012528322005}
coco test: {'txt_r1': 15.9, 'txt_r5': 38.66, 'txt_r10': 51.54, 'txt_r_mean': 35.36666666666667, 'img_r1': 12.802878848460615, 'img_r5': 31.847261095561777, 'img_r10': 43.514594162335065, 'img_r_mean': 29.388244702119152, 'r_mean': 32.37745568439291}

Launch training and evaluation for iSogCLR

# Build the tokenizer and the iSogCLR model; the model is wrapped in
# nn.DataParallel below when more than one GPU is requested.
tokenizer = AutoTokenizer.from_pretrained(text_encoder, local_files_only=False)
model = Model(image_encoder=image_encoder, text_encoder=text_encoder, embed_dim=embed_dim,
              init_model=True, bsz=batch_size_train, loss_type='isogclr',
              gamma=gamma, temp=temp, rho=rho, eta=eta, tau_init=tau_init, beta_u=beta_u)

model = model.cuda()
if n_gpus > 1:
    print("Using", n_gpus, "GPUs")
    model = nn.DataParallel(model)

# set up the optimizer and the learning-rate scheduler
optimizer = create_optimizer(model, opt, weight_decay)
lr_scheduler = create_scheduler(optimizer)

# a gradient scaler is only needed when automatic mixed precision is enabled
grad_scaler = torch.cuda.amp.GradScaler() if use_amp else None

# training loop
for epoch in range(0, epochs):
    train_stats = epoch_train(model, train_loader, optimizer, tokenizer, epoch, epochs,
                              warmup_epochs, torch.device('cuda'), lr_scheduler, grad_scaler)

    # Evaluate on MS-COCO using the underlying model: nn.DataParallel exposes it
    # as `.module`, while an unwrapped model is used directly. Resolving it
    # explicitly (instead of a bare try/except) lets genuine evaluation errors
    # surface rather than silently triggering a second evaluation pass.
    eval_model = getattr(model, 'module', model)
    score_val_i2t_coco, score_val_t2i_coco = evaluation(eval_model, val_loader, tokenizer, torch.device('cuda'))
    score_test_i2t_coco, score_test_t2i_coco = evaluation(eval_model, test_loader, tokenizer, torch.device('cuda'))

    print("Epoch:", epoch)
    val_result_coco = itm_eval(score_val_i2t_coco, score_val_t2i_coco, val_loader.dataset.txt2img, val_loader.dataset.img2txt)
    print("coco val:", val_result_coco)
    test_result_coco = itm_eval(score_test_i2t_coco, score_test_t2i_coco, test_loader.dataset.txt2img, test_loader.dataset.img2txt)
    print("coco test:", test_result_coco)

    # step the scheduler with an explicit epoch index offset by the warmup epochs
    lr_scheduler.step(epoch+warmup_epochs+1)
Epoch: 0 iteration: 0 lr: 1e-05 loss: 24.701007843017578
tau_img: 0.0100, tau_txt: 0.0100
Epoch: 0 iteration: 50 lr: 1e-05 loss: 10.574981689453125
tau_img: 0.0100, tau_txt: 0.0100
Epoch: 0 iteration: 100 lr: 2.45e-05 loss: 4.697925567626953
tau_img: 0.0100, tau_txt: 0.0100
Epoch: 0 iteration: 150 lr: 2.45e-05 loss: 1.9576847553253174
tau_img: 0.0100, tau_txt: 0.0100
Epoch: 0 iteration: 200 lr: 3.899999999999999e-05 loss: 1.0460829734802246
tau_img: 0.0100, tau_txt: 0.0100
Epoch: 0 iteration: 250 lr: 3.899999999999999e-05 loss: 0.5043810606002808
tau_img: 0.0100, tau_txt: 0.0100
Computing features for evaluation...
Computing features for evaluation...
Epoch: 0
coco val: {'txt_r1': 4.1, 'txt_r5': 13.8, 'txt_r10': 21.34, 'txt_r_mean': 13.079999999999998, 'img_r1': 2.0591763294682126, 'img_r5': 7.860855657736905, 'img_r10': 13.13874450219912, 'img_r_mean': 7.686258829801413, 'r_mean': 10.383129414900706}
coco test: {'txt_r1': 4.2, 'txt_r5': 12.7, 'txt_r10': 20.2, 'txt_r_mean': 12.366666666666665, 'img_r1': 1.9832067173130747, 'img_r5': 7.493002798880448, 'img_r10': 12.950819672131148, 'img_r_mean': 7.4756763961082235, 'r_mean': 9.921171531387444}
Epoch: 1 iteration: 0 lr: 0.0002992056748283996 loss: 1.3195196390151978
tau_img: 0.0094, tau_txt: 0.0095
Epoch: 1 iteration: 50 lr: 0.0002992056748283996 loss: 0.075884610414505
tau_img: 0.0094, tau_txt: 0.0095
Epoch: 1 iteration: 100 lr: 0.0002992056748283996 loss: 0.3162369430065155
tau_img: 0.0094, tau_txt: 0.0095
Epoch: 1 iteration: 150 lr: 0.0002992056748283996 loss: 0.1882624328136444
tau_img: 0.0094, tau_txt: 0.0095
Epoch: 1 iteration: 200 lr: 0.0002992056748283996 loss: -0.10296255350112915
tau_img: 0.0094, tau_txt: 0.0095
Epoch: 1 iteration: 250 lr: 0.0002992056748283996 loss: 0.15444990992546082
tau_img: 0.0094, tau_txt: 0.0095
Computing features for evaluation...
Computing features for evaluation...
Epoch: 1
coco val: {'txt_r1': 12.22, 'txt_r5': 28.74, 'txt_r10': 40.32, 'txt_r_mean': 27.093333333333334, 'img_r1': 5.881647341063575, 'img_r5': 18.10075969612155, 'img_r10': 27.608956417433028, 'img_r_mean': 17.197121151539385, 'r_mean': 22.14522724243636}
coco test: {'txt_r1': 11.34, 'txt_r5': 29.4, 'txt_r10': 40.32, 'txt_r_mean': 27.02, 'img_r1': 5.593762495001999, 'img_r5': 18.376649340263896, 'img_r10': 27.984806077568972, 'img_r_mean': 17.318405970944955, 'r_mean': 22.169202985472477}
Epoch: 2 iteration: 0 lr: 0.0002968314021064018 loss: -0.0604383647441864
tau_img: 0.0088, tau_txt: 0.0088
Epoch: 2 iteration: 50 lr: 0.0002968314021064018 loss: 0.23243539035320282
tau_img: 0.0088, tau_txt: 0.0088
Epoch: 2 iteration: 100 lr: 0.0002968314021064018 loss: 0.04821205139160156
tau_img: 0.0088, tau_txt: 0.0088
Epoch: 2 iteration: 150 lr: 0.0002968314021064018 loss: 0.21965868771076202
tau_img: 0.0088, tau_txt: 0.0088
Epoch: 2 iteration: 200 lr: 0.0002968314021064018 loss: 0.05134771019220352
tau_img: 0.0088, tau_txt: 0.0088
Epoch: 2 iteration: 250 lr: 0.0002968314021064018 loss: 0.1536252200603485
tau_img: 0.0088, tau_txt: 0.0088
Computing features for evaluation...
Computing features for evaluation...
Epoch: 2
coco val: {'txt_r1': 14.64, 'txt_r5': 35.0, 'txt_r10': 46.5, 'txt_r_mean': 32.04666666666667, 'img_r1': 7.97281087564974, 'img_r5': 22.898840463814473, 'img_r10': 33.77049180327869, 'img_r_mean': 21.547381047580966, 'r_mean': 26.79702385712382}
coco test: {'txt_r1': 15.14, 'txt_r5': 34.42, 'txt_r10': 46.54, 'txt_r_mean': 32.03333333333333, 'img_r1': 8.388644542183126, 'img_r5': 23.594562175129948, 'img_r10': 34.406237504998, 'img_r_mean': 22.12981474077036, 'r_mean': 27.081574037051844}
Epoch: 3 iteration: 0 lr: 0.00029290319486279724 loss: -0.29481595754623413
tau_img: 0.0083, tau_txt: 0.0081
Epoch: 3 iteration: 50 lr: 0.00029290319486279724 loss: 0.06638230383396149
tau_img: 0.0083, tau_txt: 0.0081
Epoch: 3 iteration: 100 lr: 0.00029290319486279724 loss: 0.03567551076412201
tau_img: 0.0083, tau_txt: 0.0082
Epoch: 3 iteration: 150 lr: 0.00029290319486279724 loss: 0.05767179653048515
tau_img: 0.0083, tau_txt: 0.0081
Epoch: 3 iteration: 200 lr: 0.00029290319486279724 loss: 0.056682661175727844
tau_img: 0.0083, tau_txt: 0.0082
Epoch: 3 iteration: 250 lr: 0.00029290319486279724 loss: 0.28257113695144653
tau_img: 0.0083, tau_txt: 0.0082
Computing features for evaluation...
Computing features for evaluation...
Epoch: 3
coco val: {'txt_r1': 15.9, 'txt_r5': 37.2, 'txt_r10': 49.18, 'txt_r_mean': 34.093333333333334, 'img_r1': 9.70811675329868, 'img_r5': 26.3734506197521, 'img_r10': 37.31707317073171, 'img_r_mean': 24.466213514594163, 'r_mean': 29.279773423963746}
coco test: {'txt_r1': 15.52, 'txt_r5': 37.28, 'txt_r10': 48.94, 'txt_r_mean': 33.913333333333334, 'img_r1': 9.660135945621752, 'img_r5': 26.66533386645342, 'img_r10': 37.49300279888045, 'img_r_mean': 24.606157536985204, 'r_mean': 29.259745435159267}
Epoch: 4 iteration: 0 lr: 0.00028746409135817707 loss: -0.2583860158920288
tau_img: 0.0079, tau_txt: 0.0077
Epoch: 4 iteration: 50 lr: 0.00028746409135817707 loss: 0.04029808193445206
tau_img: 0.0079, tau_txt: 0.0076
Epoch: 4 iteration: 100 lr: 0.00028746409135817707 loss: 0.11739009618759155
tau_img: 0.0079, tau_txt: 0.0076
Epoch: 4 iteration: 150 lr: 0.00028746409135817707 loss: 0.32731348276138306
tau_img: 0.0079, tau_txt: 0.0076
Epoch: 4 iteration: 200 lr: 0.00028746409135817707 loss: -0.00629810243844986
tau_img: 0.0079, tau_txt: 0.0076
Epoch: 4 iteration: 250 lr: 0.00028746409135817707 loss: 0.15173837542533875
tau_img: 0.0079, tau_txt: 0.0076
Computing features for evaluation...
Computing features for evaluation...
Epoch: 4
coco val: {'txt_r1': 17.16, 'txt_r5': 38.44, 'txt_r10': 50.34, 'txt_r_mean': 35.31333333333333, 'img_r1': 10.903638544582167, 'img_r5': 27.86485405837665, 'img_r10': 39.40423830467813, 'img_r_mean': 26.057576969212317, 'r_mean': 30.685455151272826}
coco test: {'txt_r1': 17.0, 'txt_r5': 37.84, 'txt_r10': 50.16, 'txt_r_mean': 35.0, 'img_r1': 10.415833666533386, 'img_r5': 28.58856457417033, 'img_r10': 40.26789284286286, 'img_r_mean': 26.424097027855524, 'r_mean': 30.712048513927762}
Epoch: 5 iteration: 0 lr: 0.0002805736835487436 loss: -0.4848897457122803
tau_img: 0.0075, tau_txt: 0.0072
Epoch: 5 iteration: 50 lr: 0.0002805736835487436 loss: 0.06531377136707306
tau_img: 0.0075, tau_txt: 0.0072
Epoch: 5 iteration: 100 lr: 0.0002805736835487436 loss: 0.09321524202823639
tau_img: 0.0075, tau_txt: 0.0072
Epoch: 5 iteration: 150 lr: 0.0002805736835487436 loss: 0.218039870262146
tau_img: 0.0075, tau_txt: 0.0073
Epoch: 5 iteration: 200 lr: 0.0002805736835487436 loss: 0.1558637171983719
tau_img: 0.0075, tau_txt: 0.0072
Epoch: 5 iteration: 250 lr: 0.0002805736835487436 loss: -0.09588228911161423
tau_img: 0.0075, tau_txt: 0.0072
Computing features for evaluation...
Computing features for evaluation...
Epoch: 5
coco val: {'txt_r1': 18.54, 'txt_r5': 40.0, 'txt_r10': 51.6, 'txt_r_mean': 36.71333333333333, 'img_r1': 11.015593762495001, 'img_r5': 28.984406237505, 'img_r10': 40.42782886845262, 'img_r_mean': 26.809276289484206, 'r_mean': 31.76130481140877}
coco test: {'txt_r1': 16.56, 'txt_r5': 38.8, 'txt_r10': 51.22, 'txt_r_mean': 35.526666666666664, 'img_r1': 11.107556977209116, 'img_r5': 29.072371051579367, 'img_r10': 40.77169132347061, 'img_r_mean': 26.983873117419694, 'r_mean': 31.25526989204318}
Epoch: 6 iteration: 0 lr: 0.0002723074641843674 loss: -0.5769622325897217
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 6 iteration: 50 lr: 0.0002723074641843674 loss: 0.37227633595466614
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 6 iteration: 100 lr: 0.0002723074641843674 loss: 0.06294765323400497
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 6 iteration: 150 lr: 0.0002723074641843674 loss: -0.028086403384804726
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 6 iteration: 200 lr: 0.0002723074641843674 loss: 0.08182275295257568
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 6 iteration: 250 lr: 0.0002723074641843674 loss: 0.16375750303268433
tau_img: 0.0072, tau_txt: 0.0069
Computing features for evaluation...
Computing features for evaluation...
Epoch: 6
coco val: {'txt_r1': 18.02, 'txt_r5': 40.82, 'txt_r10': 53.12, 'txt_r_mean': 37.32, 'img_r1': 11.431427429028389, 'img_r5': 29.748100759696122, 'img_r10': 41.47940823670532, 'img_r_mean': 27.55297880847661, 'r_mean': 32.4364894042383}
coco test: {'txt_r1': 17.68, 'txt_r5': 40.18, 'txt_r10': 52.56, 'txt_r_mean': 36.806666666666665, 'img_r1': 11.75529788084766, 'img_r5': 30.151939224310276, 'img_r10': 41.89924030387845, 'img_r_mean': 27.935492469678792, 'r_mean': 32.37107956817273}
Epoch: 7 iteration: 0 lr: 0.00026275599969422214 loss: -0.4518427550792694
tau_img: 0.0070, tau_txt: 0.0067
Epoch: 7 iteration: 50 lr: 0.00026275599969422214 loss: 0.2819710075855255
tau_img: 0.0070, tau_txt: 0.0067
Epoch: 7 iteration: 100 lr: 0.00026275599969422214 loss: 0.05290326103568077
tau_img: 0.0070, tau_txt: 0.0067
Epoch: 7 iteration: 150 lr: 0.00026275599969422214 loss: -0.008920110762119293
tau_img: 0.0070, tau_txt: 0.0067
Epoch: 7 iteration: 200 lr: 0.00026275599969422214 loss: 0.2930781841278076
tau_img: 0.0070, tau_txt: 0.0067
Epoch: 7 iteration: 250 lr: 0.00026275599969422214 loss: 0.14736725389957428
tau_img: 0.0070, tau_txt: 0.0067
Computing features for evaluation...
Computing features for evaluation...
Epoch: 7
coco val: {'txt_r1': 17.88, 'txt_r5': 40.54, 'txt_r10': 52.78, 'txt_r_mean': 37.06666666666667, 'img_r1': 11.571371451419433, 'img_r5': 30.023990403838464, 'img_r10': 41.543382646941225, 'img_r_mean': 27.71291483406638, 'r_mean': 32.38979075036652}
coco test: {'txt_r1': 18.14, 'txt_r5': 39.58, 'txt_r10': 51.58, 'txt_r_mean': 36.43333333333333, 'img_r1': 12.167133146741303, 'img_r5': 30.851659336265495, 'img_r10': 42.4390243902439, 'img_r_mean': 28.485938957750232, 'r_mean': 32.45963614554178}
Epoch: 8 iteration: 0 lr: 0.0002520239379220344 loss: -0.36706972122192383
tau_img: 0.0068, tau_txt: 0.0065
Epoch: 8 iteration: 50 lr: 0.0002520239379220344 loss: -0.229108527302742
tau_img: 0.0068, tau_txt: 0.0065
Epoch: 8 iteration: 100 lr: 0.0002520239379220344 loss: 0.31043940782546997
tau_img: 0.0068, tau_txt: 0.0065
Epoch: 8 iteration: 150 lr: 0.0002520239379220344 loss: 0.00404047966003418
tau_img: 0.0069, tau_txt: 0.0066
Epoch: 8 iteration: 200 lr: 0.0002520239379220344 loss: -0.24809685349464417
tau_img: 0.0069, tau_txt: 0.0066
Epoch: 8 iteration: 250 lr: 0.0002520239379220344 loss: -0.2770186960697174
tau_img: 0.0068, tau_txt: 0.0065
Computing features for evaluation...
Computing features for evaluation...
Epoch: 8
coco val: {'txt_r1': 16.92, 'txt_r5': 38.66, 'txt_r10': 51.2, 'txt_r_mean': 35.593333333333334, 'img_r1': 11.38344662135146, 'img_r5': 29.760095961615352, 'img_r10': 41.63934426229508, 'img_r_mean': 27.5942956150873, 'r_mean': 31.59381447421032}
coco test: {'txt_r1': 17.36, 'txt_r5': 38.22, 'txt_r10': 50.44, 'txt_r_mean': 35.339999999999996, 'img_r1': 11.82327069172331, 'img_r5': 30.3718512594962, 'img_r10': 41.74330267892843, 'img_r_mean': 27.979474876715983, 'r_mean': 31.65973743835799}
Epoch: 9 iteration: 0 lr: 0.00024022886158240857 loss: -0.7354167699813843
tau_img: 0.0067, tau_txt: 0.0064
Epoch: 9 iteration: 50 lr: 0.00024022886158240857 loss: -0.14618906378746033
tau_img: 0.0067, tau_txt: 0.0064
Epoch: 9 iteration: 100 lr: 0.00024022886158240857 loss: 0.12334905564785004
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 9 iteration: 150 lr: 0.00024022886158240857 loss: -0.45143190026283264
tau_img: 0.0067, tau_txt: 0.0065
Epoch: 9 iteration: 200 lr: 0.00024022886158240857 loss: 0.06901969015598297
tau_img: 0.0067, tau_txt: 0.0065
Epoch: 9 iteration: 250 lr: 0.00024022886158240857 loss: 0.02915862947702408
tau_img: 0.0067, tau_txt: 0.0064
Computing features for evaluation...
Computing features for evaluation...
Epoch: 9
coco val: {'txt_r1': 17.24, 'txt_r5': 39.68, 'txt_r10': 52.52, 'txt_r_mean': 36.48, 'img_r1': 11.943222710915634, 'img_r5': 30.279888044782087, 'img_r10': 42.059176329468215, 'img_r_mean': 28.094095695055312, 'r_mean': 32.28704784752765}
coco test: {'txt_r1': 17.64, 'txt_r5': 39.44, 'txt_r10': 50.9, 'txt_r_mean': 35.99333333333333, 'img_r1': 11.975209916033586, 'img_r5': 30.463814474210317, 'img_r10': 41.97920831667333, 'img_r_mean': 28.13941090230574, 'r_mean': 32.06637211781954}
Epoch: 10 iteration: 0 lr: 0.00022749999999999997 loss: -0.9465005993843079
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 10 iteration: 50 lr: 0.00022749999999999997 loss: -0.1919674426317215
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 10 iteration: 100 lr: 0.00022749999999999997 loss: 0.0656488761305809
tau_img: 0.0066, tau_txt: 0.0063
Epoch: 10 iteration: 150 lr: 0.00022749999999999997 loss: 0.15473569929599762
tau_img: 0.0066, tau_txt: 0.0063
Epoch: 10 iteration: 200 lr: 0.00022749999999999997 loss: 0.048671215772628784
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 10 iteration: 250 lr: 0.00022749999999999997 loss: 0.05919775739312172
tau_img: 0.0066, tau_txt: 0.0063
Computing features for evaluation...
Computing features for evaluation...
Epoch: 10
coco val: {'txt_r1': 17.54, 'txt_r5': 39.96, 'txt_r10': 52.46, 'txt_r_mean': 36.653333333333336, 'img_r1': 12.039184326269492, 'img_r5': 30.89564174330268, 'img_r10': 42.55897640943623, 'img_r_mean': 28.497934159669466, 'r_mean': 32.5756337465014}
coco test: {'txt_r1': 17.24, 'txt_r5': 38.94, 'txt_r10': 51.24, 'txt_r_mean': 35.806666666666665, 'img_r1': 12.191123550579768, 'img_r5': 30.947620951619353, 'img_r10': 42.958816473410636, 'img_r_mean': 28.69918699186992, 'r_mean': 32.25292682926829}
Epoch: 11 iteration: 0 lr: 0.00021397681324599103 loss: -0.8527200222015381
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 11 iteration: 50 lr: 0.00021397681324599103 loss: -0.310724675655365
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 11 iteration: 100 lr: 0.00021397681324599103 loss: -0.18071337044239044
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 11 iteration: 150 lr: 0.00021397681324599103 loss: -0.15896828472614288
tau_img: 0.0067, tau_txt: 0.0064
Epoch: 11 iteration: 200 lr: 0.00021397681324599103 loss: 0.125459223985672
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 11 iteration: 250 lr: 0.00021397681324599103 loss: 0.005948394536972046
tau_img: 0.0066, tau_txt: 0.0064
Computing features for evaluation...
Computing features for evaluation...
Epoch: 11
coco val: {'txt_r1': 18.22, 'txt_r5': 40.72, 'txt_r10': 53.08, 'txt_r_mean': 37.339999999999996, 'img_r1': 12.367053178728508, 'img_r5': 31.231507397041185, 'img_r10': 42.890843662534984, 'img_r_mean': 28.829801412768223, 'r_mean': 33.08490070638411}
coco test: {'txt_r1': 19.12, 'txt_r5': 40.38, 'txt_r10': 52.52, 'txt_r_mean': 37.34, 'img_r1': 12.29908036785286, 'img_r5': 31.215513794482206, 'img_r10': 43.082766893242706, 'img_r_mean': 28.865787018525925, 'r_mean': 33.10289350926296}
Epoch: 12 iteration: 0 lr: 0.00019980746418436736 loss: -0.8759943246841431
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 12 iteration: 50 lr: 0.00019980746418436736 loss: -0.6733912229537964
tau_img: 0.0067, tau_txt: 0.0064
Epoch: 12 iteration: 100 lr: 0.00019980746418436736 loss: 0.007951691746711731
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 12 iteration: 150 lr: 0.00019980746418436736 loss: -0.27293896675109863
tau_img: 0.0066, tau_txt: 0.0064
Epoch: 12 iteration: 200 lr: 0.00019980746418436736 loss: -0.604184627532959
tau_img: 0.0067, tau_txt: 0.0065
Epoch: 12 iteration: 250 lr: 0.00019980746418436736 loss: -0.08432623744010925
tau_img: 0.0066, tau_txt: 0.0064
Computing features for evaluation...
Computing features for evaluation...
Epoch: 12
coco val: {'txt_r1': 18.26, 'txt_r5': 40.38, 'txt_r10': 53.12, 'txt_r_mean': 37.25333333333333, 'img_r1': 12.522990803678528, 'img_r5': 31.70731707317073, 'img_r10': 43.122750899640145, 'img_r_mean': 29.117686258829803, 'r_mean': 33.18550979608157}
coco test: {'txt_r1': 17.34, 'txt_r5': 39.08, 'txt_r10': 52.32, 'txt_r_mean': 36.24666666666667, 'img_r1': 12.798880447820872, 'img_r5': 31.759296281487405, 'img_r10': 43.05477808876449, 'img_r_mean': 29.204318272690927, 'r_mean': 32.7254924696788}
Epoch: 13 iteration: 0 lr: 0.00018514719516857505 loss: -1.3101189136505127
tau_img: 0.0069, tau_txt: 0.0066
Epoch: 13 iteration: 50 lr: 0.00018514719516857505 loss: -0.5373433828353882
tau_img: 0.0068, tau_txt: 0.0065
Epoch: 13 iteration: 100 lr: 0.00018514719516857505 loss: -0.2286771833896637
tau_img: 0.0068, tau_txt: 0.0065
Epoch: 13 iteration: 150 lr: 0.00018514719516857505 loss: -0.17678964138031006
tau_img: 0.0067, tau_txt: 0.0064
Epoch: 13 iteration: 200 lr: 0.00018514719516857505 loss: -0.24495404958724976
tau_img: 0.0068, tau_txt: 0.0066
Epoch: 13 iteration: 250 lr: 0.00018514719516857505 loss: -0.5934573411941528
tau_img: 0.0068, tau_txt: 0.0066
Computing features for evaluation...
Computing features for evaluation...
Epoch: 13
coco val: {'txt_r1': 19.1, 'txt_r5': 40.84, 'txt_r10': 53.04, 'txt_r_mean': 37.660000000000004, 'img_r1': 12.538984406237505, 'img_r5': 31.36345461815274, 'img_r10': 42.94282287085166, 'img_r_mean': 28.9484206317473, 'r_mean': 33.304210315873654}
coco test: {'txt_r1': 18.26, 'txt_r5': 40.74, 'txt_r10': 53.12, 'txt_r_mean': 37.373333333333335, 'img_r1': 12.810875649740105, 'img_r5': 31.955217912834865, 'img_r10': 43.398640543782484, 'img_r_mean': 29.388244702119152, 'r_mean': 33.380789017726244}
Epoch: 14 iteration: 0 lr: 0.00017015662717380974 loss: -1.136932611465454
tau_img: 0.0069, tau_txt: 0.0067
Epoch: 14 iteration: 50 lr: 0.00017015662717380974 loss: -1.2352209091186523
tau_img: 0.0071, tau_txt: 0.0068
Epoch: 14 iteration: 100 lr: 0.00017015662717380974 loss: -0.3656700551509857
tau_img: 0.0069, tau_txt: 0.0067
Epoch: 14 iteration: 150 lr: 0.00017015662717380974 loss: -0.7482412457466125
tau_img: 0.0068, tau_txt: 0.0066
Epoch: 14 iteration: 200 lr: 0.00017015662717380974 loss: -0.6269024014472961
tau_img: 0.0070, tau_txt: 0.0068
Epoch: 14 iteration: 250 lr: 0.00017015662717380974 loss: -0.8550422191619873
tau_img: 0.0070, tau_txt: 0.0067
Computing features for evaluation...
Computing features for evaluation...
Epoch: 14
coco val: {'txt_r1': 18.52, 'txt_r5': 39.8, 'txt_r10': 52.78, 'txt_r_mean': 37.03333333333333, 'img_r1': 12.758896441423431, 'img_r5': 32.279088364654136, 'img_r10': 44.17433026789284, 'img_r_mean': 29.73743835799014, 'r_mean': 33.38538584566174}
coco test: {'txt_r1': 17.52, 'txt_r5': 39.96, 'txt_r10': 51.94, 'txt_r_mean': 36.473333333333336, 'img_r1': 12.902838864454218, 'img_r5': 31.887245101959216, 'img_r10': 43.750499800079965, 'img_r_mean': 29.513527922164467, 'r_mean': 32.9934306277489}
Epoch: 15 iteration: 0 lr: 0.000155 loss: -1.8559613227844238
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 15 iteration: 50 lr: 0.000155 loss: -1.2427170276641846
tau_img: 0.0073, tau_txt: 0.0070
Epoch: 15 iteration: 100 lr: 0.000155 loss: -1.1395246982574463
tau_img: 0.0072, tau_txt: 0.0070
Epoch: 15 iteration: 150 lr: 0.000155 loss: -1.4752817153930664
tau_img: 0.0072, tau_txt: 0.0069
Epoch: 15 iteration: 200 lr: 0.000155 loss: -1.8828952312469482
tau_img: 0.0072, tau_txt: 0.0070
Epoch: 15 iteration: 250 lr: 0.000155 loss: -1.181127905845642
tau_img: 0.0072, tau_txt: 0.0070
Computing features for evaluation...
Computing features for evaluation...
Epoch: 15
coco val: {'txt_r1': 19.9, 'txt_r5': 43.36, 'txt_r10': 55.22, 'txt_r_mean': 39.49333333333333, 'img_r1': 13.478608556577369, 'img_r5': 32.810875649740105, 'img_r10': 44.40223910435826, 'img_r_mean': 30.230574436891914, 'r_mean': 34.86195388511263}
coco test: {'txt_r1': 19.58, 'txt_r5': 43.1, 'txt_r10': 54.84, 'txt_r_mean': 39.17333333333334, 'img_r1': 13.642542982806876, 'img_r5': 33.218712514994, 'img_r10': 44.718112754898044, 'img_r_mean': 30.526456084232976, 'r_mean': 34.849894708783154}
Epoch: 16 iteration: 0 lr: 0.00013984337282619026 loss: -2.054107189178467
tau_img: 0.0073, tau_txt: 0.0072
Epoch: 16 iteration: 50 lr: 0.00013984337282619026 loss: -1.3603992462158203
tau_img: 0.0073, tau_txt: 0.0071
Epoch: 16 iteration: 100 lr: 0.00013984337282619026 loss: -1.8992851972579956
tau_img: 0.0074, tau_txt: 0.0071
Epoch: 16 iteration: 150 lr: 0.00013984337282619026 loss: -1.8692710399627686
tau_img: 0.0074, tau_txt: 0.0072
Epoch: 16 iteration: 200 lr: 0.00013984337282619026 loss: -1.7104038000106812
tau_img: 0.0075, tau_txt: 0.0072
Epoch: 16 iteration: 250 lr: 0.00013984337282619026 loss: -1.380126953125
tau_img: 0.0073, tau_txt: 0.0071
Computing features for evaluation...
Computing features for evaluation...
Epoch: 16
coco val: {'txt_r1': 20.58, 'txt_r5': 43.24, 'txt_r10': 55.3, 'txt_r_mean': 39.70666666666667, 'img_r1': 13.15873650539784, 'img_r5': 32.99480207916833, 'img_r10': 44.586165533786485, 'img_r_mean': 30.24656803945088, 'r_mean': 34.97661735305878}
coco test: {'txt_r1': 19.36, 'txt_r5': 42.4, 'txt_r10': 54.48, 'txt_r_mean': 38.74666666666666, 'img_r1': 13.666533386645343, 'img_r5': 33.2546981207517, 'img_r10': 44.65413834466214, 'img_r_mean': 30.525123284019724, 'r_mean': 34.63589497534319}
Epoch: 17 iteration: 0 lr: 0.00012485280483142487 loss: -2.5637669563293457
tau_img: 0.0075, tau_txt: 0.0073
Epoch: 17 iteration: 50 lr: 0.00012485280483142487 loss: -2.191415309906006
tau_img: 0.0078, tau_txt: 0.0075
Epoch: 17 iteration: 100 lr: 0.00012485280483142487 loss: -2.321763515472412
tau_img: 0.0077, tau_txt: 0.0074
Epoch: 17 iteration: 150 lr: 0.00012485280483142487 loss: -1.8449326753616333
tau_img: 0.0075, tau_txt: 0.0073
Epoch: 17 iteration: 200 lr: 0.00012485280483142487 loss: -2.31805157661438
tau_img: 0.0077, tau_txt: 0.0075
Epoch: 17 iteration: 250 lr: 0.00012485280483142487 loss: -2.372451066970825
tau_img: 0.0075, tau_txt: 0.0073
Computing features for evaluation...
Computing features for evaluation...
Epoch: 17
coco val: {'txt_r1': 19.98, 'txt_r5': 42.34, 'txt_r10': 54.76, 'txt_r_mean': 39.02666666666667, 'img_r1': 13.554578168732506, 'img_r5': 33.04278288684526, 'img_r10': 44.470211915233904, 'img_r_mean': 30.35585765693722, 'r_mean': 34.691262161801944}
coco test: {'txt_r1': 19.42, 'txt_r5': 42.38, 'txt_r10': 54.96, 'txt_r_mean': 38.92, 'img_r1': 13.838464614154338, 'img_r5': 33.334666133546584, 'img_r10': 44.92602958816473, 'img_r_mean': 30.69972011195522, 'r_mean': 34.80986005597761}
Epoch: 18 iteration: 0 lr: 0.00011019253581563262 loss: -4.260552406311035
tau_img: 0.0081, tau_txt: 0.0079
Epoch: 18 iteration: 50 lr: 0.00011019253581563262 loss: -2.9299917221069336
tau_img: 0.0081, tau_txt: 0.0078
Epoch: 18 iteration: 100 lr: 0.00011019253581563262 loss: -3.3400635719299316
tau_img: 0.0080, tau_txt: 0.0077
Epoch: 18 iteration: 150 lr: 0.00011019253581563262 loss: -3.453747510910034
tau_img: 0.0079, tau_txt: 0.0077
Epoch: 18 iteration: 200 lr: 0.00011019253581563262 loss: -3.1733462810516357
tau_img: 0.0081, tau_txt: 0.0078
Epoch: 18 iteration: 250 lr: 0.00011019253581563262 loss: -2.6329762935638428
tau_img: 0.0079, tau_txt: 0.0076
Computing features for evaluation...
Computing features for evaluation...
Epoch: 18
coco val: {'txt_r1': 20.76, 'txt_r5': 43.36, 'txt_r10': 55.6, 'txt_r_mean': 39.906666666666666, 'img_r1': 14.226309476209517, 'img_r5': 33.80647740903638, 'img_r10': 45.27788884446222, 'img_r_mean': 31.103558576569373, 'r_mean': 35.50511262161802}
coco test: {'txt_r1': 20.6, 'txt_r5': 43.26, 'txt_r10': 55.16, 'txt_r_mean': 39.67333333333333, 'img_r1': 14.406237504998002, 'img_r5': 34.25029988004798, 'img_r10': 45.76169532187125, 'img_r_mean': 31.47274423563908, 'r_mean': 35.57303878448621}
Epoch: 19 iteration: 0 lr: 9.602318675400897e-05 loss: -4.915426254272461
tau_img: 0.0085, tau_txt: 0.0082
Epoch: 19 iteration: 50 lr: 9.602318675400897e-05 loss: -3.8118224143981934
tau_img: 0.0083, tau_txt: 0.0082
Epoch: 19 iteration: 100 lr: 9.602318675400897e-05 loss: -3.6978960037231445
tau_img: 0.0083, tau_txt: 0.0080
Epoch: 19 iteration: 150 lr: 9.602318675400897e-05 loss: -3.7106001377105713
tau_img: 0.0082, tau_txt: 0.0080
Epoch: 19 iteration: 200 lr: 9.602318675400897e-05 loss: -4.195495128631592
tau_img: 0.0083, tau_txt: 0.0080
Epoch: 19 iteration: 250 lr: 9.602318675400897e-05 loss: -4.262701034545898
tau_img: 0.0083, tau_txt: 0.0081
Computing features for evaluation...
Computing features for evaluation...
Epoch: 19
coco val: {'txt_r1': 19.98, 'txt_r5': 43.22, 'txt_r10': 55.22, 'txt_r_mean': 39.473333333333336, 'img_r1': 14.058376649340264, 'img_r5': 33.310675729708116, 'img_r10': 44.96601359456218, 'img_r_mean': 30.778355324536857, 'r_mean': 35.125844328935095}
coco test: {'txt_r1': 19.94, 'txt_r5': 43.16, 'txt_r10': 55.72, 'txt_r_mean': 39.60666666666666, 'img_r1': 13.94642143142743, 'img_r5': 33.65053978408636, 'img_r10': 45.33386645341863, 'img_r_mean': 30.976942556310807, 'r_mean': 35.291804611488736}
Epoch: 20 iteration: 0 lr: 8.250000000000001e-05 loss: -4.490512371063232
tau_img: 0.0085, tau_txt: 0.0084
Epoch: 20 iteration: 50 lr: 8.250000000000001e-05 loss: -5.540229320526123
tau_img: 0.0088, tau_txt: 0.0085
Epoch: 20 iteration: 100 lr: 8.250000000000001e-05 loss: -5.427042484283447
tau_img: 0.0088, tau_txt: 0.0085
Epoch: 20 iteration: 150 lr: 8.250000000000001e-05 loss: -5.009304046630859
tau_img: 0.0087, tau_txt: 0.0085
Epoch: 20 iteration: 200 lr: 8.250000000000001e-05 loss: -5.154559135437012
tau_img: 0.0088, tau_txt: 0.0084
Epoch: 20 iteration: 250 lr: 8.250000000000001e-05 loss: -5.245851993560791
tau_img: 0.0087, tau_txt: 0.0085
Computing features for evaluation...
Computing features for evaluation...
Epoch: 20
coco val: {'txt_r1': 21.16, 'txt_r5': 43.64, 'txt_r10': 55.96, 'txt_r_mean': 40.25333333333333, 'img_r1': 13.914434226309476, 'img_r5': 33.954418232706914, 'img_r10': 45.64574170331867, 'img_r_mean': 31.171531387445018, 'r_mean': 35.71243236038917}
coco test: {'txt_r1': 20.46, 'txt_r5': 43.9, 'txt_r10': 55.6, 'txt_r_mean': 39.98666666666667, 'img_r1': 14.166333466613354, 'img_r5': 34.44622151139544, 'img_r10': 45.7936825269892, 'img_r_mean': 31.46874583499933, 'r_mean': 35.727706250833}
Epoch: 21 iteration: 0 lr: 6.97711384175914e-05 loss: -6.665648460388184
tau_img: 0.0093, tau_txt: 0.0089
Epoch: 21 iteration: 50 lr: 6.97711384175914e-05 loss: -5.873527526855469
tau_img: 0.0089, tau_txt: 0.0088
Epoch: 21 iteration: 100 lr: 6.97711384175914e-05 loss: -6.627588272094727
tau_img: 0.0091, tau_txt: 0.0090
Epoch: 21 iteration: 150 lr: 6.97711384175914e-05 loss: -6.532419204711914
tau_img: 0.0093, tau_txt: 0.0091
Epoch: 21 iteration: 200 lr: 6.97711384175914e-05 loss: -6.612300395965576
tau_img: 0.0092, tau_txt: 0.0090
Epoch: 21 iteration: 250 lr: 6.97711384175914e-05 loss: -5.026062965393066
tau_img: 0.0088, tau_txt: 0.0085
Computing features for evaluation...
Computing features for evaluation...
Epoch: 21
coco val: {'txt_r1': 21.2, 'txt_r5': 42.88, 'txt_r10': 55.18, 'txt_r_mean': 39.75333333333333, 'img_r1': 13.858456617353058, 'img_r5': 33.5265893642543, 'img_r10': 45.12994802079168, 'img_r_mean': 30.838331334133013, 'r_mean': 35.29583233373317}
coco test: {'txt_r1': 19.56, 'txt_r5': 42.92, 'txt_r10': 54.92, 'txt_r_mean': 39.13333333333333, 'img_r1': 14.082367053178729, 'img_r5': 33.506597361055576, 'img_r10': 45.16993202718913, 'img_r_mean': 30.919632147141144, 'r_mean': 35.02648274023724}
Epoch: 22 iteration: 0 lr: 5.797606207796559e-05 loss: -7.0506157875061035
tau_img: 0.0095, tau_txt: 0.0091
Epoch: 22 iteration: 50 lr: 5.797606207796559e-05 loss: -7.07581901550293
tau_img: 0.0093, tau_txt: 0.0090
Epoch: 22 iteration: 100 lr: 5.797606207796559e-05 loss: -7.153095245361328
tau_img: 0.0096, tau_txt: 0.0093
Epoch: 22 iteration: 150 lr: 5.797606207796559e-05 loss: -7.888920307159424
tau_img: 0.0096, tau_txt: 0.0094
Epoch: 22 iteration: 200 lr: 5.797606207796559e-05 loss: -6.130715847015381
tau_img: 0.0092, tau_txt: 0.0090
Epoch: 22 iteration: 250 lr: 5.797606207796559e-05 loss: -6.484936714172363
tau_img: 0.0093, tau_txt: 0.0089
Computing features for evaluation...
Computing features for evaluation...
Epoch: 22
coco val: {'txt_r1': 20.78, 'txt_r5': 43.78, 'txt_r10': 55.14, 'txt_r_mean': 39.9, 'img_r1': 14.338264694122351, 'img_r5': 34.038384646141544, 'img_r10': 45.71771291483407, 'img_r_mean': 31.36478741836599, 'r_mean': 35.63239370918299}
coco test: {'txt_r1': 20.3, 'txt_r5': 42.74, 'txt_r10': 55.12, 'txt_r_mean': 39.38666666666666, 'img_r1': 14.326269492203119, 'img_r5': 34.3062774890044, 'img_r10': 45.649740103958415, 'img_r_mean': 31.427429028388644, 'r_mean': 35.40704784752765}
Epoch: 23 iteration: 0 lr: 4.724400030577786e-05 loss: -9.242505073547363
tau_img: 0.0100, tau_txt: 0.0099
Epoch: 23 iteration: 50 lr: 4.724400030577786e-05 loss: -8.627782821655273
tau_img: 0.0097, tau_txt: 0.0094
Epoch: 23 iteration: 100 lr: 4.724400030577786e-05 loss: -8.229507446289062
tau_img: 0.0098, tau_txt: 0.0095
Epoch: 23 iteration: 150 lr: 4.724400030577786e-05 loss: -8.095161437988281
tau_img: 0.0101, tau_txt: 0.0099
Epoch: 23 iteration: 200 lr: 4.724400030577786e-05 loss: -7.361606597900391
tau_img: 0.0099, tau_txt: 0.0096
Epoch: 23 iteration: 250 lr: 4.724400030577786e-05 loss: -8.183349609375
tau_img: 0.0096, tau_txt: 0.0095
Computing features for evaluation...
Computing features for evaluation...
Epoch: 23
coco val: {'txt_r1': 20.56, 'txt_r5': 43.82, 'txt_r10': 55.32, 'txt_r_mean': 39.9, 'img_r1': 14.066373450619752, 'img_r5': 33.7984806077569, 'img_r10': 45.86965213914434, 'img_r_mean': 31.244835399173667, 'r_mean': 35.572417699586836}
coco test: {'txt_r1': 19.68, 'txt_r5': 43.02, 'txt_r10': 54.9, 'txt_r_mean': 39.199999999999996, 'img_r1': 14.374250299880048, 'img_r5': 34.16233506597361, 'img_r10': 45.71771291483407, 'img_r_mean': 31.418099426895907, 'r_mean': 35.30904971344795}
Epoch: 24 iteration: 0 lr: 3.769253581563263e-05 loss: -10.245454788208008
tau_img: 0.0102, tau_txt: 0.0099
Epoch: 24 iteration: 50 lr: 3.769253581563263e-05 loss: -9.013447761535645
tau_img: 0.0102, tau_txt: 0.0100
Epoch: 24 iteration: 100 lr: 3.769253581563263e-05 loss: -10.611595153808594
tau_img: 0.0104, tau_txt: 0.0101
Epoch: 24 iteration: 150 lr: 3.769253581563263e-05 loss: -8.743675231933594
tau_img: 0.0102, tau_txt: 0.0102
Epoch: 24 iteration: 200 lr: 3.769253581563263e-05 loss: -8.715897560119629
tau_img: 0.0102, tau_txt: 0.0099
Epoch: 24 iteration: 250 lr: 3.769253581563263e-05 loss: -10.123720169067383
tau_img: 0.0102, tau_txt: 0.0101
Computing features for evaluation...
Computing features for evaluation...
Epoch: 24
coco val: {'txt_r1': 20.3, 'txt_r5': 43.44, 'txt_r10': 54.48, 'txt_r_mean': 39.406666666666666, 'img_r1': 14.110355857656938, 'img_r5': 33.662534986005596, 'img_r10': 45.59776089564174, 'img_r_mean': 31.123550579768093, 'r_mean': 35.26510862321738}
coco test: {'txt_r1': 19.24, 'txt_r5': 42.34, 'txt_r10': 55.02, 'txt_r_mean': 38.86666666666667, 'img_r1': 14.466213514594163, 'img_r5': 33.75449820071971, 'img_r10': 45.529788084766096, 'img_r_mean': 31.250166600026656, 'r_mean': 35.05841663334666}
Epoch: 25 iteration: 0 lr: 2.9426316451256386e-05 loss: -11.852662086486816
tau_img: 0.0108, tau_txt: 0.0105
Epoch: 25 iteration: 50 lr: 2.9426316451256386e-05 loss: -11.105792045593262
tau_img: 0.0108, tau_txt: 0.0105
Epoch: 25 iteration: 100 lr: 2.9426316451256386e-05 loss: -9.328715324401855
tau_img: 0.0103, tau_txt: 0.0100
Epoch: 25 iteration: 150 lr: 2.9426316451256386e-05 loss: -10.47180461883545
tau_img: 0.0105, tau_txt: 0.0101
Epoch: 25 iteration: 200 lr: 2.9426316451256386e-05 loss: -9.260772705078125
tau_img: 0.0104, tau_txt: 0.0103
Epoch: 25 iteration: 250 lr: 2.9426316451256386e-05 loss: -10.207618713378906
tau_img: 0.0103, tau_txt: 0.0102
Computing features for evaluation...
Computing features for evaluation...
Epoch: 25
coco val: {'txt_r1': 20.44, 'txt_r5': 43.78, 'txt_r10': 55.58, 'txt_r_mean': 39.93333333333333, 'img_r1': 14.146341463414634, 'img_r5': 33.81447421031587, 'img_r10': 46.00559776089564, 'img_r_mean': 31.32213781154205, 'r_mean': 35.627735572437686}
coco test: {'txt_r1': 19.66, 'txt_r5': 42.9, 'txt_r10': 55.24, 'txt_r_mean': 39.26666666666667, 'img_r1': 14.47421031587365, 'img_r5': 34.27828868452619, 'img_r10': 45.725709716113556, 'img_r_mean': 31.492736238837796, 'r_mean': 35.379701452752236}
Epoch: 26 iteration: 0 lr: 2.2535908641822855e-05 loss: -10.570426940917969
tau_img: 0.0106, tau_txt: 0.0105
Epoch: 26 iteration: 50 lr: 2.2535908641822855e-05 loss: -11.204402923583984
tau_img: 0.0110, tau_txt: 0.0107
Epoch: 26 iteration: 100 lr: 2.2535908641822855e-05 loss: -12.513148307800293
tau_img: 0.0110, tau_txt: 0.0108
Epoch: 26 iteration: 150 lr: 2.2535908641822855e-05 loss: -11.783784866333008
tau_img: 0.0110, tau_txt: 0.0108
Epoch: 26 iteration: 200 lr: 2.2535908641822855e-05 loss: -11.702966690063477
tau_img: 0.0111, tau_txt: 0.0107
Epoch: 26 iteration: 250 lr: 2.2535908641822855e-05 loss: -11.340032577514648
tau_img: 0.0111, tau_txt: 0.0110
Computing features for evaluation...
Computing features for evaluation...
Epoch: 26
coco val: {'txt_r1': 20.7, 'txt_r5': 43.74, 'txt_r10': 55.58, 'txt_r_mean': 40.00666666666667, 'img_r1': 14.134346261495402, 'img_r5': 33.78248700519792, 'img_r10': 45.657736905237904, 'img_r_mean': 31.19152339064374, 'r_mean': 35.599095028655206}
coco test: {'txt_r1': 19.48, 'txt_r5': 42.86, 'txt_r10': 55.32, 'txt_r_mean': 39.22, 'img_r1': 14.29828068772491, 'img_r5': 33.98640543782487, 'img_r10': 45.55377848860456, 'img_r_mean': 31.27948820471811, 'r_mean': 35.24974410235905}
Epoch: 27 iteration: 0 lr: 1.7096805137202738e-05 loss: -12.180134773254395
tau_img: 0.0114, tau_txt: 0.0113
Epoch: 27 iteration: 50 lr: 1.7096805137202738e-05 loss: -12.57005500793457
tau_img: 0.0112, tau_txt: 0.0110
Epoch: 27 iteration: 100 lr: 1.7096805137202738e-05 loss: -12.195676803588867
tau_img: 0.0115, tau_txt: 0.0113
Epoch: 27 iteration: 150 lr: 1.7096805137202738e-05 loss: -13.575706481933594
tau_img: 0.0116, tau_txt: 0.0113
Epoch: 27 iteration: 200 lr: 1.7096805137202738e-05 loss: -14.225406646728516
tau_img: 0.0115, tau_txt: 0.0113
Epoch: 27 iteration: 250 lr: 1.7096805137202738e-05 loss: -11.519415855407715
tau_img: 0.0113, tau_txt: 0.0111
Computing features for evaluation...
Computing features for evaluation...
Epoch: 27
coco val: {'txt_r1': 20.72, 'txt_r5': 44.04, 'txt_r10': 55.4, 'txt_r_mean': 40.053333333333335, 'img_r1': 14.186325469812076, 'img_r5': 33.71451419432227, 'img_r10': 45.63374650139944, 'img_r_mean': 31.178195388511266, 'r_mean': 35.6157643609223}
coco test: {'txt_r1': 19.42, 'txt_r5': 42.88, 'txt_r10': 55.08, 'txt_r_mean': 39.126666666666665, 'img_r1': 14.50219912035186, 'img_r5': 33.982407037185126, 'img_r10': 45.569772091163536, 'img_r_mean': 31.351459416233507, 'r_mean': 35.23906304145009}
Epoch: 28 iteration: 0 lr: 1.3168597893598175e-05 loss: -14.22984790802002
tau_img: 0.0116, tau_txt: 0.0115
Epoch: 28 iteration: 50 lr: 1.3168597893598175e-05 loss: -12.658186912536621
tau_img: 0.0117, tau_txt: 0.0115
Epoch: 28 iteration: 100 lr: 1.3168597893598175e-05 loss: -14.149580001831055
tau_img: 0.0117, tau_txt: 0.0114
Epoch: 28 iteration: 150 lr: 1.3168597893598175e-05 loss: -14.180305480957031
tau_img: 0.0119, tau_txt: 0.0115
Epoch: 28 iteration: 200 lr: 1.3168597893598175e-05 loss: -14.528634071350098
tau_img: 0.0121, tau_txt: 0.0118
Epoch: 28 iteration: 250 lr: 1.3168597893598175e-05 loss: -14.142889022827148
tau_img: 0.0120, tau_txt: 0.0116
Computing features for evaluation...
Computing features for evaluation...
Epoch: 28
coco val: {'txt_r1': 20.56, 'txt_r5': 43.92, 'txt_r10': 55.18, 'txt_r_mean': 39.88666666666666, 'img_r1': 14.378248700519793, 'img_r5': 33.990403838464616, 'img_r10': 45.81767293082767, 'img_r_mean': 31.39544182327069, 'r_mean': 35.64105424496868}
coco test: {'txt_r1': 19.56, 'txt_r5': 42.92, 'txt_r10': 55.0, 'txt_r_mean': 39.160000000000004, 'img_r1': 14.550179928028788, 'img_r5': 34.11435425829668, 'img_r10': 45.765693722510996, 'img_r_mean': 31.476742636278818, 'r_mean': 35.31837131813941}
Epoch: 29 iteration: 0 lr: 1.0794325171600358e-05 loss: -14.580052375793457
tau_img: 0.0120, tau_txt: 0.0117
Epoch: 29 iteration: 50 lr: 1.0794325171600358e-05 loss: -14.782979965209961
tau_img: 0.0124, tau_txt: 0.0122
Epoch: 29 iteration: 100 lr: 1.0794325171600358e-05 loss: -13.903106689453125
tau_img: 0.0121, tau_txt: 0.0118
Epoch: 29 iteration: 150 lr: 1.0794325171600358e-05 loss: -15.160087585449219
tau_img: 0.0125, tau_txt: 0.0121
Epoch: 29 iteration: 200 lr: 1.0794325171600358e-05 loss: -14.430315017700195
tau_img: 0.0118, tau_txt: 0.0117
Epoch: 29 iteration: 250 lr: 1.0794325171600358e-05 loss: -14.369138717651367
tau_img: 0.0120, tau_txt: 0.0118
Computing features for evaluation...
Computing features for evaluation...
Epoch: 29
coco val: {'txt_r1': 20.42, 'txt_r5': 43.82, 'txt_r10': 55.34, 'txt_r_mean': 39.86000000000001, 'img_r1': 14.234306277489004, 'img_r5': 33.750499800079965, 'img_r10': 45.48180727708917, 'img_r_mean': 31.155537784886047, 'r_mean': 35.507768892443025}
coco test: {'txt_r1': 19.4, 'txt_r5': 42.76, 'txt_r10': 55.08, 'txt_r_mean': 39.08, 'img_r1': 14.434226309476209, 'img_r5': 33.8984406237505, 'img_r10': 45.577768892443025, 'img_r_mean': 31.303478608556578, 'r_mean': 35.191739304278286}

Visualization

Here we plot the mean validation recall over the training epochs for CLIP and iSogCLR.

# Compare CLIP and iSogCLR by plotting their mean validation recall per epoch.
import matplotlib.pyplot as plt
import numpy as np

# Mean validation recall after each training epoch (30 values per method).
clip_recall_vals = [
    9.56793, 26.4037, 29.3343, 29.7682, 30.5586, 30.8398,
    30.8938, 31.5624, 30.5864, 31.1057, 31.775, 31.0977,
    31.8895, 31.0098, 31.2123, 31.745, 31.8129, 32.0983,
    31.163, 31.7523, 32.1975, 32.2302, 32.5441, 32.3621,
    32.6608, 32.7541, 32.7528, 32.4954, 32.9994, 32.7601,
]
isogclr_recall_vals = [
    10.3831, 22.1452, 26.797, 29.2797, 30.6854, 31.7613,
    32.4364, 32.3897, 31.5938, 32.287, 32.5756, 33.0849,
    33.1855, 33.3042, 33.3853, 34.8619, 34.9766, 34.6912,
    35.5051, 35.1258, 35.7124, 35.2958, 35.6323, 35.5724,
    35.2651, 35.6277, 35.599, 35.6157, 35.641, 35.5077,
]

# x-axis positions: epochs are numbered 1..30 on the plot.
epochs = np.arange(1, 31)

# Draw both curves on a single set of axes.
fig, ax = plt.subplots()
ax.plot(
    epochs,
    clip_recall_vals,
    label='CLIP',
    linestyle=':',
    marker='+',
    color='blue',
)
ax.plot(
    epochs,
    isogclr_recall_vals,
    label='iSogCLR',
    marker='*',
    color='orange',
)

# Axis labels, title and legend.
ax.set_ylabel('Mean Validation Recall', fontsize=18)
ax.set_xlabel('Epoch', fontsize=18)
ax.set_title('CLIP vs. iSogCLR', fontsize=20)
ax.legend(fontsize=20)

plt.show()
../_images/Bimodal_iSogCLR_Tutorial.png