Using ReduceLROnPlateau, Adam, and LR warmup in PyTorch Lightning


I am trying to train MobileNetV2 on ImageNet-2012. The model is exactly the default, and I use learning-rate warmup and ReduceLROnPlateau as the LR schedulers, with the Adam optimizer. However, the learning rate fluctuates in a way I am not expecting. Is there anything wrong in my code?

I have defined the warmup scheduler like this:

import torch

class WarmUpLR(torch.optim.lr_scheduler._LRScheduler):
    """Warmup learning-rate scheduler.

    Args:
        optimizer: optimizer (e.g. SGD)
        total_iters: total iterations of the warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1, verbose=False):
        self.total_iters = total_iters
        super(WarmUpLR, self).__init__(optimizer, last_epoch, verbose)

    def get_lr(self):
        """For the first m batches, set the learning rate to
        base_lr * m / total_iters.
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]
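
As a standalone sanity check (a minimal sketch with a dummy model and optimizer, not my actual training code), the warmup scheduler does ramp the learning rate linearly per step:

import torch

# Hypothetical standalone check of WarmUpLR, separate from the training code.
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
warmup = WarmUpLR(optimizer, total_iters=5)

for step in range(5):
    warmup.step()  # one call per training batch
    print(step, warmup.get_last_lr())  # ramps 0.0002, 0.0004, ..., 0.001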

The configure_optimizers part:

def configure_optimizers(self):
    optimizer = torch.optim.Adam(
        self.parameters(),
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0.0005,
    )
    reduce_lr_plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)
    warmup = WarmUpLR(optimizer, total_iters=len(self.train_dataloader()) * self.warmup_step)
    return [optimizer], [{
        'scheduler': reduce_lr_plateau,
        'monitor': 'val_loss',
    }, {
        'scheduler': warmup,
    }]
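
One thing I am not sure about: both schedulers are returned from configure_optimizers, so under automatic optimization Lightning also steps them on its own schedule. As I understand Lightning's documented scheduler config, these dicts also accept 'interval' and 'frequency' keys; a sketch of the return value with those keys (whether this fixes the fluctuation is my assumption):

# Sketch: scheduler config dicts with explicit 'interval'/'frequency'
# (documented Lightning keys); the specific values are assumptions.
return [optimizer], [{
    'scheduler': reduce_lr_plateau,
    'monitor': 'val_loss',
    'interval': 'epoch',  # step the plateau scheduler once per epoch
    'frequency': 1,
}, {
    'scheduler': warmup,
    'interval': 'step',   # step the warmup scheduler once per batch
}]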

The training_step part:

def training_step(self, batch, batch_idx):  # called once per iteration
    optimizer = self.optimizers()  # fetch the optimizer
    if self.warmup_step != 0:
        _, warmup = self.lr_schedulers()  # fetch the warmup scheduler

    x, y = batch  # batch => (inputs, labels)
    logits = self.model(x)
    loss = self.loss(logits, y)
    optimizer.zero_grad()
    self.manual_backward(loss)  # backward pass is required before optimizer.step()
    optimizer.step()

    self.log('train_loss', loss, sync_dist=True)
    self.log('train_step_metric', self.train_acc(logits, y), sync_dist=True)
    if self.warmup_step != 0:
        if self.current_epoch < self.warmup_step:
            warmup.step()
    return loss
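
Since I call optimizer.zero_grad() / optimizer.step() and warmup.step() myself in training_step, I believe Lightning's manual optimization mode has to be enabled; otherwise Lightning also steps the optimizer and schedulers automatically, which could produce exactly this kind of jumping learning rate. A minimal sketch of the flag, set in the LightningModule's __init__ (the class name here is hypothetical):

import pytorch_lightning as pl

class MobileNetV2Module(pl.LightningModule):  # hypothetical module name
    def __init__(self):
        super().__init__()
        # Tell Lightning not to call optimizer.step()/scheduler.step()
        # itself; training_step above does this manually.
        self.automatic_optimization = False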

The validation_epoch_end part:

def on_validation_epoch_end(self):
    self.log('val_epoch_metric', self.val_acc, sync_dist=True)
    if self.current_epoch >= self.warmup_step:
        if self.warmup_step != 0:
            scheduler, _ = self.lr_schedulers()
        else:
            scheduler = self.lr_schedulers()

        scheduler.step(self.trainer.callback_metrics['val_loss'])

    return super().on_validation_epoch_end()
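
To see what the schedule is actually doing per step, Lightning's LearningRateMonitor callback can log the learning rate (a minimal sketch; the Trainer arguments are illustrative assumptions):

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor

# Log the current LR at every training step so the warmup ramp and
# plateau drops show up in the wandb charts.
lr_monitor = LearningRateMonitor(logging_interval='step')
trainer = pl.Trainer(max_epochs=100, callbacks=[lr_monitor])  # illustrative args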

Here is the learning-rate curve from wandb. I don't think the learning rate should behave like this; it should decrease gradually without jumping back up.

[learning-rate plot from wandb]


There are 0 answers