I am trying to train MobileNetV2 on ImageNet-2012. My model is exactly the default, and I use learning rate warmup plus ReduceLROnPlateau as the LR scheduler, with Adam as the optimizer. However, I am seeing a fluctuating learning rate, which I am not expecting. Is there anything wrong in my code?
I have defined the warmup scheduler like this:
class WarmUpLR(torch.optim.lr_scheduler._LRScheduler):
    """Warmup learning rate scheduler.

    Args:
        optimizer: optimizer (e.g. SGD)
        total_iters: total iterations of the warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1, verbose=False):
        self.total_iters = total_iters
        super(WarmUpLR, self).__init__(optimizer, last_epoch, verbose)

    def get_lr(self):
        """Use the first m batches, setting the learning
        rate to base_lr * m / total_iters.
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]
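To check the ramp in isolation, this class can be driven outside Lightning; here is a minimal sketch (the dummy parameter, base LR of 0.001, and 5 warmup iterations are made-up illustration values):

import torch

# Standalone sanity check of the warmup ramp, using the WarmUpLR class above.
param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.Adam([param], lr=0.001)
warmup = WarmUpLR(opt, total_iters=5)

for batch in range(5):
    opt.step()                               # optimizer step first
    warmup.step()                            # then the per-batch warmup step
    print(batch, opt.param_groups[0]['lr'])  # ramps linearly: ~0.0002, 0.0004, ..., 0.001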
The configure_optimizers part:
def configure_optimizers(self):
    optimizer = torch.optim.Adam(
        self.parameters(),
        weight_decay=0.0005,
        betas=(0.9, 0.999),
        lr=0.001,
        eps=1e-8,
    )
    reduce_lr_plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)
    warmup = WarmUpLR(optimizer, total_iters=len(self.train_dataloader()) * self.warmup_step)
    return [optimizer], [{
        'scheduler': reduce_lr_plateau,
        'monitor': 'val_loss',
    }, {
        'scheduler': warmup,
    }]
The training_step part:
def training_step(self, batch, batch_idx):  # this function is called once per iteration
    # *args would receive the parameters as a list
    # **kwargs would receive the parameters as a dict
    optimizer = self.optimizers()  # fetch the optimizer
    if self.warmup_step != 0:
        _, warmup = self.lr_schedulers()  # fetch the warmup scheduler
    x, y = batch  # batch => (inputs, labels)
    logits = self.model(x)
    loss = self.loss(logits, y)
    optimizer.zero_grad()
    self.manual_backward(loss)
    optimizer.step()
    self.log('train_loss', loss, sync_dist=True)
    self.log('train_step_metric', self.train_acc(logits, y), sync_dist=True)
    if self.warmup_step != 0:
        if self.current_epoch < self.warmup_step:
            warmup.step()
    return loss
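For debugging, the current LR can be read straight out of the optimizer's param group and logged every step; a minimal sketch of that (the log key 'lr' and the helper name are just illustrative, not part of my original code):

# Hypothetical helper inside the LightningModule: read the learning rate
# from the first param group and log it each training step.
def log_current_lr(self):
    current_lr = self.optimizers().param_groups[0]['lr']
    self.log('lr', current_lr, sync_dist=True)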
The on_validation_epoch_end part:
def on_validation_epoch_end(self):
    self.log('val_epoch_metric', self.val_acc, sync_dist=True)
    if self.current_epoch >= self.warmup_step:
        if self.warmup_step != 0:
            scheduler, _ = self.lr_schedulers()
        else:
            scheduler = self.lr_schedulers()
        scheduler.step(self.trainer.callback_metrics['val_loss'])
    return super().on_validation_epoch_end()
The learning rate as logged to wandb: [plot image]
I don't think the learning rate should look like this; after warmup it should decrease gradually, without jumping back up...
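This is the trajectory I expect instead, simulated end-to-end outside Lightning with the WarmUpLR class above (the batches per epoch, warmup length, and fake validation losses are made-up illustration values):

import torch

# Hypothetical end-to-end sketch of the expected schedule: linear warmup for
# 2 epochs, then a flat LR that only drops when ReduceLROnPlateau fires.
param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.Adam([param], lr=0.001)
batches_per_epoch, warmup_epochs = 5, 2
warmup = WarmUpLR(opt, total_iters=batches_per_epoch * warmup_epochs)
plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, patience=2)

fake_val_losses = [1.0, 0.9, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8]
for epoch, val_loss in enumerate(fake_val_losses):
    for _ in range(batches_per_epoch):
        opt.step()
        if epoch < warmup_epochs:
            warmup.step()          # per-batch linear ramp during warmup
    if epoch >= warmup_epochs:
        plateau.step(val_loss)     # per-epoch plateau decay after warmup
    print(epoch, opt.param_groups[0]['lr'])  # ramps up, stays flat, then decays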
