I encountered an issue while reproducing code built on mmdetection: a warning that the `_sigmoid_focal_loss` function contains an in-place operation. (To be clear, this is probably not mmdetection's fault; most likely my own code has an in-place operation somewhere, but I cannot find it.) Here is my version:
mmcv-1.4.5
mmdetection-2.19.0
torch-1.10.1
And I installed mmcv from source with `MMCV_WITH_OPS=1 pip install -e .`
So I am fairly sure this is not a version problem.
I put `torch.autograd.set_detect_anomaly(True)` at the beginning of train.py.
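For reference, the placement is simply at module level, before any training code runs (the `main()` here is illustrative, not my actual train.py):

```python
import torch

# Enabling anomaly detection before any forward pass runs makes a failing
# backward op also print the traceback of the forward call that created it.
torch.autograd.set_detect_anomaly(True)

def main():
    ...  # build the runner and start training as usual

if __name__ == '__main__':
    main()
```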
And it said:
```
[W python_anomaly_mode.cpp:104] Warning: Error detected in SigmoidFocalLossFunctionBackward. Traceback of forward call that caused the error:
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 199, in <module>
    main()
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 188, in main
    train_detector(
  File "/home/aaa/disk1/CalibratedTeacher-main/src/apis/train.py", line 191, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 134, in run
    iter_runner(iter_loaders[i], **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 61, in train
    outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/parallel/data_parallel.py", line 75, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/fp16_utils.py", line 139, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/cali_read_and_cali_full.py", line 185, in forward_train
    losses = self.student.forward_train(**data_groups["strong"])
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/single_stage.py", line 83, in forward_train
    losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 178, in forward_train
    losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/fp16_utils.py", line 225, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 345, in loss
    losses_cls, losses_bbox = multi_apply(
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/core/utils/misc.py", line 30, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 223, in loss_single
    loss_cls = self.loss_cls(
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py", line 178, in forward
    loss_cls = self.loss_weight * calculate_loss_func(
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py", line 92, in sigmoid_focal_loss
    loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), gamma,
 (function _print_stack)
Traceback (most recent call last):
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 199, in <module>
    main()
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 188, in main
    train_detector(
  File "/home/aaa/disk1/CalibratedTeacher-main/src/apis/train.py", line 191, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 134, in run
    iter_runner(iter_loaders[i], **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 67, in train
    self.call_hook('after_train_iter')
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/base_runner.py", line 309, in call_hook
    getattr(hook, fn_name)(self)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/hooks/optimizer.py", line 272, in after_train_iter
    self.loss_scaler.scale(runner.outputs['loss']).backward()
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/_tensor.py", line 307, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/__init__.py", line 154, in backward
    Variable._execution_engine.run_backward(
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/function.py", line 199, in apply
    return user_fn(self, *args)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/function.py", line 340, in wrapper
    outputs = fn(ctx, *args)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/ops/focal_loss.py", line 73, in backward
    input, target, weight = ctx.saved_tensors
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.LongTensor [729]] is at version 5; expected version 4 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
```
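As far as I understand, the `is at version 5; expected version 4` part refers to PyTorch's per-tensor version counter: autograd remembers the version of every tensor it saves for backward and refuses to use it if the counter has changed since. A minimal standalone reproduction of the same class of error (nothing to do with mmdetection, just to show the mechanism):

```python
import torch

x = torch.randn(4, requires_grad=True)
t = torch.randint(0, 2, (4,))  # stands in for the Long target tensor
y = (x * t).sum()              # autograd saves t to compute x's gradient later
t += 1                         # in-place edit bumps t's version counter
y.backward()                   # RuntimeError: ... modified by an inplace operation
```

So the `torch.cuda.LongTensor [729]` in my error is presumably the saved `target` tensor, modified in place somewhere between the loss forward and the backward call.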
So I opened up /home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py and followed it down into mmcv's ops/focal_loss.py, which contains several in-place operations.
But after rewriting all of them out-of-place, the error is still there.
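To make explicit what kind of rewrite I did everywhere: an in-place op such as `exp_()` overwrites the tensor's storage (and bumps its version counter), while the out-of-place form returns a new tensor. A tiny standalone example:

```python
import torch

x = torch.randn(3)
x.exp_()          # in-place: mutates x's storage, autograd sees a version bump
y = torch.exp(x)  # out-of-place: result goes into a freshly allocated tensor
```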
Here is the code after I fixed it:
```python
# Copyright (c) OpenMMLab. All rights reserved.
import copy

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
    'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])


class SigmoidFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSigmoidFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            # weight = input.new_empty(0)
            weight = torch.empty_like(input).new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        # ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        ctx.reduction_dict = copy.deepcopy({'none': 0, 'mean': 1, 'sum': 2})
        assert reduction in ctx.reduction_dict.keys()
        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]
        # output = input.new_zeros(input.size())
        output = torch.zeros(input.shape).cuda().clone()
        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, target, weight = ctx.saved_tensors
        grad_input = torch.zeros(input.size()).cuda().clone()
        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)
        grad_input = grad_input * grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input = grad_input / input.size(0)
        return grad_input, None, None, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply


class SigmoidFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s = s + f'(gamma={self.gamma}, '
        s = s + f'alpha={self.alpha}, '
        s = s + f'reduction={self.reduction})'
        return s


class SoftmaxFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSoftmaxFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):
        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        # ctx.reduction_dict = dict({'none': 0, 'mean': 1, 'sum': 2})
        ctx.reduction_dict = copy.deepcopy({'none': 0, 'mean': 1, 'sum': 2})
        assert reduction in ctx.reduction_dict.keys()
        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]
        channel_stats, _ = torch.max(input, dim=1)
        input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
        # input_softmax.exp_()
        input_softmax = torch.exp(input_softmax)
        channel_stats = input_softmax.sum(dim=1)
        input_softmax = input_softmax / channel_stats.unsqueeze(1).expand_as(input)
        # output = input.new_zeros(input.size(0))
        output = torch.zeros(input.size(0)).cuda().clone()
        ext_module.softmax_focal_loss_forward(
            input_softmax,
            target,
            weight,
            output,
            gamma=ctx.gamma,
            alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input_softmax, target, weight)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input_softmax, target, weight = ctx.saved_tensors
        # buff = input_softmax.new_zeros(input_softmax.size(0))
        buff = torch.zeros(input_softmax.size(0)).cuda().clone()
        # grad_input = input_softmax.new_zeros(input_softmax.size())
        grad_input = torch.zeros(input_softmax.size()).cuda().clone()
        ext_module.softmax_focal_loss_backward(
            input_softmax,
            target,
            weight,
            buff,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)
        grad_input = grad_input * grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input = grad_input / input_softmax.size(0)
        return grad_input, None, None, None, None, None


softmax_focal_loss = SoftmaxFocalLossFunction.apply


class SoftmaxFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return softmax_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s = s + f'(gamma={self.gamma}, '
        s = s + f'alpha={self.alpha}, '
        s = s + f'reduction={self.reduction})'
        return s
```
I have no idea whether there are any other in-place operations I am missing.
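In case it helps with debugging, here is a small standalone helper I can wrap around suspicious calls to see whether they touch a tensor in place (it reads the internal `_version` counter; `check_inplace` and the example lambdas are mine, just illustrative):

```python
import torch

def check_inplace(tensor, fn):
    """Run fn(tensor) and report whether tensor was modified in place."""
    v0 = tensor._version
    out = fn(tensor)
    if tensor._version != v0:
        print(f'modified in place: version {v0} -> {tensor._version}')
    return out

labels = torch.randint(0, 80, (729,))             # same shape as in my error
check_inplace(labels, lambda t: t.clamp_(0, 79))  # flagged: clamp_ is in-place
check_inplace(labels, lambda t: t.clamp(0, 79))   # silent: clamp returns a copy
```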