I cannot find any other in-place operation

I encountered an issue while reproducing code built on mmdetection: a warning that the `_sigmoid_focal_loss` function contains an in-place operation. (This actually has little to do with mmdetection itself; the problem is that my own code performs an in-place operation somewhere, but I cannot find it.) Here are my versions:


    mmcv-1.4.5
    mmdetection-2.19.0
    torch-1.10.1 

And I installed it with `MMCV_WITH_OPS=1 pip install -e .`, so I am quite sure this is not caused by the versions. I put `torch.autograd.set_detect_anomaly(True)` at the beginning of train.py, and it said:

```
[W python_anomaly_mode.cpp:104] Warning: Error detected in SigmoidFocalLossFunctionBackward. Traceback of forward call that caused the error:
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 199, in <module>
    main()
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 188, in main
    train_detector(
  File "/home/aaa/disk1/CalibratedTeacher-main/src/apis/train.py", line 191, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 134, in run
    iter_runner(iter_loaders[i], **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 61, in train
    outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/parallel/data_parallel.py", line 75, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/base.py", line 248, in train_step
    losses = self(**data)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/fp16_utils.py", line 139, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/base.py", line 172, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/cali_read_and_cali_full.py", line 185, in forward_train
    losses = self.student.forward_train(**data_groups["strong"])
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/detectors/single_stage.py", line 83, in forward_train
    losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 178, in forward_train
    losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/fp16_utils.py", line 225, in new_func
    output = old_func(*new_args, **new_kwargs)
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 345, in loss
    losses_cls, losses_bbox = multi_apply(
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/core/utils/misc.py", line 30, in multi_apply
    return tuple(map(list, zip(*map_results)))
  File "/home/aaa/disk1/CalibratedTeacher-main/src/models/retinahead_adaptnegweiht2_focaliou.py", line 223, in loss_single
    loss_cls = self.loss_cls(
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py", line 178, in forward
    loss_cls = self.loss_weight * calculate_loss_func(
  File "/home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py", line 92, in sigmoid_focal_loss
    loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), gamma,
 (function _print_stack)
Traceback (most recent call last):
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 199, in <module>
    main()
  File "/home/aaa/disk1/CalibratedTeacher-main/tools/train.py", line 188, in main
    train_detector(
  File "/home/aaa/disk1/CalibratedTeacher-main/src/apis/train.py", line 191, in train_detector
    runner.run(data_loaders, cfg.workflow)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 134, in run
    iter_runner(iter_loaders[i], **kwargs)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/iter_based_runner.py", line 67, in train
    self.call_hook('after_train_iter')
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/base_runner.py", line 309, in call_hook
    getattr(hook, fn_name)(self)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/runner/hooks/optimizer.py", line 272, in after_train_iter
    self.loss_scaler.scale(runner.outputs['loss']).backward()
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/_tensor.py", line 307, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/__init__.py", line 154, in backward
    Variable._execution_engine.run_backward(
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/function.py", line 199, in apply
    return user_fn(self, *args)
  File "/home/aaa/anaconda3/envs/calibrate_teacher1/lib/python3.8/site-packages/torch/autograd/function.py", line 340, in wrapper
    outputs = fn(ctx, *args)
  File "/home/aaa/disk1/mmcv-1.4.5/mmcv/ops/focal_loss.py", line 73, in backward
    input, target, weight = ctx.saved_tensors
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.LongTensor [729]] is at version 5; expected version 4 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
```
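
For reference, here is roughly how I enabled anomaly detection at the top of tools/train.py, plus a tiny standalone check of PyTorch's internal version counter, which is the number that jumps from 4 to 5 in the error above (the `labels` tensor below is just a placeholder, not a name from my code):

```python
import torch

# What I did at the top of tools/train.py, before the runner is built:
torch.autograd.set_detect_anomaly(True)

# Stand-in for the saved LongTensor of shape [729] from the error message.
# `_version` is an internal attribute, but it is the counter that the
# autograd version check compares ("is at version 5; expected version 4").
labels = torch.zeros(729, dtype=torch.long)
print(labels._version)  # 0
labels[3] = 1           # any in-place write bumps the counter
print(labels._version)  # 1
```

So whatever bumps that counter on the saved tensor between the forward pass of the loss and `backward()` is the operation I am trying to find.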

So I opened /home/aaa/disk1/mmdetection-2.19.0/mmdet/models/losses/focal_loss.py and found several in-place operations. But after I rewrote them, the error still occurs. Here is the code after my fix:

```python
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

import copy

ext_module = ext_loader.load_ext('_ext', [
    'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
    'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])


class SigmoidFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSigmoidFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            # weight = input.new_empty(0)
            weight = torch.empty_like(input).new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        # ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        ctx.reduction_dict=copy.deepcopy({'none': 0, 'mean': 1, 'sum': 2})
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        # output = input.new_zeros(input.size())
        output=torch.zeros(input.shape).cuda().clone()

        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):

        input, target, weight = ctx.saved_tensors

        grad_input = torch.zeros(input.size()).cuda().clone()

        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input = grad_input * grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input = grad_input / input.size(0)
        return grad_input, None, None, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply


class SigmoidFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s = s+f'(gamma={self.gamma}, '
        s = s+f'alpha={self.alpha}, '
        s = s+f'reduction={self.reduction})'
        return s


class SoftmaxFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSoftmaxFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        # ctx.reduction_dict = dict({'none': 0, 'mean': 1, 'sum': 2})
        ctx.reduction_dict = copy.deepcopy({'none': 0, 'mean': 1, 'sum': 2})

        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        channel_stats, _ = torch.max(input, dim=1)
        input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
        # input_softmax.exp_()
        input_softmax = torch.exp(input_softmax)



        channel_stats = input_softmax.sum(dim=1)
        input_softmax = input_softmax / channel_stats.unsqueeze(1).expand_as(input)

        # output = input.new_zeros(input.size(0))
        output = torch.zeros(input.size(0)).cuda().clone()
        ext_module.softmax_focal_loss_forward(
            input_softmax,
            target,
            weight,
            output,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input_softmax, target, weight)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input_softmax, target, weight = ctx.saved_tensors
        # buff = input_softmax.new_zeros(input_softmax.size(0))
        buff = torch.zeros(input_softmax.size(0)).cuda().clone()

        # grad_input = input_softmax.new_zeros(input_softmax.size())
        grad_input = torch.zeros(input_softmax.size()).cuda().clone()

        ext_module.softmax_focal_loss_backward(
            input_softmax,
            target,
            weight,
            buff,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input = grad_input * grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input = grad_input / input_softmax.size(0)
        return grad_input, None, None, None, None, None


softmax_focal_loss = SoftmaxFocalLossFunction.apply


class SoftmaxFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return softmax_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s = s +f'(gamma={self.gamma}, '
        s = s +f'alpha={self.alpha}, '
        s = s +f'reduction={self.reduction})'
        return s
```

I have no idea whether there are any other in-place operations.
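
The only `LongTensor` saved by `ctx.save_for_backward` in this file is `target`, so the `[torch.cuda.LongTensor [729]]` in the error is presumably the label tensor, and my guess is that it gets written in place somewhere after the loss call (for example in my custom head) rather than inside this file. Below is a tiny standalone reproduction of the same class of error with mmcv's `sigmoid_focal_loss`, purely as an illustration of the failure mode and not my actual code (it needs a GPU build of mmcv with ops; the 80-class shape is made up):

```python
import torch
from mmcv.ops import sigmoid_focal_loss

pred = torch.randn(729, 80, device='cuda', requires_grad=True)
target = torch.randint(0, 80, (729,), device='cuda')  # cuda LongTensor, as in the error

# positional args: input, target, gamma, alpha, weight, reduction
loss = sigmoid_focal_loss(pred, target, 2.0, 0.25, None, 'mean')
target[0] = 0    # in-place edit of the saved target AFTER the forward pass
loss.backward()  # RuntimeError: ... has been modified by an inplace operation

# Handing the loss a snapshot instead makes the error go away:
# loss = sigmoid_focal_loss(pred, target.clone(), 2.0, 0.25, None, 'mean')
```

If cloning the labels before they reach the loss makes my error disappear as well, the culprit must be whatever touches the label tensor after `loss_single` returns.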