Error in exporting detectron2 trained model to onnx or torch script model

254 views Asked by At

I'm trying to export a model trained in detectron2 using export_model.py. My model's configuration and the way I'm building it are as follows:

def setup_cfg():
    cfg = get_cfg()
    cfg_file = model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.merge_from_file(cfg_file)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    return cfg

Which then I build:

cfg = setup_cfg()
model_ = build_model(cfg) 
DetectionCheckpointer(model_).resume_or_load("./model/model_final.pth", resume=False)
model_.eval()

Where model_final.pth is my trained weight. Below is the truncated output of this cell which I believe means so far so good:

GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            64, 256, kernel_size=(1`

Then I used two following functions from export_model.py with some minor modifications only to run on notebook.

def export_scripting(torch_model, format, output):
    assert TORCH_VERSION >= (1, 8)
    fields = {
        "proposal_boxes": Boxes,
        "objectness_logits": Tensor,
        "pred_boxes": Boxes,
        "scores": Tensor,
        "pred_classes": Tensor,
        "pred_masks": Tensor,
        "pred_keypoints": torch.Tensor,
        "pred_keypoint_heatmaps": torch.Tensor,
    }
    assert format == "torchscript", "Scripting only supports torchscript format."

    class ScriptableAdapterBase(nn.Module):
        # Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944
        # by not retuning instances but dicts. Otherwise the exported model is not deployable
        def __init__(self):
            super().__init__()
            self.model = torch_model
            self.eval()

    if isinstance(torch_model, GeneralizedRCNN):

        class ScriptableAdapter(ScriptableAdapterBase):
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model.inference(inputs, do_postprocess=False)
                return [i.get_fields() for i in instances]

    else:

        class ScriptableAdapter(ScriptableAdapterBase):
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model(inputs)
                return [i.get_fields() for i in instances]

    ts_model = scripting_with_instances(ScriptableAdapter(), fields)
    with PathManager.open(os.path.join(output, "model.ts"), "wb") as f:
       torch.jit.save(ts_model, f)
    dump_torchscript_IR(ts_model, output)
    # TODO inference in Python now missing postprocessing glue code
    return None
def export_tracing(torch_model, inputs, output, format):
    logger = setup_logger()
    assert TORCH_VERSION >= (1, 8)
    image = inputs[0]["image"]
    inputs = [{"image": image}]  # remove other unused keys

    if isinstance(torch_model, GeneralizedRCNN):

        def inference(model, inputs):
            # use do_postprocess=False so it returns ROI mask
            inst = model.inference(inputs, do_postprocess=False)[0]
            return [{"instances": inst}]

    else:
        inference = None  # assume that we just call the model directly

    traceable_model = TracingAdapter(torch_model, inputs, inference)

    if format == "torchscript":
        ts_model = torch.jit.trace(traceable_model, (image,))
        with PathManager.open(os.path.join(output, "model.ts"), "wb") as f:
            torch.jit.save(ts_model, f)
        dump_torchscript_IR(ts_model, output)
    elif format == "onnx":
        with PathManager.open(os.path.join(output, "model.onnx"), "wb") as f:
            torch.onnx.export(traceable_model, (image,), f, opset_version=STABLE_ONNX_OPSET_VERSION)
    logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
    logger.info("Outputs schema: " + str(traceable_model.outputs_schema))

    if format != "torchscript":
        return None
    if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
        return None

    def eval_wrapper(inputs):
        """
        The exported model does not contain the final resize step, which is typically
        unused in deployment but needed for evaluation. We add it manually here.
        """
        input = inputs[0]
        instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"]
        postprocessed = detector_postprocess(instances, input["height"], input["width"])
        return [{"instances": postprocessed}]

    return eval_wrapper

I then used export_tracing(model_, inputs, "./", "onnx") for the second function. inputs is the output of inputs = get_sample_inputs("./sample_img.jpg"). The output of this cell is given below:

RuntimeError: 
object has no attribute nms:
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 41
        _log_api_usage_once(nms)
    _assert_has_ops()
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
           ~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
'nms' is being compiled since it was called from '_batched_nms_coordinate_trick'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 94
    offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
    boxes_for_nms = boxes + offsets[:, None]
    keep = nms(boxes_for_nms, scores, iou_threshold)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
    return keep

Also before this Runtime Error I'm getting warnings like below:

TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert t.shape[:-2] == tensors[0].shape[:-2], t.shape
python3.10/site-packages/detectron2/structures/boxes.py:151): TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if tensor.numel() == 0:

For first function I used export_scripting(model_, "torchscript", "./") and the truncated Runtime Error is as follows:

RuntimeError: 
object has no attribute nms:
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 41
        _log_api_usage_once(nms)
    _assert_has_ops()
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
           ~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
'nms' is being compiled since it was called from '_batched_nms_vanilla'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 109
    for class_id in torch.unique(idxs):
        curr_indices = torch.where(idxs == class_id)[0]
        curr_keep_indices = nms(boxes[curr_indices], scores[curr_indices], iou_threshold)
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
        keep_mask[curr_indices[curr_keep_indices]] = True
    keep_indices = torch.where(keep_mask)[0]
'_batched_nms_vanilla' is being compiled since it was called from 'batched_nms'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/torchvision/ops/boxes.py", line 73
    # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339
    if boxes.numel() > (4000 if boxes.device.type == "cpu" else 20000) and not torchvision._is_tracing():
        return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
    else:
        return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)
'batched_nms' is being compiled since it was called from 'batched_nms'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/detectron2/layers/nms.py", line 20
    # just call it directly.
    # Fp16 does not have enough range for batched NMS, so adding float().
    return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
'batched_nms' is being compiled since it was called from 'find_top_rpn_proposals'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/detectron2/modeling/proposal_generator/proposal_utils.py", line 121
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
'find_top_rpn_proposals' is being compiled since it was called from 'RPN.predict_proposals'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 503
        with torch.no_grad():
            pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
            return find_top_rpn_proposals(
                   ~~~~~~~~~~~~~~~~~~~~~~~
                pred_proposals,
                ~~~~~~~~~~~~~~~
                pred_objectness_logits,
                ~~~~~~~~~~~~~~~~~~~~~~~
                image_sizes,
                ~~~~~~~~~~~~
                self.nms_thresh,
                ~~~~~~~~~~~~~~~~
                self.pre_nms_topk[self.training],
                ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                self.post_nms_topk[self.training],
                ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                self.min_box_size,
                ~~~~~~~~~~~~~~~~~~
                self.training,
                ~~~~~~~~~~~~~ <--- HERE
            )
'RPN.predict_proposals' is being compiled since it was called from 'RPN.forward'
  File "/home/ora-rnd/.local/lib/python3.10/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 477
        else:
            losses = {}
        proposals = self.predict_proposals(
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
        )
        return proposals, losses

Could you please help me to find what I'm doing wrong and in general what is the proper way to convert a detectron2 trained model to vanilla pytorch model or export it to onnx? Information of my working environment:

sys.platform                     linux
Python                           3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]
numpy                            1.24.1
detectron2                       0.6 @/home/xxx/.local/lib/python3.10/site-packages/detectron2
Compiler                         GCC 11.4
CUDA compiler                    not available
DETECTRON2_ENV_MODULE            <not set>
PyTorch                          2.1.0+cu121 @/home/xxx/.local/lib/python3.10/site-packages/torch
PyTorch debug build              False
torch._C._GLIBCXX_USE_CXX11_ABI  False
GPU available                    Yes
GPU 0                            NVIDIA GeForce RTX 3050 Laptop GPU (arch=8.6)
Driver version                   535.113.01
CUDA_HOME                        None - invalid!
Pillow                           9.0.1
torchvision                      0.16.0+cu118 @/home/xxx/.local/lib/python3.10/site-packages/torchvision
torchvision arch flags           /home/xxx/.local/lib/python3.10/site-packages/torchvision/_C.so
fvcore                           0.1.5.post20221221
iopath                           0.1.9
cv2                              4.8.1
-------------------------------  ---------------------------------------------------------------------------
PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90
  - CuDNN 8.9.2
  - Magma 2.6.1
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.1, CUDNN_VERSION=8.9.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.1.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, 
0

There are 0 answers