I converted a PyTorch model to ONNX in order to verify inference. The results produced by the ONNX model are very different from the results obtained by loading the .pth checkpoint directly in PyTorch. My initial judgment is that the problem lies in the ONNX model, but I am not sure what specifically is wrong. Could anyone help me figure it out? Thank you! The model structure is as follows:
import os
import sys
import copy
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from torch.autograd import Variable
def knn(x, k):
    """Return the indices of the k nearest neighbours of every point.

    Input:
        x: point features, [B, C, N]
        k: number of neighbours to return per point
    Return:
        idx: neighbour indices, [B, N, k]

    The top ``k + 1`` candidates are taken and the first one is dropped,
    because the best match of a point is normally the point itself
    (distance 0).
    NOTE(review): when several points coincide, the order among equal
    distances is presumably backend dependent, so the dropped entry is not
    guaranteed to be the query point itself -- confirm if this matters.
    """
    # Negative squared Euclidean distance, expanded as
    # -(|xi|^2 - 2 * xi.xj + |xj|^2), so "largest" means "closest".
    inner = -2 * torch.matmul(x.transpose(2, 1), x)
    xx = torch.sum(x ** 2, dim=1, keepdim=True)
    pairwise_distance = -xx - inner - xx.transpose(2, 1)

    # (batch_size, num_points, k): skip column 0, the self match.
    idx = pairwise_distance.topk(k=k + 1, dim=-1)[1][:, :, 1:]
    return idx
def index_points(points, idx):
    """Gather points by per-batch indices.

    Input:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S] (or [B, S, K], any rank >= 2)
    Return:
        new_points: indexed points data, [B, S, C] (or [B, S, K, C])
    """
    batch_size = points.shape[0]

    # One batch id per index entry: shape [B, 1, ..., 1] expanded to the
    # shape of idx. The tensor is created directly on the device of
    # `points` and expanded as a view, so no host-to-device copy and no
    # materialised repeat is needed.
    view_shape = [batch_size] + [1] * (idx.dim() - 1)
    batch_indices = torch.arange(
        batch_size, dtype=torch.long, device=points.device
    ).view(view_shape).expand_as(idx)

    new_points = points[batch_indices, idx, :]
    return new_points
class STNkd(nn.Module):
    """Predict a k x k feature transform for a set of points.

    Input to ``forward``: [B, k, N]. Output: [B, k, k]. The network output
    is added to the identity matrix, so an all-zero prediction yields the
    identity transform.
    """

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        # Kept as an attribute for compatibility; forward uses F.relu.
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

        self.k = k

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Global max pooling over the point axis.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        # Flattened identity, created with the dtype and on the device of
        # the activations; broadcasting over the batch axis replaces the
        # explicit repeat and the separate `.cuda()` move.
        iden = torch.eye(self.k, dtype=x.dtype, device=x.device).view(
            1, self.k * self.k)
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x
def get_graph_feature(coor, nor, k=10):
    """Build k-NN edge features for the coordinate and the normal stream.

    The neighbourhood graph is computed from `coor` only and reused for
    `nor`.

    Input:
        coor: coordinate-stream features, [B, C, N]
        nor: normal-stream features, [B, C2, N]
        k: number of neighbours per point
    Return:
        coor_feature: [B, 2 * C, N, k]  (neighbour features, then centre)
        nor_feature: [B, 2 * C2, N, k]  (neighbour features, then centre)
        index: per-batch neighbour indices, [B, N, k]
    """
    batch_size, num_dims, num_points = coor.shape
    coor = coor.view(batch_size, -1, num_points)
    idx = knn(coor, k=k)
    index = idx  # per-batch indices, returned to the caller unchanged

    # Use the device of the input instead of a hard-coded 'cuda' so the
    # function also works on CPU and on non-default GPUs.
    device = coor.device
    # Offset every batch so the indices address the flattened [B * N, C] view.
    idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points
    idx = idx + idx_base
    idx = idx.view(-1)

    num_dims2 = nor.size(1)
    coor = coor.transpose(2, 1).contiguous()
    nor = nor.transpose(2, 1).contiguous()

    # coordinate
    coor_feature = coor.view(batch_size * num_points, -1)[idx, :]
    coor_feature = coor_feature.view(batch_size, num_points, k, num_dims)
    coor = coor.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)
    coor_feature = torch.cat((coor_feature, coor), dim=3).permute(0, 3, 1, 2).contiguous()

    # normal vector
    nor_feature = nor.view(batch_size * num_points, -1)[idx, :]
    nor_feature = nor_feature.view(batch_size, num_points, k, num_dims2)
    nor = nor.view(batch_size, num_points, 1, num_dims2).repeat(1, 1, k, 1)
    nor_feature = torch.cat((nor_feature, nor), dim=3).permute(0, 3, 1, 2).contiguous()
    return coor_feature, nor_feature, index
class GraphAttention(nn.Module):
    """Attention-weighted aggregation of neighbour features.

    ``forward`` inputs:
        Graph_index: neighbour indices, [B, N, K]
        x: point features used to compute the attention, [B, C, N]
        feature: edge features to aggregate, [B, D, N, K] with D == out_dim
    ``forward`` output:
        graph_feature: [B, D, N]
    """

    def __init__(self, feature_dim, out_dim, K):
        super(GraphAttention, self).__init__()
        # Stored but not used by forward.
        self.dropout = 0.6
        self.conv = nn.Sequential(
            nn.Conv2d(feature_dim * 2, out_dim, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_dim),
            nn.LeakyReLU(negative_slope=0.2))
        self.K = K

    def forward(self, Graph_index, x, feature):
        B, C, N = x.shape
        # NOTE(review): this reinterprets the [B, C, N] memory as [B, N, C]
        # without transposing the axes. A permute(0, 2, 1) may have been
        # intended, but changing it would alter the behaviour that existing
        # checkpoints were trained with, so it is left as is -- confirm.
        x = x.contiguous().view(B, N, C)
        feature = feature.permute(0, 2, 3, 1)  # [B, N, K, D]
        neighbor_feature = index_points(x, Graph_index)  # [B, N, K, C]
        centre = x.view(B, N, 1, C).expand(B, N, self.K, C)
        delta_f = torch.cat(
            [centre - neighbor_feature, neighbor_feature], dim=3
        ).permute(0, 3, 2, 1)  # [B, 2 * C, K, N]
        e = self.conv(delta_f)
        e = e.permute(0, 3, 2, 1)  # [B, N, K, D]
        # Normalise the scores over the K neighbours of every point.
        attention = F.softmax(e, dim=2)  # [B, npoint, nsample, D]
        graph_feature = torch.sum(torch.mul(attention, feature), dim=2).permute(0, 2, 1)
        return graph_feature
class TSGCNet(nn.Module):
    """Two-stream graph network producing per-point class probabilities.

    The input is split along the channel axis into a coordinate stream
    (first ``in_channels`` channels) and a normal stream (remaining
    channels); both streams are processed separately and then fused.

    Args:
        k: number of neighbours used for every graph layer.
        in_channels: channels per stream (the input has 2 * in_channels).
        output_channels: number of classes.

    ``forward`` input: [B, 2 * in_channels, N].
    ``forward`` output: softmax scores, [B, N, output_channels].
    """

    def __init__(self, k=32, in_channels=12, output_channels=17):
        super(TSGCNet, self).__init__()
        self.k = k
        self.in_channels = in_channels
        self.output_channels = output_channels

        # ---- coordinate stream ----
        # The BatchNorm layers are registered both as attributes and inside
        # the Sequential blocks; both names appear in the state dict, so
        # this layout must stay as is for existing checkpoints to load.
        self.bn1_c = nn.BatchNorm2d(64)
        self.bn2_c = nn.BatchNorm2d(128)
        self.bn3_c = nn.BatchNorm2d(256)
        self.bn4_c = nn.BatchNorm1d(512)
        self.conv1_c = nn.Sequential(nn.Conv2d(in_channels * 2, 64, kernel_size=1, bias=False),
                                     self.bn1_c,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.conv2_c = nn.Sequential(nn.Conv2d(64 * 2, 128, kernel_size=1, bias=False),
                                     self.bn2_c,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.conv3_c = nn.Sequential(nn.Conv2d(128 * 2, 256, kernel_size=1, bias=False),
                                     self.bn3_c,
                                     nn.LeakyReLU(negative_slope=0.2))
        # 448 = 64 + 128 + 256, the concatenated outputs of the three layers.
        self.conv4_c = nn.Sequential(nn.Conv1d(448, 512, kernel_size=1, bias=False),
                                     self.bn4_c,
                                     nn.LeakyReLU(negative_slope=0.2))

        # The first attention layer and the feature transforms operate on
        # the raw per-stream channels, so they follow `in_channels`
        # (12 by default) instead of a hard-coded 12.
        self.attention_layer1_c = GraphAttention(feature_dim=in_channels, out_dim=64, K=self.k)
        self.attention_layer2_c = GraphAttention(feature_dim=64, out_dim=128, K=self.k)
        self.attention_layer3_c = GraphAttention(feature_dim=128, out_dim=256, K=self.k)
        self.FTM_c1 = STNkd(k=in_channels)

        # ---- normal stream ----
        self.bn1_n = nn.BatchNorm2d(64)
        self.bn2_n = nn.BatchNorm2d(128)
        self.bn3_n = nn.BatchNorm2d(256)
        self.bn4_n = nn.BatchNorm1d(512)
        self.conv1_n = nn.Sequential(nn.Conv2d(in_channels * 2, 64, kernel_size=1, bias=False),
                                     self.bn1_n,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.conv2_n = nn.Sequential(nn.Conv2d(64 * 2, 128, kernel_size=1, bias=False),
                                     self.bn2_n,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.conv3_n = nn.Sequential(nn.Conv2d(128 * 2, 256, kernel_size=1, bias=False),
                                     self.bn3_n,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.conv4_n = nn.Sequential(nn.Conv1d(448, 512, kernel_size=1, bias=False),
                                     self.bn4_n,
                                     nn.LeakyReLU(negative_slope=0.2))
        self.FTM_n1 = STNkd(k=in_channels)

        # ---- feature-wise attention ----
        self.fa = nn.Sequential(nn.Conv1d(1024, 1024, kernel_size=1, bias=False),
                                nn.BatchNorm1d(1024),
                                nn.LeakyReLU(0.2))

        # ---- feature fusion ----
        self.pred1 = nn.Sequential(nn.Conv1d(1024, 512, kernel_size=1, bias=False),
                                   nn.BatchNorm1d(512),
                                   nn.LeakyReLU(negative_slope=0.2))
        self.pred2 = nn.Sequential(nn.Conv1d(512, 256, kernel_size=1, bias=False),
                                   nn.BatchNorm1d(256),
                                   nn.LeakyReLU(negative_slope=0.2))
        self.pred3 = nn.Sequential(nn.Conv1d(256, 128, kernel_size=1, bias=False),
                                   nn.BatchNorm1d(128),
                                   nn.LeakyReLU(negative_slope=0.2))
        self.pred4 = nn.Sequential(nn.Conv1d(128, output_channels, kernel_size=1, bias=False))
        self.dp1 = nn.Dropout(p=0.6)
        self.dp2 = nn.Dropout(p=0.6)
        self.dp3 = nn.Dropout(p=0.6)

    def forward(self, x):
        coor = x[:, :self.in_channels, :]
        nor = x[:, self.in_channels:, :]

        # transform: apply the predicted feature transform to each stream
        trans_c = self.FTM_c1(coor)
        coor = coor.transpose(2, 1)
        coor = torch.bmm(coor, trans_c)
        coor = coor.transpose(2, 1)

        trans_n = self.FTM_n1(nor)
        nor = nor.transpose(2, 1)
        nor = torch.bmm(nor, trans_n)
        nor = nor.transpose(2, 1)

        # layer 1: attention aggregation for coordinates, max over the
        # neighbours for normals
        coor1, nor1, index = get_graph_feature(coor, nor, k=self.k)
        coor1 = self.conv1_c(coor1)
        nor1 = self.conv1_n(nor1)
        coor1 = self.attention_layer1_c(index, coor, coor1)
        nor1 = nor1.max(dim=-1, keepdim=False)[0]

        # layer 2
        coor2, nor2, index = get_graph_feature(coor1, nor1, k=self.k)
        coor2 = self.conv2_c(coor2)
        nor2 = self.conv2_n(nor2)
        coor2 = self.attention_layer2_c(index, coor1, coor2)
        nor2 = nor2.max(dim=-1, keepdim=False)[0]

        # layer 3
        coor3, nor3, index = get_graph_feature(coor2, nor2, k=self.k)
        coor3 = self.conv3_c(coor3)
        nor3 = self.conv3_n(nor3)
        coor3 = self.attention_layer3_c(index, coor2, coor3)
        nor3 = nor3.max(dim=-1, keepdim=False)[0]

        coor = torch.cat((coor1, coor2, coor3), dim=1)
        coor = self.conv4_c(coor)
        nor = torch.cat((nor1, nor2, nor3), dim=1)
        nor = self.conv4_n(nor)

        # Per-point stream weights from the channel means (512 channels each).
        # NOTE(review): LeakyReLU outputs can be negative, so avgSum can get
        # close to zero and the ratios large; presumably this amplifies small
        # numerical differences between backends -- confirm.
        avgSum_coor = coor.sum(1) / 512
        avgSum_nor = nor.sum(1) / 512
        avgSum = avgSum_coor + avgSum_nor
        # [B, N] -> [B, 1, N]; unlike reshape(1, 1, N) this also works for
        # batch sizes other than 1 and is identical for batch size 1.
        weight_coor = (avgSum_coor / avgSum).unsqueeze(1)
        weight_nor = (avgSum_nor / avgSum).unsqueeze(1)
        x = torch.cat((coor * weight_coor, nor * weight_nor), dim=1)

        weight = self.fa(x)
        x = weight * x

        # nn.Dropout is not in-place by default, so its result has to be
        # assigned; calling it and discarding the result applies no dropout.
        # Dropout is the identity in eval mode, so inference is unaffected,
        # but training now actually applies it.
        x = self.pred1(x)
        x = self.dp1(x)
        x = self.pred2(x)
        x = self.dp2(x)
        x = self.pred3(x)
        x = self.dp3(x)
        score = self.pred4(x)

        # [B, classes, N] -> [B, N, classes], softmax over the class axis.
        score = score.transpose(2, 1).contiguous()
        score = F.softmax(score, dim=-1)
        return score
The PyTorch-to-ONNX conversion process is as follows:
import torch
from TSGCNet import TSGCNet
# Pick the device first so the checkpoint can be mapped onto it while
# loading (a checkpoint saved on GPU otherwise fails to load on a CPU-only
# machine).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the model.
model = TSGCNet()
checkpoint = torch.load(r'weights\model_100.pth', map_location=device)

# Load the parameters. A model saved through DataParallel prefixes every
# key with "module."; strip only that leading prefix so the keys match.
prefix = 'module.'
state_dict = {
    (k[len(prefix):] if k.startswith(prefix) else k): v
    for k, v in checkpoint['model_state_dict'].items()
}
model.load_state_dict(state_dict)

# Move the model to the device and switch to inference mode so BatchNorm
# uses its running statistics and Dropout is disabled during tracing.
model.to(device)
model.eval()

# Example input on the same device; the shape is only an example.
input_shape = (1, 24, 10000)
dummy_input = torch.randn(*input_shape, device=device)

# Mark the batch and point axes of the input and the output as variable.
# NOTE(review): confirm the model really supports batch sizes other than 1
# before relying on the dynamic batch axis.
input_names = ["input"]
output_names = ["output"]
dynamic_axes = {
    "input": {0: "batch_size", 2: "sequence_dim"},
    "output": {0: "batch_size", 1: "sequence_dim"},
}
model_path = r'weights\model_100.onnx'

# Export the model to ONNX.
torch.onnx.export(model, dummy_input, model_path,
                  input_names=input_names, output_names=output_names,
                  dynamic_axes=dynamic_axes, verbose=True, opset_version=11)
I would like to find the specific reason for this difference. I am not sure whether my conversion method is at fault. I hope someone with experience can suggest a solution that yields a correct ONNX model.