Model exported from python gives different results when loaded in C++

985 views Asked by At

Problem description

I have a model class written in Python using PyTorch:

class ResBlock(nn.Module, ABC):
    """Residual block of 1-D convolutions followed by max pooling.

    The main path runs ``conv_num - 1`` convolution + ReLU steps (``inner_conv``
    on the first step, ``outer_conv`` on every later one) and then one more
    ``outer_conv``. The same ``outer_conv`` module, and therefore the same
    weights, is used each time it is applied. The shortcut path is a
    kernel-size-1 convolution of the block input. Both paths are summed,
    passed through ReLU and max-pooled with kernel size 2 and stride 2.
    """

    def __init__(self, in_channels: int, filters: int, conv_num: int):
        """Create the layers.

        Args:
            in_channels: Channel count of the block input.
            filters: Channel count produced by every convolution in the block.
            conv_num: Number of convolutions applied on the main path.
        """
        super().__init__()
        self.filters = filters
        self.conv_num = conv_num

        # Layers are created in this order on purpose: it fixes the order in
        # which random initial weights are drawn.
        self.input_conv = nn.Conv1d(in_channels, filters, kernel_size=1)
        self.inner_conv = nn.Conv1d(in_channels, filters, kernel_size=3, padding=1)
        self.outer_conv = nn.Conv1d(filters, filters, kernel_size=3, padding=1)
        self.max_pool = nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        """Apply the block to ``x`` and return the pooled activation."""
        hidden = x
        for step in range(self.conv_num - 1):
            # Only the first step sees ``in_channels`` channels.
            if step != 0:
                hidden = self.outer_conv(hidden)
            else:
                hidden = self.inner_conv(hidden)
            hidden = torch.relu(hidden)
        hidden = self.outer_conv(hidden)

        shortcut = self.input_conv(x)
        merged = torch.relu(shortcut + hidden)
        return self.max_pool(merged)


class Net(nn.Module, ABC):
    """Classifier: five ``ResBlock`` stages, average pooling, three linear layers.

    The linear layers are applied back to back with no activation between
    them, and the final layer emits one value per class.
    """

    def __init__(self, num_of_classes: int):
        """Create the layers.

        Args:
            num_of_classes: Number of outputs of the final linear layer.
        """
        super().__init__()
        self.block_1 = ResBlock(1, 16, 2)
        self.block_2 = ResBlock(16, 32, 2)
        self.block_3 = ResBlock(32, 64, 3)
        self.block_4 = ResBlock(64, 128, 3)
        self.block_5 = ResBlock(128, 128, 3)

        self.avg_pool = nn.AvgPool1d(kernel_size=3, stride=3)
        self.flatten = nn.Flatten()

        # 249 is hard-coded here; presumably it is the sequence length that
        # reaches ``avg_pool`` for the training data -- confirm before feeding
        # inputs of a different length.
        pooled_length = 249 // self.avg_pool.kernel_size[0]
        flat_features = self.block_5.filters * pooled_length
        self.dense_1 = nn.Linear(flat_features, 256)
        self.dense_2 = nn.Linear(256, 128)
        self.classifier = nn.Linear(128, num_of_classes)

    def forward(self, x):
        """Run ``x`` through all stages and return the classifier output."""
        stage_1 = self.block_1(x)
        stage_2 = self.block_2(stage_1)
        stage_3 = self.block_3(stage_2)
        stage_4 = self.block_4(stage_3)
        stage_5 = self.block_5(stage_4)

        flat = self.flatten(self.avg_pool(stage_5))

        hidden = self.dense_2(self.dense_1(flat))
        return self.classifier(hidden)

It works perfectly fine when used in the Python environment. I have trained it, reached ~65% accuracy on the test set, and wanted to export it using TorchScript and then import it into a C++ app.

Here is the code used for exporting:

# Training code skipped for simplification...
# Here the model is actually trained, weights are updated and so on

# Compile the trained model to TorchScript and write it to disk for the C++ app.
jit_model = torch.jit.script(model)
torch.jit.save(jit_model, 'torchscript-model.pt')

Before importing it into C++, I checked that the model had been exported correctly: I loaded it in a Python script using torch.jit.load and ran it against my test dataset again, which gave the same ~65% accuracy, as expected.

So the next logical step was to import the model into the C++ program, and evaluate the loaded model using the same data. Here is the code:

#include <torch/torch.h>
#include <torch/script.h>
#include "constants.hh"

// Loads the TorchScript module exported from Python and measures its
// classification accuracy on the test CSV.
int main() {
    torch::manual_seed(1);
    torch::Device device(torch::kCPU);

    auto model = torch::jit::load("./torchscript-model.pt");
    model.to(device);
    // Select inference behaviour explicitly instead of relying on the mode the
    // module was saved in.
    model.eval();

    auto test_raw_dataset = CsvDataset(constants::kTestCsv);
    auto test_dataset = test_raw_dataset.map(torch::data::transforms::Stack<>());
    // Read the size now: the dataset is moved into the data loader on the next
    // statement and must not be touched afterwards.
    const size_t test_set_size = test_dataset.size().value();
    auto test_data_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(std::move(test_dataset), torch::data::DataLoaderOptions(constants::kBatchSize));

    // Evaluation needs no autograd bookkeeping.
    torch::NoGradGuard no_grad;

    size_t correct_count = 0;
    for (const auto& batch : *test_data_loader) {
        auto inputs = batch.data.to(device);
        auto labels = batch.target.to(device);

        // Same reshaping as the Python evaluation loop.
        inputs = inputs.unsqueeze(1);
        labels = labels.squeeze(1);

        // torch::jit::Module::forward takes a vector of IValues, hence the braces.
        auto outputs = model.forward({inputs}).toTensor();

        auto prediction = outputs.argmax(1);
        correct_count += prediction.eq(labels).sum().item<int64_t>();
    }

    // Divide in floating point; dividing the two unsigned counts directly
    // truncates the ratio to 0 (or 1).
    const double accuracy = static_cast<double>(correct_count) / static_cast<double>(test_set_size);
    // Rest of code removed for clarity...
}

Actual question

However, the accuracy calculated in C++ is only ~12%. Why?

What have I tried/found so far:

  1. All of the predictions are the same (equal to 6) when the model is loaded in C++. When the model is loaded in Python, everything is fine.
  2. All of the calculations after obtaining the outputs are the same in Python and C++, so the problem is not in the way I'm calculating accuracy.
  3. Using torch::manual_seed does nothing. (It should not change anything, but why not try...)
  4. torch::NoGradGuard also does nothing.
  5. I have also ensured that the test data is fed into the model in the same order in Python and in C++.
  6. The model is running in eval mode in both cases.

Update 1

I checked the loaded model against a static input created with torch.ones, and Python and C++ still yield different results.

Update 2 Python code as @gspr requested:

# Train Net on train.csv, evaluate it on test.csv, then export it to TorchScript.

dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'train.csv'))
sampler = torch.utils.data.sampler.RandomSampler(dataset)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=constants.BATCH_SIZE, sampler=sampler)
train_test_size = len(dataset)
batch_count = math.ceil(train_test_size / constants.BATCH_SIZE)

test_dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'test.csv'))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=constants.BATCH_SIZE)
test_set_size = len(test_dataset)

model = Net(dataset.num_classes())

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=constants.LEARNING_RATE)

# --- Training ---------------------------------------------------------------
model.train()
for epoch in range(constants.EPOCHS):
    # Per-epoch statistics; they are accumulated but not reported in this snippet.
    running_loss = 0.0
    correct_count = 0

    for inputs, labels in dataloader:
        inputs = inputs.unsqueeze(1)
        labels = labels.squeeze(1)

        # Call the module rather than .forward() so registered hooks also run.
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        running_loss += loss.item()
        prediction = outputs.argmax(1)
        # Tensor.sum() reduces in one call; builtin sum() iterates element by element.
        correct_count += prediction.eq(labels).sum().item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


# --- Evaluation -------------------------------------------------------------
test_loss = 0.0
test_correct = 0

model.eval()
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.unsqueeze(1)
        labels = labels.squeeze(1)

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        # CrossEntropyLoss returns the batch mean by default, so weight it by
        # the batch size; the division below then yields a true per-sample mean.
        test_loss += loss.item() * labels.size(0)
        prediction = outputs.argmax(1)
        test_correct += prediction.eq(labels).sum().item()

    test_sample_loss = test_loss / test_set_size
    test_accuracy = test_correct / test_set_size


# --- Export -----------------------------------------------------------------
# NOTE(review): this writes 'traced_model.torch', while the C++ program loads
# './torchscript-model.pt' -- confirm the C++ side reads the file saved here.
script_module = torch.jit.script(model)
torch.jit.save(script_module, 'traced_model.torch')
0

There are 0 answers