Problem description
So I have a model class written in Python using PyTorch:
from abc import ABC

import torch
import torch.nn as nn


class ResBlock(nn.Module, ABC):
    def __init__(self, in_channels: int, filters: int, conv_num: int):
        super(ResBlock, self).__init__()
        self.filters = filters
        self.conv_num = conv_num
        self.input_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=1)
        self.inner_conv = torch.nn.Conv1d(in_channels=in_channels, out_channels=filters, kernel_size=3, padding=1)
        self.outer_conv = torch.nn.Conv1d(in_channels=filters, out_channels=filters, kernel_size=3, padding=1)
        self.max_pool = torch.nn.MaxPool1d(kernel_size=2, stride=2)

    def forward(self, x):
        y = x
        for i in range(self.conv_num - 1):
            if i == 0:
                y = self.inner_conv(y)
            else:
                y = self.outer_conv(y)
            y = torch.relu(y)
        y = self.outer_conv(y)
        s = self.input_conv(x)  # 1x1 convolution on the shortcut branch
        y = s + y
        y = torch.relu(y)
        return self.max_pool(y)


class Net(nn.Module, ABC):
    def __init__(self, num_of_classes: int):
        super(Net, self).__init__()
        self.block_1 = ResBlock(1, 16, 2)
        self.block_2 = ResBlock(16, 32, 2)
        self.block_3 = ResBlock(32, 64, 3)
        self.block_4 = ResBlock(64, 128, 3)
        self.block_5 = ResBlock(128, 128, 3)
        self.avg_pool = torch.nn.AvgPool1d(kernel_size=3, stride=3)
        self.flatten = torch.nn.Flatten()
        self.dense_1 = torch.nn.Linear(
            in_features=self.block_5.filters * (249 // self.avg_pool.kernel_size[0]),
            out_features=256
        )
        self.dense_2 = torch.nn.Linear(in_features=256, out_features=128)
        self.classifier = torch.nn.Linear(in_features=128, out_features=num_of_classes)

    def forward(self, x):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.block_5(x)
        x = self.avg_pool(x)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.classifier(x)
        return x
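For reference, the network expects input of shape (batch, 1, length). A quick shape sanity check looks like this (just a sketch; the length 8000 and the class count 10 are placeholder values I picked so that the flattened size matches dense_1's in_features of 128 * (249 // 3) = 10624):

import torch

# Quick shape sanity check (a sketch). The length 8000 is a placeholder:
# five max-pools halve it down to 250, and 250 // 3 = 83 positions remain
# after the average pool, so flattening gives 128 * 83 = 10624 features,
# matching dense_1's in_features.
net = Net(num_of_classes=10)  # 10 classes is a placeholder value
net.eval()

with torch.no_grad():
    dummy = torch.randn(4, 1, 8000)  # (batch, channels, samples)
    out = net(dummy)

print(out.shape)  # torch.Size([4, 10])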
It works perfectly fine when used in the Python environment. I trained it, got ~65% accuracy on the test set, and wanted to export it with TorchScript and then import it into a C++ app.
Here is the code used for exporting:
# Training code skipped for simplification...
# Here the model is actually trained, weights are updated and so on
jit_model = torch.jit.script(model)
jit_model.save('torchscript-model.pt')
Before importing it into C++, I checked that the model exported correctly by loading it back into a Python script with torch.jit.load and evaluating it against my test dataset again; I got the same ~65% accuracy, as expected.
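That check was essentially the following (a minimal sketch, assuming the same CsvDataset and constants helpers that appear in the training code under Update 2 below):

import os

import torch

loaded = torch.jit.load('torchscript-model.pt')
loaded.eval()

test_dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'test.csv'))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=constants.BATCH_SIZE)

correct = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.unsqueeze(1)   # (batch, 1, length), same as during training
        labels = labels.squeeze(1)
        outputs = loaded(inputs)
        correct += outputs.argmax(1).eq(labels).sum().item()

print(correct / len(test_dataset))     # ~0.65, same as the original in-memory model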
So the next logical step was to import the model into the C++ program and evaluate the loaded model on the same data. Here is the code:
#include <torch/torch.h>
#include <torch/script.h>

#include "constants.hh"

int main() {
    torch::manual_seed(1);
    torch::Device device(torch::kCPU);

    auto model = torch::jit::load("./torchscript-model.pt");
    model.to(device);

    auto test_raw_dataset = CsvDataset(constants::kTestCsv);
    auto test_dataset = test_raw_dataset.map(torch::data::transforms::Stack<>());
    auto test_data_loader = torch::data::make_data_loader<torch::data::samplers::SequentialSampler>(
        std::move(test_dataset), torch::data::DataLoaderOptions(constants::kBatchSize));

    size_t correct_count = 0;
    for (const auto& batch : *test_data_loader) {
        auto inputs = batch.data.to(device);
        auto labels = batch.target.to(device);
        inputs = inputs.unsqueeze(1);  // (batch, 1, length), same as in Python
        labels = labels.squeeze(1);

        auto outputs = model.forward({inputs}).toTensor();
        auto prediction = outputs.argmax(1);
        correct_count += prediction.eq(labels).sum().item<int64_t>();
    }
    // Cast to double to avoid integer division; test_dataset was moved into the
    // loader, so take the size from the raw dataset.
    auto accuracy = static_cast<double>(correct_count) / test_raw_dataset.size().value();
    // Rest of code removed for clarity...
}
Actual question
But here is the thing: the accuracy calculated in C++ comes out at ~12%. Why?
What have I tried/found so far:
- All of the predictions when the model is loaded in C++ are the same (equal to 6). When the model is loaded in Python everything is fine.
- All of the calculations after acquiring outputs are the same in Python and C++, so the problem is not in the way I'm calculating accuracy.
- Using torch::manual_seed does nothing. (It should not change anything, but why not try...) torch::NoGradGuard also does nothing.
- I have also ensured that the test data gets into the model in the same order in Python and in C++.
- The model is running in eval mode in both cases.
Update 1
Checked the loaded model against a static input created with torch.ones; Python and C++ still yield different results.
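The Python side of that check looked roughly like this (a sketch; 8000 is the same placeholder input length as in the shape check above). The C++ side builds the same tensor with torch::ones({1, 1, 8000}) and passes it through the loaded module, and it is those two outputs that disagree:

import torch

# Static-input comparison (a sketch); 8000 is a placeholder length consistent
# with the model's dense_1 dimensions.
fixed_input = torch.ones(1, 1, 8000)

loaded = torch.jit.load('torchscript-model.pt')
loaded.eval()
model.eval()

with torch.no_grad():
    print(model(fixed_input))   # original in-memory model
    print(loaded(fixed_input))  # scripted model loaded back into Python

# In C++ the equivalent call is roughly:
#   auto out = model.forward({torch::ones({1, 1, 8000})}).toTensor();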
Update 2
Python training/export code, as @gspr requested:
import math
import os

import torch

# CsvDataset and constants are project modules; Net is the model defined above.
dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'train.csv'))
sampler = torch.utils.data.sampler.RandomSampler(dataset)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=constants.BATCH_SIZE, sampler=sampler)
train_test_size = len(dataset)
batch_count = math.ceil(train_test_size / constants.BATCH_SIZE)

test_dataset = CsvDataset(os.path.join(constants.CSV_DIR, 'test.csv'))
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=constants.BATCH_SIZE)
test_set_size = len(test_dataset)

model = Net(dataset.num_classes())
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=constants.LEARNING_RATE)

for epoch in range(constants.EPOCHS):
    running_loss = 0.0
    correct_count = 0
    for i, data in enumerate(dataloader, 1):
        inputs, labels = data
        inputs = inputs.unsqueeze(1)
        labels = labels.squeeze(1)

        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)

        running_loss += loss.item()
        prediction = outputs.argmax(1)
        correct_count += sum(prediction.eq(labels)).item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    test_loss = 0.0
    test_correct = 0.0
    with torch.no_grad():
        for i, data in enumerate(test_dataloader, 1):
            inputs, labels = data
            inputs = inputs.unsqueeze(1)
            labels = labels.squeeze(1)

            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            prediction = outputs.argmax(1)
            test_correct += sum(prediction.eq(labels)).item()

    test_sample_loss = test_loss / test_set_size
    test_accuracy = test_correct / test_set_size

script_module = torch.jit.script(model)
torch.jit.save(script_module, 'traced_model.torch')