I'm trying to train data (velocity spectrum panel; sgy file) to coordinates (velocity structure profile; csv file) through a simple CNN by PyTorch. In order to test whether the training model is working or not, I tested with 10 data with its labels.
But it yields a weird result of:
Epoch 1
loss: nan [ 1/ 10]
Test Error: Accuracy: 0.0%, Avg loss: nan
Epoch 2
loss: nan [ 1/ 10] Test Error: Accuracy: 0.0%, Avg loss: nan
...
Epoch 10
loss: nan [ 1/ 10] Test Error: Accuracy: 0.0%, Avg loss: nan
Done!
And if I change "batch_size=1" to "batch_size=3", it will show the following error:
Epoch 1
loss: nan [ 3/ 10]
-
RuntimeError Traceback (most recent call last) <ipython-input-9-3c78b7430da0> in <cell line: 144>() 145 print(f"Epoch {t+1}\n-------------------------------") 146 train_loop(train_dataloader, model, loss_fn, optimizer) --> 147 test_loop(test_dataloader, model, loss_fn) 148 print("Done!")
<ipython-input-9-3c78b7430da0> in test_loop(test_dataloader, model, loss_fn) 135 pred = model(data) 136 test_loss += loss_fn(pred, label).item() --> 137 correct += (pred.argmax(1) == label).type(torch.float).sum().item() 138 139 test_loss /= num_batches
RuntimeError: The size of tensor a (3) must match the size of tensor b (40) at non-singleton dimension 1
How can I change my code for making the training works? And how to make my code works no matter what number is in "batch_size="?
The data and label is like:
print(data)
print(data.shape)
tensor([[[0.0000, 0.0000, 1.0000, ..., 0.0184, 0.0348, 0.0492], [0.0000, 0.0000, 1.0000, ..., 0.0442, 0.0363, 0.0250], [0.0000, 0.0000, 1.0000, ..., 0.0564, 0.0388, 0.0295], ..., [1.0000, 0.9606, 0.8394, ..., 0.0093, 0.0152, 0.0153], [1.0000, 0.9524, 0.8419, ..., 0.0091, 0.0151, 0.0160], [1.0000, 0.9305, 0.8363, ..., 0.0093, 0.0146, 0.0157]]]) torch.Size([1, 50, 200])
print(label)
print(label.shape)
tensor([ 178., 1878., 822., 1814., 1375., 2162., 1669., 2304., 2065., 2736., 2528., 2780., 2836., 3008., 3396., 3490., 4013., 3518., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], dtype=torch.float64) torch.Size([40])
The full code would be:
# custom dataset <- revised from "https://pytorch.org/tutorials/beginner/data_loading_tutorial.html"
import torch
from torch.utils.data import Dataset
import pandas as pd
import os
import segyio
import numpy as np
class cvspanel_dataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
self.dv_label = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.dv_label)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
data_name = os.path.join(self.root_dir,
self.dv_label.iloc[idx, 0])
gth = segyio.open(data_name, ignore_geometry=True)
data = gth.trace.raw[:]
data = torch.tensor(data[:, :200])
data = data.unsqueeze(0)
arr = self.dv_label.iloc[idx, 1:]
arr = np.asarray(arr)
label = arr.astype('float').reshape(-1, 2)
label = torch.tensor(label)
label = label.view([-1, 1])
label = label.squeeze()
if self.transform:
data = self.transform(data)
if self.transform:
label = self.transform(label)
return data, label
train_dataset = cvspanel_dataset(csv_file='/content/drive/MyDrive/Colab Notebooks/research_data/synthetic_1D/d-v_label.csv',
root_dir='/content/drive/MyDrive/Colab Notebooks/research_data/synthetic_1D/sgy_cvs_panel',
transform=None)
test_dataset = cvspanel_dataset(csv_file='/content/drive/MyDrive/Colab Notebooks/research_data/synthetic_1D/d-v_label.csv',
root_dir='/content/drive/MyDrive/Colab Notebooks/research_data/synthetic_1D/sgy_cvs_panel',
transform=None)
# dataloader
batch_size = 1
train_dataloader = torch.utils.data.DataLoader(train_dataset,
batch_size)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
batch_size)
# model building
import torch
import torch.nn as nn
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=32, kernel_size=17, stride=1, padding=3),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=10, stride=5, padding=0)
)
self.layer2 = nn.Sequential(
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=6, stride=1, padding=2),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=3, padding=1)
)
self.fc1 = nn.Linear(in_features=64*3*13, out_features=512)
self.drop = nn.Dropout(0.25)
self.fc2 = nn.Linear(in_features=512, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=40)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out1 = out.view(out.size(0), -1)
out = self.fc1(out1)
out = self.drop(out)
out = self.fc2(out)
out = self.fc3(out)
return out
# hyperparameter
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN();
model.to(device)
learning_rate = 0.001;
loss_fn = nn.CrossEntropyLoss();
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate);
# Training <- revised from "https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html"
def train_loop(train_dataloader, model, loss_fn, optimizer):
size = len(train_dataloader.dataset)
model.train()
for batch, (data, label) in enumerate(train_dataloader):
# Compute prediction and loss
pred = model(data)
loss = loss_fn(pred, label)
# Backpropagation
loss.backward()
optimizer.step()
optimizer.zero_grad()
if batch % 100 == 0:
loss, current = loss.item(), (batch + 1) * len(data)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(test_dataloader, model, loss_fn):
model.eval()
size = len(test_dataloader.dataset)
num_batches = len(test_dataloader)
test_loss, correct = 0, 0
with torch.no_grad():
for data, label in test_dataloader:
pred = model(data)
test_loss += loss_fn(pred, label).item()
correct += (pred.argmax(1) == label).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loop(train_dataloader, model, loss_fn, optimizer)
test_loop(test_dataloader, model, loss_fn)
print("Done!")