I tried to run a multilayer perceptron (MLP) regression model written in PyTorch through GPU in Google Colab. However, I encountered a bunch of errors with different approaches. Below is the code that works fine for CPU.
import torch
from torch import nn
from torch.utils.data import DataLoader
# from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
class BostonDataset(torch.utils.data.Dataset):
'''
Prepare the Boston dataset for regression
'''
def __init__(self, X, y, scale_data=True):
if not torch.is_tensor(X) and not torch.is_tensor(y):
# Apply scaling if necessary
if scale_data:
X = StandardScaler().fit_transform(X)
self.X = torch.from_numpy(X)
self.y = torch.from_numpy(y)
def __len__(self):
return len(self.X)
def __getitem__(self, i):
return self.X[i], self.y[i]
class MLP(nn.Module):
'''
Multilayer Perceptron for regression.
'''
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Linear(13, 64),
nn.ReLU(),
nn.Linear(64, 32),
nn.ReLU(),
nn.Linear(32, 1)
)
def forward(self, x):
'''
Forward pass
'''
return self.layers(x)
if __name__ == '__main__':
# Set fixed random number seed
torch.manual_seed(42)
# Load Boston dataset
# X, y = load_boston(return_X_y=True)
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]
# Prepare Boston dataset
dataset = BostonDataset(X, y)
trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=1)
# Initialize the MLP
mlp = MLP()
# Define the loss function and optimizer
loss_function = nn.L1Loss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
# Run the training loop
for epoch in range(0, 5): # 5 epochs at maximum
# Print epoch
print(f'Starting epoch {epoch+1}')
# Set current loss value
current_loss = 0.0
# Iterate over the DataLoader for training data
for i, data in enumerate(trainloader):
# Get and prepare inputs
inputs, targets = data
inputs, targets = inputs.float(), targets.float()
targets = targets.reshape((targets.shape[0], 1))
# Zero the gradients
optimizer.zero_grad()
# Perform forward pass
outputs = mlp(inputs)
# Compute loss
loss = loss_function(outputs, targets)
# Perform backward pass
loss.backward()
# Perform optimization
optimizer.step()
# Print statistics
current_loss += loss.item()
if i % 10 == 0:
print('Loss after mini-batch %5d: %.3f' %
(i + 1, current_loss / 500))
current_loss = 0.0
# Process is complete.
print('Training process has finished.')
My questions are as follows:
- What's wrong with the current approach shown below and how to fix it?
- Are there any general rules for running a model on GPU?
- Should I specify the device globally at the very beginning? If so, why did the initial approach not work?
- Should I specfiy the device for each new-defined tensor & model and loaded data?
I started with setting the default device to GPU at the very beginning:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.set_default_device(device)
but it has the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-18-8054e92f6fef> in <cell line: 59>()
94
95 # Iterate over the DataLoader for training data
---> 96 for i, data in enumerate(trainloader):
97
98 # Get and prepare inputs
8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
75 if func in _device_constructors() and kwargs.get('device') is None:
76 kwargs['device'] = self.device
---> 77 return func(*args, **kwargs)
78
79 # NB: This is directly called from C++ in torch/csrc/Device.cpp
RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'
Then I tried to specify the device for model and data one by one because I realized that variable dataset
is not on GPU even if I set the default device to GPU. Below are my modifications:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
self.X = torch.from_numpy(X).to(device)
self.y = torch.from_numpy(y).to(device)
...
self.layers = nn.Sequential(nn.Linear(13, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1)).to(device)
but it has the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-4-be16076aba77> in <cell line: 55>()
86
87 # Iterate over the DataLoader for training data
---> 88 for i, data in enumerate(trainloader):
89
90 # Get and prepare inputs
8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
75 if func in _device_constructors() and kwargs.get('device') is None:
76 kwargs['device'] = self.device
---> 77 return func(*args, **kwargs)
78
79 # NB: This is directly called from C++ in torch/csrc/Device.cpp
RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'
I did some search and made the following modification:
trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=1, generator=torch.Generator(device=device))
but it has the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-9-24d5f71c82a6> in <cell line: 55>()
86
87 # Iterate over the DataLoader for training data
---> 88 for i, data in enumerate(trainloader):
89
90 # Get and prepare inputs
8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
75 if func in _device_constructors() and kwargs.get('device') is None:
76 kwargs['device'] = self.device
---> 77 return func(*args, **kwargs)
78
79 # NB: This is directly called from C++ in torch/csrc/Device.cpp
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
I searched a lot but people have this problem when they try to plot something. I don't know why I have the same problem when trying to enumerate through torch.utils.data.DataLoader
object. Nevertheless I tried one final thing (on top of the original CPU version) that only applies .to(device)
to data in the training loop and hoped at least there is no error during the enumeration:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
self.layers = nn.Sequential(nn.Linear(13, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1)).to(device)
...
inputs, targets = inputs.float().to(device), targets.float().to(device)
But it gives a similar error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-22-cf2797cc4e83> in <cell line: 55>()
86
87 # Iterate over the DataLoader for training data
---> 88 for i, data in enumerate(trainloader):
89
90 # Get and prepare inputs
8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
75 if func in _device_constructors() and kwargs.get('device') is None:
76 kwargs['device'] = self.device
---> 77 return func(*args, **kwargs)
78
79 # NB: This is directly called from C++ in torch/csrc/Device.cpp
RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'
I have been trying for hours and really appreciate any help on this problem!