Can't convert cuda:0 device type tensor to numpy when trying to enumerate through a torch.utils.data.DataLoader object

236 views Asked by At

I tried to run a multilayer perceptron (MLP) regression model written in PyTorch on a GPU in Google Colab. However, I encountered a series of errors with different approaches. Below is the code that works fine on the CPU.

import torch
from torch import nn
from torch.utils.data import DataLoader
# from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np


class BostonDataset(torch.utils.data.Dataset):
  '''
  Prepare the Boston dataset for regression.

  Args:
    X: Feature matrix, either a NumPy array or a torch tensor.
    y: Targets, one per row of ``X``; NumPy array or torch tensor.
    scale_data: When true, NumPy features are standardized with
      ``StandardScaler`` before being converted to a tensor.

  Inputs that are already tensors are stored unchanged (they are not
  rescaled or copied).
  '''
  def __init__(self, X, y, scale_data=True):
    # Convert each input independently so that a tensor passed for either
    # argument is still stored; otherwise self.X / self.y would be missing
    # and __len__ / __getitem__ would raise AttributeError.
    if not torch.is_tensor(X):
      # Apply scaling if necessary
      if scale_data:
        X = StandardScaler().fit_transform(X)
      X = torch.from_numpy(X)
    if not torch.is_tensor(y):
      y = torch.from_numpy(y)
    self.X = X
    self.y = y

  def __len__(self):
    '''Return the number of samples (rows of ``X``).'''
    return len(self.X)

  def __getitem__(self, i):
    '''Return the ``(features, target)`` pair at index ``i``.'''
    return self.X[i], self.y[i]


class MLP(nn.Module):
  '''
    Multilayer Perceptron for regression.

    Args:
      in_features: Number of input features per sample (default 13).
      hidden_sizes: Widths of the hidden layers, in order (default (64, 32)).
        Each hidden linear layer is followed by a ReLU.
      out_features: Number of outputs per sample (default 1).

    With the defaults the network is
    Linear(13, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 1).
  '''
  def __init__(self, in_features=13, hidden_sizes=(64, 32), out_features=1):
    super().__init__()
    modules = []
    width = in_features
    for hidden in hidden_sizes:
      modules.append(nn.Linear(width, hidden))
      modules.append(nn.ReLU())
      width = hidden
    # Final projection has no activation: the raw value is the regression output.
    modules.append(nn.Linear(width, out_features))
    self.layers = nn.Sequential(*modules)

  def forward(self, x):
    '''
      Forward pass: apply the layer stack to ``x`` and return the result.
    '''
    return self.layers(x)


if __name__ == '__main__':
  # Script entry point: load the Boston housing data, train the MLP for a
  # few epochs on the CPU, and print a running average of the L1 loss.

  # Set fixed random number seed
  torch.manual_seed(42)

  # Load Boston dataset
  # X, y = load_boston(return_X_y=True)
  data_url = "http://lib.stat.cmu.edu/datasets/boston"
  # Raw string for the regex separator: "\s" is not a valid escape in a
  # regular Python string literal.
  raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
  # Features are every column of the even rows plus the first two columns of
  # the odd rows; the target is the third column of the odd rows.
  X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
  y = raw_df.values[1::2, 2]

  # Prepare Boston dataset
  dataset = BostonDataset(X, y)
  trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=1)

  # Initialize the MLP
  mlp = MLP()

  # Define the loss function and optimizer
  loss_function = nn.L1Loss()
  optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)

  # Run the training loop
  for epoch in range(5): # 5 epochs at maximum

    # Print epoch
    print(f'Starting epoch {epoch+1}')

    # Running loss and the number of mini-batches it covers since the last
    # report, so the printed value is a true per-batch average.
    current_loss = 0.0
    batches_since_report = 0

    # Iterate over the DataLoader for training data
    for i, data in enumerate(trainloader):

      # Get and prepare inputs: the dataset tensors are converted to float32
      # here, and targets get a trailing dimension so their shape matches
      # the model output.
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      targets = targets.reshape((targets.shape[0], 1))

      # Zero the gradients
      optimizer.zero_grad()

      # Perform forward pass
      outputs = mlp(inputs)

      # Compute loss
      loss = loss_function(outputs, targets)

      # Perform backward pass
      loss.backward()

      # Perform optimization
      optimizer.step()

      # Print statistics
      current_loss += loss.item()
      batches_since_report += 1
      if i % 10 == 0:
        print('Loss after mini-batch %5d: %.3f' %
              (i + 1, current_loss / batches_since_report))
        current_loss = 0.0
        batches_since_report = 0

  # Process is complete.
  print('Training process has finished.')

My questions are as follows:

  1. What's wrong with the approaches shown below, and how can I fix them?
  2. Are there any general rules for running a model on GPU?
  • Should I specify the device globally at the very beginning? If so, why did the initial approach not work?
  • Should I specify the device for each newly defined tensor and model, and for the loaded data?

I started with setting the default device to GPU at the very beginning:

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Make `device` the default for tensor constructors that are called without
# an explicit device argument.
# NOTE(review): every later traceback still passes through
# torch/utils/_device.py, which suggests this default stays active in the
# notebook session for the subsequent attempts as well — confirm.
torch.set_default_device(device)

but it has the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-8054e92f6fef> in <cell line: 59>()
     94 
     95     # Iterate over the DataLoader for training data
---> 96     for i, data in enumerate(trainloader):
     97 
     98       # Get and prepare inputs

8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
     75         if func in _device_constructors() and kwargs.get('device') is None:
     76             kwargs['device'] = self.device
---> 77         return func(*args, **kwargs)
     78 
     79 # NB: This is directly called from C++ in torch/csrc/Device.cpp

RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'

Then I tried to specify the device for the model and the data one by one, because I realized that the variable dataset is not on the GPU even if I set the default device to GPU. Below are my modifications:

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
# In BostonDataset.__init__: copy the dataset tensors to `device`.
self.X = torch.from_numpy(X).to(device)
self.y = torch.from_numpy(y).to(device)
...
# In MLP.__init__: same layer stack as the CPU version, moved to `device`.
self.layers = nn.Sequential(nn.Linear(13, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1)).to(device)

but it has the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-be16076aba77> in <cell line: 55>()
     86 
     87     # Iterate over the DataLoader for training data
---> 88     for i, data in enumerate(trainloader):
     89 
     90       # Get and prepare inputs

8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
     75         if func in _device_constructors() and kwargs.get('device') is None:
     76             kwargs['device'] = self.device
---> 77         return func(*args, **kwargs)
     78 
     79 # NB: This is directly called from C++ in torch/csrc/Device.cpp

RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'

I did some searching and made the following modification:

# Same DataLoader as the CPU version, but with a random generator created on
# `device` passed in explicitly (an attempt to get past the
# "Expected a 'cuda' device type for generator" error).
trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=1, generator=torch.Generator(device=device))

but it has the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-9-24d5f71c82a6> in <cell line: 55>()
     86 
     87     # Iterate over the DataLoader for training data
---> 88     for i, data in enumerate(trainloader):
     89 
     90       # Get and prepare inputs

8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
     75         if func in _device_constructors() and kwargs.get('device') is None:
     76             kwargs['device'] = self.device
---> 77         return func(*args, **kwargs)
     78 
     79 # NB: This is directly called from C++ in torch/csrc/Device.cpp

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

I searched a lot, but people typically run into this problem when they try to plot something. I don't know why I have the same problem when trying to enumerate through a torch.utils.data.DataLoader object. Nevertheless, I tried one final thing (on top of the original CPU version) that only applies .to(device) to the data in the training loop, hoping that at least there would be no error during the enumeration:

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
...
# In MLP.__init__: same layer stack as the CPU version, moved to `device`.
self.layers = nn.Sequential(nn.Linear(13, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1)).to(device)
...
# In the training loop: convert each batch to float32 and copy it to `device`;
# the dataset itself is left as in the CPU version.
inputs, targets = inputs.float().to(device), targets.float().to(device)

But it gives a similar error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-22-cf2797cc4e83> in <cell line: 55>()
     86 
     87     # Iterate over the DataLoader for training data
---> 88     for i, data in enumerate(trainloader):
     89 
     90       # Get and prepare inputs

8 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_device.py in __torch_function__(self, func, types, args, kwargs)
     75         if func in _device_constructors() and kwargs.get('device') is None:
     76             kwargs['device'] = self.device
---> 77         return func(*args, **kwargs)
     78 
     79 # NB: This is directly called from C++ in torch/csrc/Device.cpp

RuntimeError: Expected a 'cuda' device type for generator but found 'cpu'

I have been trying for hours and would really appreciate any help with this problem!

0

There are 0 answers