PyTorch: expected CPU, got CUDA tensor


I've been struggling to find what's wrong in my code. I'm trying to implement the DCGAN paper, and for the past hour I've been running into these errors. Could anyone please help me fix this?

I'm training this on Google Colab with a GPU runtime, but I'm still getting this error. Yesterday I implemented the first GAN paper by Ian Goodfellow and did not get this error. I don't know what's happening; any help would be appreciated. Also, please check whether gen_input is correct.

Here is the code:

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image
import torch.optim as optim

#---------------configuration part------------------#
lr = 0.00002 #learning rate
nc = 3 #color channels
nz = 100 #size of latent vector or size of generator input
ngf = 64 #size of feature maps in generator
ndf = 64 #size of feature maps in discriminator
height = 128 #height of the image
width = 128 #width of the image
num_epochs = 100 #the variable name tells everything
workers = 2 #number of workers to load the data in batches
batch_size = 64 #batch size
image_size = 128 #resizing parameter
root = '/content/gdrive/My Drive/sharingans/' #path to the training directory
beta1 = 0.4 #beta1 hyperparameter for the Adam optimizers

#---------------------------------------------------#
#define the shape of the image
img_shape = (nc, height, width)

#---------------------------------------------------#
#define the weights initialization function
#in the DCGAN paper they state that all weights should be
#randomly initialized from a normal distribution
#the following function does that

def weights_init(m):
    classname = m.__class__.__name__ #returns the class name(eg: Conv2d or ConvTranspose2d)
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02) #0.0 is mean and 0.02 is standard deviation
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1, 0.02) #1 is mean and 0.02 is standard deviation
        nn.init.constant_(m.bias.data, 0.0)

#---------------------------------------------------#


#implement the data loader function to load images

def load_data(image_size, root):
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.486, 0.486, 0.486), (0.486, 0.486, 0.486))
        ])

    train_set = torchvision.datasets.ImageFolder(root = root, transform = transform)

    return train_set
train_set = load_data(128, root)
#getting the batches of data
train_data = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = workers)

#---------------------------------------------------#
#implement the generator network

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.convt1 = nn.ConvTranspose2d(in_channels = nz, out_channels = ngf*8, kernel_size = 4, stride = 1, padding = 0, bias = False)
        self.convt2 = nn.ConvTranspose2d(in_channels = ngf*8, out_channels = ngf*4, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.convt3 = nn.ConvTranspose2d(in_channels = ngf*4, out_channels = ngf*2, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.convt4 = nn.ConvTranspose2d(in_channels = ngf*2, out_channels = ngf, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.convt5 = nn.ConvTranspose2d(in_channels = ngf, out_channels = 3, kernel_size=4, stride = 2, padding = 1, bias = False)

    def forward(self, t):
        t = self.convt1(t)
        t = nn.BatchNorm2d(t)
        t = F.relu(t)

        t = self.convt2(t)
        t = nn.BatchNorm2d(t)
        t = F.relu(t)

        t = self.convt3(t)
        t = nn.BatchNorm2d(t)
        t = F.relu(t)

        t = self.convt4(t)
        t = nn.BatchNorm2d(t)
        t = F.relu(t)

        t = self.convt5(t)
        t = F.tanh(t)

        return t

#---------------------------------------------------#
#implement the discriminator network

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = ndf, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.conv2 = nn.Conv2d(in_channels = ndf, out_channels = ndf*2, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.conv3 = nn.Conv2d(in_channels = ndf*2, out_channels = ndf*4, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.conv4 = nn.Conv2d(in_channels = ndf*4, out_channels = ndf*8, kernel_size = 4, stride = 2, padding = 1, bias = False)
        self.conv5 = nn.Conv2d(in_channels = ndf*8, out_channels = 1, kernel_size = 4, stride = 1, padding = 0, bias = False)

    def forward(self, t):
        t = self.conv1(t)
        t = F.leaky_relu(t, 0.2)

        t = self.conv2(t)
        t = nn.BatchNorm2d(t)
        t = F.leaky_relu(t, 0.2)

        t = self.conv3(t)
        t = nn.BatchNorm2d(t)
        t = F.leaky_relu(t, 0.2)

        t = self.conv4(t)
        t = nn.BatchNorm2d(t)
        t = F.leaky_relu(t, 0.2)

        t = self.conv5(t)
        t = F.sigmoid(t)

        return t

#---------------------------------------------------#
#create the instances of networks

generator = Generator()
discriminator = Discriminator()

#apply the weights_init function to randomly initialize weights to mean = 0 and std = 0.02
generator.apply(weights_init)
discriminator.apply(weights_init)

print(generator)
print(discriminator)

#---------------------------------------------------#
#define the loss function
criterion = nn.BCELoss()

#fixed noise
noise = torch.randn(64, nz, 1, 1).cuda()

#conventions for fake and real labels
real_label = 1
fake_label = 0

#create the optimizer instances
optimizer_d = optim.Adam(discriminator.parameters(), lr = lr, betas = (beta1, 0.999))
optimizer_g = optim.Adam(generator.parameters(), lr = lr, betas = (beta1, 0.999))

#---------------------------------------------------#
if torch.cuda.is_available():
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    criterion = criterion.cuda()

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
#---------------------------------------------------#

#Training loop
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_data):
        
        #ones is passed when the data is coming from original dataset
        #zeros is passed when the data is coming from generator
        ones = Tensor(images.size(0), 1).fill_(1.0)
        zeros = Tensor(images.size(0),1).fill_(0.0)
        
        real_images = images.cuda()
        
        optimizer_g.zero_grad()
        
        #following is the input to the generator
        #we create tensor with random noise of size 100
        gen_input = np.random.normal(0,3,(512,100,4,4))
        gen_input = torch.tensor(gen_input, dtype = torch.float32)
        gen_input = gen_input.cuda()
        #we then pass it to generator()
        gen = generator(gen_input) #this returns a image
        
        #now calculate the loss wrt to discriminator output
        g_loss = criterion(discriminator(gen), ones)
        
        #backpropagation
        g_loss.backward()
        #update weights
        optimizer_g.step()
        
        #above was for generator network
        
        #now for the discriminator network
        optimizer_d.zero_grad()
        
        #calculate the real loss
        real_loss = criterion(discriminator(real_images), ones)
        #calculate the fake loss from the generated image
        fake_loss = criterion(discriminator(gen.detach()),zeros)
        #average out the losses
        d_loss = (real_loss + fake_loss)/2
        
        #backpropagation
        d_loss.backward()
        #update weights
        optimizer_d.step()
        
        if i%100 == 0:
            print("[EPOCH %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"%(epoch, epochs, i, len(dataset), d_loss.item(), g_loss.item()))

        total_batch = epoch * len(train_data) + i
        if total_batch%20 == 0:
            save_image(gen.data[:5], '/content/gdrive/My Drive/tttt/%d.png' % total_batch, nrow=5)

And here's the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-36-0af32f223344> in <module>()
     18         gen_input = gen_input.cuda()
     19         #we then pass it to generator()
---> 20         gen = generator(gen_input) #this returns a image
     21 
     22         #now calculate the loss wrt to discriminator output

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/batchnorm.py in __init__(self, num_features, eps, momentum, affine, track_running_stats)
     40         self.track_running_stats = track_running_stats
     41         if self.affine:
---> 42             self.weight = Parameter(torch.Tensor(num_features))
     43             self.bias = Parameter(torch.Tensor(num_features))
     44         else:

TypeError: expected CPU (got CUDA)

Any help would be appreciated. Thank you!

There are 2 answers

Theodor Peifer (BEST ANSWER)

Are you using Colab? Then you should activate the GPU runtime. But if you want to stay on the CPU:

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Now do this for EVERY model or tensor you create, for example:

x = torch.tensor(...).to(device=device)
model = Model(...).to(device=device)

Then, if you switch between CPU and GPU, it handles it automatically for you. But as I said, you probably want to activate CUDA by switching to Colab's GPU runtime.
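
For example, here is a minimal sketch applying this pattern to the setup from the question (Generator, Discriminator, and nz are taken from the code above):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#move the models to the chosen device once, right after creating them
generator = Generator().to(device)
discriminator = Discriminator().to(device)

#create tensors directly on the target device instead of calling .cuda()
noise = torch.randn(64, nz, 1, 1, device=device)

#and inside the training loop:
#real_images = images.to(device)

This way nothing breaks when CUDA is unavailable, because every module and tensor ends up on the same device.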

hasleron

if torch.cuda.is_available():
    generator = generator.cuda()

Here you check whether CUDA is available and, if so, move the generator to the GPU.

gen_input = gen_input.cuda()

The input float tensor is moved to CUDA regardless of whether it is available or not. My bet is that CUDA is not available (Colab is not very consistent with it).

A change like the following could help clarify the issue:

if torch.cuda.is_available():
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    criterion = criterion.cuda()
    print("CUDA active")
else:
    print("CPU active")

Edit:

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

This line prepares the tensor type for you, so just use

gen_input = Tensor(gen_input)

instead of

gen_input = torch.tensor(gen_input, dtype = torch.float32)
gen_input = gen_input.cuda()
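
Putting it together, the noise creation in the loop would then read something like this (a sketch; the noise shape is copied unchanged from the question):

gen_input = np.random.normal(0, 3, (512, 100, 4, 4))
gen_input = Tensor(gen_input) #becomes a CUDA tensor only when CUDA is available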