Does anyone know a solution for this error? I am trying to convert my PyTorch network into a federated learning setup with PySyft, but I always get this error.

I'm using Google Colab and training on a GPU. When I print the size of embeds I get 0, but I don't understand why the data does not arrive there.
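The size check is just a debug print inside forward (embeds is the output of self.word_embeddings in the model shown below):

    embeds = self.word_embeddings(sentence)
    print(embeds.size())   # the last dimension comes back as 0 when running federated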


RuntimeError                              Traceback (most recent call last)
<ipython-input-42-fd4a5223524b> in <module>()
----> 1 model, history = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
      2 #model = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)

6 frames
<ipython-input-41-a386f044d41f> in train_model(model, dataloaders, criterion, optimizer, num_epochs, batch_size)
     68                         # detaching it from its history on the last instance.
     69 
---> 70                         outputs = model(inputs)
     71 
     72                         loss = criterion(outputs, labels)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

<ipython-input-36-64e9a7d68b11> in forward(self, sentence)
     11     def forward(self, sentence):
     12         embeds = self.word_embeddings(sentence)
---> 13         lstm_out, (h,t) = self.lstm(embeds)
     14         lstm_out = self.dropout(lstm_out)
     15         tag_space = self.output(lstm_out[:,-1,:])

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
    554             hx = self.permute_hidden(hx, sorted_indices)
    555 
--> 556         self.check_forward_args(input, hx, batch_sizes)
    557         if batch_sizes is None:
    558             result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
    506     def check_forward_args(self, input, hidden, batch_sizes):
    507         # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 508         self.check_input(input, batch_sizes)
    509         expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
    510 

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
    157             raise RuntimeError(
    158                 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 159                     self.input_size, input.size(-1)))
    160 
    161     def get_expected_hidden_size(self, input, batch_sizes):

RuntimeError: input.size(-1) must be equal to input_size. Expected 200, got 0
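The message itself only says that the last dimension of the tensor reaching the LSTM does not match input_size. It can be reproduced locally with a plain nn.LSTM and an input whose last dimension is 0 (a minimal sketch; hidden_size=64 is an arbitrary placeholder):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=200, hidden_size=64, num_layers=1, batch_first=True)

ok = torch.randn(4, 10, 200)   # (batch, seq_len, input_size) -> runs fine
out, (h, c) = lstm(ok)

empty = torch.randn(4, 10, 0)  # last dimension is 0
out, (h, c) = lstm(empty)      # RuntimeError: ... Expected 200, got 0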

import torch.nn as nn

class LSTM(nn.Module):

    def __init__(self, embedding_dim, hidden_layers, vocab_size, num_layers, pretrained_weights):
        super(LSTM, self).__init__()

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim, _weight=pretrained_weights, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=hidden_layers, num_layers=num_layers, batch_first=True)
        self.output = nn.Linear(hidden_layers, vocab_size, bias=False)
        self.dropout = nn.Dropout(0.1)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)      # (batch, seq_len, embedding_dim)
        lstm_out, (h, t) = self.lstm(embeds)         # (batch, seq_len, hidden_layers)
        lstm_out = self.dropout(lstm_out)
        tag_space = self.output(lstm_out[:, -1, :])  # last time step -> (batch, vocab_size)

        return tag_space

The error is raised on this line: lstm_out, (h, t) = self.lstm(embeds)
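Running the class locally without PySyft on dummy data is a quick sanity check; a sketch assuming embedding_dim=200 from the error message and placeholder values for the other constructor arguments:

import torch

vocab_size, embedding_dim, hidden_layers, num_layers = 1000, 200, 64, 1
weights = torch.randn(vocab_size, embedding_dim)   # placeholder pretrained weights

model = LSTM(embedding_dim, hidden_layers, vocab_size, num_layers, weights)
batch = torch.randint(1, vocab_size, (4, 12))      # (batch, seq_len) of token ids
print(model(batch).shape)                          # torch.Size([4, 1000])

For context, the relevant part of train_model follows: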

  for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    train_loss = 0
    acc_score = 0

    valid_loss = 0
    acc_valid_score = 0

    # Variables to store the losses temporarily
    train_loss_result = 0
    acc_score_result = 0
    valid_loss_result = 0
    acc_valid_score_result = 0

    # Note: this counter is reset at the start of every epoch,
    # so the early-stopping check below can never trigger.
    valid_loss_not_decreased = 0

    if valid_loss_not_decreased == 5:
      break

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()   # Set model to evaluate mode
        

        for inputs,labels in dataloaders[phase]:

            # Worker that holds the current batch
            worker = inputs.location  # <---- where we will send the model to
            
            #model.to(device)

            model = model.send(worker)   # <---- for Federated Learning

            inputs, labels = inputs.to(device), labels.to(device)
            
            print("--------> INPUT: ",inputs)
            print("--------> LABEL: ",labels)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                # Get model outputs and calculate loss

                # backward + optimize only if in training phase
                if phase == 'train':
                    # we need to clear out the hidden state of the LSTM,
                    # detaching it from its history on the last instance.

                    outputs = model(inputs)

                    loss = criterion(outputs, labels)
                    acc = binary_accuracy(outputs,labels)       
                    acc_score = acc_score + acc
                    train_loss = train_loss + loss.item()
                    loss.backward()
                    
                    optimizer.step()
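For comparison, the loop pattern from the standard PySyft 0.2 tutorials retrieves the model with model.get() after each batch before it is sent to the next worker; the loop above never calls model.get(). Whether or not that is related to the shape error, a sketch of that pattern (reusing the names from the code above) looks like this:

train_loss = 0
for inputs, labels in dataloaders['train']:
    worker = inputs.location       # virtual worker holding this batch
    model.send(worker)             # send the model to the data

    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    model.get()                    # bring the updated weights back
    loss = loss.get()              # fetch the remote loss for logging
    train_loss += loss.item()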

1 Answer

TZSS:

I also get this error when I run an LSTM model with PySyft. The same LSTM model runs fine without PySyft, and a CNN model runs successfully with PySyft, so I think there may be a bug in PySyft's LSTM support.
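One way to narrow it down (a sketch against the PySyft 0.2 API; the worker id and sizes are placeholders) is to run only the embedding step through pointer tensors and compare shapes with the local result:

import torch
import torch.nn as nn
import syft as sy

hook = sy.TorchHook(torch)
alice = sy.VirtualWorker(hook, id="alice")

emb = nn.Embedding(1000, 200)
ids = torch.randint(1, 1000, (4, 12))

local = emb(ids)                      # (4, 12, 200) locally
emb.send(alice)
remote = emb(ids.send(alice)).get()   # the same call through pointer tensors
print(local.shape, remote.shape)      # if the last dimension comes back as 0, the embedding hook is the culprit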