Does anyone know a solution for this error? I am trying to convert my PyTorch network to a federated learning network, but I always get this error.
I'm using Google Colab and training on a GPU. When I print the size of embeds I get 0, but I don't understand why the data is not being used there.
RuntimeError Traceback (most recent call last)
<ipython-input-42-fd4a5223524b> in <module>()
----> 1 model, history = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
2 #model = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=10)
6 frames
<ipython-input-41-a386f044d41f> in train_model(model, dataloaders, criterion, optimizer, num_epochs, batch_size)
68 # detaching it from its history on the last instance.
69
---> 70 outputs = model(inputs)
71
72 loss = criterion(outputs, labels)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
<ipython-input-36-64e9a7d68b11> in forward(self, sentence)
11 def forward(self, sentence):
12 embeds = self.word_embeddings(sentence)
---> 13 lstm_out, (h,t) = self.lstm(embeds)
14 lstm_out = self.dropout(lstm_out)
15 tag_space = self.output(lstm_out[:,-1,:])
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
554 hx = self.permute_hidden(hx, sorted_indices)
555
--> 556 self.check_forward_args(input, hx, batch_sizes)
557 if batch_sizes is None:
558 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
506 def check_forward_args(self, input, hidden, batch_sizes):
507 # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 508 self.check_input(input, batch_sizes)
509 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
510
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
157 raise RuntimeError(
158 'input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
--> 159 self.input_size, input.size(-1)))
160
161 def get_expected_hidden_size(self, input, batch_sizes):
RuntimeError: input.size(-1) must be equal to input_size. Expected 200, got 0
class LSTM(nn.Module):
    """Sequence classifier: pretrained embedding -> LSTM -> dropout -> linear.

    The linear layer is applied to the LSTM output of the last time step
    only, and produces one score per vocabulary entry.
    """

    def __init__(self, embedding_dim, hidden_layers, vocab_size, num_layers, pretrained_weights):
        """Build the network.

        Args:
            embedding_dim: Size of each embedding vector; also the LSTM
                input size.
            hidden_layers: Hidden size of the LSTM (it is passed as
                ``hidden_size``, despite the parameter name).
            vocab_size: Number of rows in the embedding table and number of
                output scores.
            num_layers: Number of stacked LSTM layers.
            pretrained_weights: 2-D tensor of shape
                ``(vocab_size, embedding_dim)`` used to initialise the
                embedding table.

        Raises:
            ValueError: If ``pretrained_weights`` does not have shape
                ``(vocab_size, embedding_dim)``.
        """
        super().__init__()

        expected_shape = (vocab_size, embedding_dim)
        actual_shape = tuple(pretrained_weights.shape)
        if actual_shape != expected_shape:
            # Fail early with a readable message instead of a bare assertion
            # deep inside nn.Embedding or a size error inside the LSTM.
            raise ValueError(
                'pretrained_weights must have shape {}, got {}'.format(
                    expected_shape, actual_shape))

        # Public constructor for pretrained tables; freeze=False keeps the
        # embedding trainable, as it was when built through `_weight`.
        self.word_embeddings = nn.Embedding.from_pretrained(
            pretrained_weights, freeze=False, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_size=hidden_layers,
                            num_layers=num_layers, batch_first=True)
        self.output = nn.Linear(hidden_layers, vocab_size, bias=False)
        self.dropout = nn.Dropout(0.1)

    def forward(self, sentence):
        """Return the scores computed from the last time step.

        Args:
            sentence: Tensor of token indices. The ``[:, -1, :]`` slice below
                together with ``batch_first=True`` implies a
                ``(batch, seq_len)`` layout.

        Returns:
            Tensor with ``vocab_size`` scores per batch element.
        """
        embeds = self.word_embeddings(sentence)
        # The final hidden/cell states are not needed here.
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout(lstm_out)
        # Keep only the output of the last time step.
        tag_space = self.output(lstm_out[:, -1, :])
        return tag_space
The error is thrown on this line: lstm_out, (h,t) = self.lstm(embeds)
# Early-stopping counter. It has to be initialised once, before the epoch
# loop: resetting it at the top of every epoch (as the original did) makes
# the `== 5` check below impossible to satisfy.
valid_loss_not_decreased = 0

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    # Running totals, reset at the start of every epoch.
    train_loss = 0
    acc_score = 0
    valid_loss = 0
    acc_valid_score = 0

    # Variables to store the losses temporarily
    train_loss_result = 0
    acc_score_result = 0
    valid_loss_result = 0
    acc_valid_score_result = 0

    # Stop once the counter reaches 5.
    if valid_loss_not_decreased == 5:
        break

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()  # Set model to evaluate mode

        for inputs, labels in dataloaders[phase]:
            # Location of the current batch; the model is sent there.
            # NOTE(review): presumably `inputs` is a PySyft pointer tensor
            # produced by a federated data loader -- confirm.
            worker = inputs.location
            model = model.send(worker)  # <---- for Federated Learning
            inputs, labels = inputs.to(device), labels.to(device)
            print("--------> INPUT: ", inputs)
            print("--------> LABEL: ", labels)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward; track history only in the training phase
            with torch.set_grad_enabled(phase == 'train'):
                # backward + optimize only if in training phase
                if phase == 'train':
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    acc = binary_accuracy(outputs, labels)
                    acc_score = acc_score + acc
                    # NOTE(review): if `loss` is a remote pointer it
                    # presumably needs `.get()` before `.item()` -- confirm.
                    train_loss = train_loss + loss.item()
                    loss.backward()
                    optimizer.step()
                    # NOTE(review): nothing visible here retrieves the model
                    # from `worker` again (PySyft examples call
                    # `model.get()` after the step) -- confirm that the code
                    # following this fragment does so.
I also get this error when I run an LSTM model using PySyft. The same LSTM model runs fine without PySyft, and a CNN model runs successfully with PySyft. So I think there may be a bug in PySyft.