I'm using the following code to build an LSTM model for a graph generation task:
class GraphLSTM(nn.Module):
    """Multi-layer LSTM with an optional input embedding and an optional output MLP.

    Data flow in ``forward``:
    ``input_raw -> [Linear + ReLU] -> LSTM -> [Linear + ReLU + Linear] -> output``
    where the bracketed stages exist only when ``has_input`` / ``has_output``
    are true.

    The recurrent state lives in ``self.hidden`` and is fed into, and
    overwritten by, every ``forward`` call.  It must be either ``None`` (the
    LSTM then starts from zeros) or a tuple ``(h_0, c_0)`` of two tensors, each
    of shape ``(num_layers, batch, hidden_size)``, where ``batch`` is the batch
    size of the tensor passed to ``forward``.  Reset it before each new batch,
    e.g. ``model.hidden = model.init_hidden(batch_size=x.size(0))``.

    Args:
        input_size: Feature size of the raw input.
        batch_size: Batch size used only to build ``self.zero_tensor``.
        embedding_size: Width of the input embedding and of the hidden layer
            of the output MLP.
        hidden_size: Number of features in the LSTM hidden/cell state.
        num_layers: Number of stacked LSTM layers.
        has_input: If true, embed the input with ``Linear + ReLU`` before the
            LSTM; otherwise feed ``input_raw`` to the LSTM directly.
        has_output: If true, map the LSTM output through an MLP to
            ``output_size`` features.
        output_size: Output feature size; required when ``has_output`` is true.
        device: Device on which ``init_hidden`` and ``zero_tensor`` allocate.

    Raises:
        ValueError: If ``has_output`` is true but ``output_size`` is ``None``.
    """

    def __init__(self, input_size, batch_size, embedding_size, hidden_size, num_layers,
                 has_input=True, has_output=False, output_size=None, device='cuda'):
        super().__init__()
        if has_output and output_size is None:
            raise ValueError("output_size must be provided when has_output=True")

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output
        self.device = device
        self.batch_size = batch_size

        if has_input:
            self.input = nn.Linear(input_size, embedding_size)
            rnn_input_size = embedding_size
        else:
            rnn_input_size = input_size
        self.rnn = nn.LSTM(input_size=rnn_input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           batch_first=True)

        if has_output:
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size),
            )
        self.relu = nn.ReLU()

        # Must be set by the caller (see init_hidden) before forward is run
        # on a new batch; None lets nn.LSTM default to a zero state.
        self.hidden = None
        # Not used inside this class; kept so existing callers that read the
        # attribute keep working.
        self.zero_tensor = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                                       device=self.device)

        # In-place initializers (trailing underscore) replace the deprecated
        # nn.init.constant / nn.init.xavier_uniform.
        sigmoid_gain = nn.init.calculate_gain('sigmoid')
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param, gain=sigmoid_gain)

        relu_gain = nn.init.calculate_gain('relu')
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=relu_gain)

    def init_hidden(self, batch_size):
        """Return a fresh zero state ``(h_0, c_0)`` for a batch of ``batch_size``.

        Each tensor has shape ``(num_layers, batch_size, hidden_size)`` and is
        allocated on ``self.device``.  Assign the result to ``self.hidden``.
        """
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, device=self.device),
                torch.zeros(shape, device=self.device))

    def forward(self, input_raw, pack=False, input_len=None):
        """Run the network over a batch-first sequence batch.

        Args:
            input_raw: Input tensor, batch-first (the LSTM is built with
                ``batch_first=True``).
            pack: If true, pack the (embedded) input with ``input_len`` before
                the LSTM and pad the LSTM output back afterwards.
            input_len: Sequence lengths, required when ``pack`` is true.  They
                are passed to ``pack_padded_sequence`` with its default
                settings.

        Returns:
            The LSTM output at every time step, passed through the output MLP
            when ``has_output`` is true.

        Raises:
            ValueError: If ``pack`` is true but ``input_len`` is ``None``.
        """
        if pack and input_len is None:
            raise ValueError("input_len must be provided when pack=True")

        if self.has_input:
            cell_input = self.relu(self.input(input_raw))
        else:
            cell_input = input_raw

        if pack:
            cell_input = pack_padded_sequence(cell_input, input_len, batch_first=True)
        # The state is carried across calls: self.hidden is both the initial
        # state for this call and the slot that receives the final state.
        output_raw, self.hidden = self.rnn(cell_input, self.hidden)
        if pack:
            output_raw, _ = pad_packed_sequence(output_raw, batch_first=True)

        if self.has_output:
            output_raw = self.output(output_raw)
        # Output for each time step, not only the last one.
        return output_raw
But when I run the code with this initialization of the hidden state:
self.node_rnn.hidden = self.node_rnn.init_hidden(batch_size=old_x.size(0))
the following error occurs:
raise RuntimeError(msg.format(expected_hidden_size, list(hx.size())))
RuntimeError: Expected hidden[0] size (4, 512, 16), got [512, 16]
I tried modifying the initialization code for the hidden state, but it didn't work. Can someone please tell me how to handle this problem, what the elements inside those two tuples mean, and why the reported shape differs from the shape I defined for the hidden state? I printed the hidden state's shape using
print(self.node_rnn.hidden[0].shape)
and it gave me
torch.Size([4, 32, 128])
Can someone please tell me how to solve this problem?