Issues initializing the hidden state of an LSTM: Expected hidden[0] size (4, 512, 16), got [512, 16]


I've used the following code to construct an LSTM model for a graph generation task.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class GraphLSTM(nn.Module):
    """LSTM module for graph generation."""
    def __init__(self, input_size, batch_size, embedding_size, hidden_size, num_layers,
                 has_input=True, has_output=False, output_size=None, device='cuda'):
        super(GraphLSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output
        self.device = device
        self.batch_size = batch_size

        if has_input:
            self.input = nn.Linear(input_size, embedding_size)
            self.rnn = nn.LSTM(input_size=embedding_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               batch_first=True)
        else:
            self.rnn = nn.LSTM(input_size=input_size,
                               hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        if has_output:
            # print("embedding_size: ", output_size)
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size)
            )

        self.relu = nn.ReLU()
        # the hidden state must be initialized before the forward pass
        self.hidden = None

        self.zero_tensor = torch.zeros(self.num_layers, batch_size,
                                       self.hidden_size, device=self.device)

        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param, gain=nn.init.calculate_gain('sigmoid'))
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self, batch_size):
        # (h_0, c_0), each of shape (num_layers, batch_size, hidden_size)
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=self.device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size, device=self.device))


    def forward(self, input_raw, pack=False, input_len=None):
        if self.has_input:
            cell_input = self.input(input_raw)
            cell_input = self.relu(cell_input)
        else:
            cell_input = input_raw
        if pack:
            cell_input = pack_padded_sequence(cell_input, input_len, batch_first=True)

        output_raw, self.hidden = self.rnn(cell_input, self.hidden)

        if pack:
            output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]
        if self.has_output:
            output_raw = self.output(output_raw)
        # return the output at every time step (the top-layer hidden states,
        # or the MLP output when has_output is set)
        return output_raw

But when I run the code with this initialization of the hidden state:

self.node_rnn.hidden = self.node_rnn.init_hidden(batch_size=old_x.size(0))

this error occurs:

raise RuntimeError(msg.format(expected_hidden_size, list(hx.size())))
RuntimeError: Expected hidden[0] size (4, 512, 16), got [512, 16]
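
For context, the following minimal standalone snippet reproduces the same kind of shape complaint when the hidden tensors are 2-D instead of 3-D. The sizes are taken from the error message; input_size and seq_len are made up:

import torch
import torch.nn as nn

# sizes taken from the error message; input_size and seq_len are made up
num_layers, batch, hidden_size = 4, 512, 16
input_size, seq_len = 8, 5

rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
              num_layers=num_layers, batch_first=True)
x = torch.randn(batch, seq_len, input_size)

# correct: h_0 and c_0 must each be (num_layers, batch, hidden_size)
h0 = torch.zeros(num_layers, batch, hidden_size)
c0 = torch.zeros(num_layers, batch, hidden_size)
out, (hn, cn) = rnn(x, (h0, c0))  # works

# wrong: 2-D hidden tensors raise a shape error just like the one above
# (the exact wording of the message depends on the PyTorch version)
bad_h0 = torch.zeros(batch, hidden_size)
bad_c0 = torch.zeros(batch, hidden_size)
out, _ = rnn(x, (bad_h0, bad_c0))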

I tried to modify the initialization code of the hidden state, but it didn't work. What do the two elements inside that tuple mean, and why does the expected shape differ from the hidden-state shape I defined? I've printed the shape of the hidden state with

print(self.node_rnn.hidden[0].shape)

and it gave me

torch.Size([4, 32, 128])

Can someone please tell me how to solve this problem?
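
For reference, here is a minimal self-contained way to exercise the class above. All hyperparameters are illustrative (chosen so the hidden shape matches the printed torch.Size([4, 32, 128])), and device is set to 'cpu' only so the snippet runs anywhere:

# illustrative sizes: input_size=10, embedding_size=64, seq_len=5
model = GraphLSTM(input_size=10, batch_size=32, embedding_size=64,
                  hidden_size=128, num_layers=4, has_input=True,
                  has_output=True, output_size=10, device='cpu')

x = torch.randn(32, 5, 10)  # (batch, seq_len, input_size)
model.hidden = model.init_hidden(batch_size=x.size(0))
y = model(x)

print(model.hidden[0].shape)  # torch.Size([4, 32, 128])
print(y.shape)                # torch.Size([32, 5, 10])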
