I want to use a transformer to build a translation model, and I have already implemented an encoder-decoder model in PyTorch.
But now I wonder whether I could do this with the encoder only, and I have written some code:
import torch
import torch.nn as nn

# d_model, n_heads, d_ff, n_layers, src_vocab_size, tgt_vocab_size and device
# are hyperparameters defined elsewhere in my script.
class Transformer(nn.Module):
    def __init__(self):
        super(Transformer, self).__init__()
        self.device = device
        # token embeddings for the source sequence
        self.src_emb = nn.Embedding(src_vocab_size, d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads,
                                       dim_feedforward=d_ff, batch_first=True),
            num_layers=n_layers,
        )
        # project encoder outputs to target-vocabulary logits
        self.projection = nn.Linear(d_model, tgt_vocab_size, bias=True)

    def forward(self, src):
        # True where the input is the padding token (index 0)
        src_padding_mask = (src == 0)
        # src_mask = nn.Transformer.generate_square_subsequent_mask(src.size(1)).bool().to(self.device)
        memory = self.src_emb(src)
        # memory = self.transformer_encoder(memory, src_key_padding_mask=src_padding_mask, mask=src_mask)
        memory = self.transformer_encoder(memory, src_key_padding_mask=src_padding_mask)
        output = self.projection(memory)
        # flatten to (batch * seq_len, tgt_vocab_size) for the loss
        return output.view(-1, output.shape[-1])
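To make the shapes concrete, a forward pass with dummy data looks like this (the hyperparameter values are placeholders picked just for the example, not my real settings):

# Smoke test for the class above; continues from the code block directly above.
d_model, n_heads, d_ff, n_layers = 512, 8, 2048, 6
src_vocab_size, tgt_vocab_size = 1000, 1200
device = torch.device("cpu")

model = Transformer().to(device)
src = torch.randint(1, src_vocab_size, (2, 10))  # batch of 2 sequences, length 10
src[:, 7:] = 0                                   # pretend the last positions are padding
logits = model(src)
print(logits.shape)                              # torch.Size([20, 1200]) = (batch * seq_len, tgt_vocab_size)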
But I don't know whether I should use src_mask or not.
From reading another author's answer, I know that nn.TransformerEncoder with a causal mask can be used to build a GPT-like model.
So I think the encoder-only version with the mask is probably correct, but I don't know whether this code is right without that mask.
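To make that concrete, here is a minimal sketch of how the commented-out causal mask would be built and passed to nn.TransformerEncoder (hyperparameter values are again placeholders chosen only for the example):

import torch
import torch.nn as nn

# Sketch of the causal-mask (GPT-style) usage: each position may attend only
# to itself and earlier positions.
d_model, n_heads, d_ff, n_layers, vocab_size = 512, 8, 2048, 6, 1000

emb = nn.Embedding(vocab_size, d_model)
encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads,
                               dim_feedforward=d_ff, batch_first=True),
    num_layers=n_layers,
)

src = torch.randint(1, vocab_size, (2, 10))  # (batch, seq_len)
padding_mask = (src == 0)                    # True where padded
# Upper-triangular mask: True above the diagonal blocks attention to future tokens.
causal_mask = nn.Transformer.generate_square_subsequent_mask(src.size(1)).bool()

out = encoder(emb(src), mask=causal_mask, src_key_padding_mask=padding_mask)
print(out.shape)                             # torch.Size([2, 10, 512])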
That's all, thanks.
Could you tell me something about this?