I'm trying to implement maximum likelihood learning for neural probability language model in python from code of log-bilinear model: https://github.com/wenjieguan/Log-bilinear-language-models/blob/master/lbl.py
I used grad function in theano to compute gradient and try using function train to update parameters of model, but it got errors. Here is my code:
def train(self, sentences, alpha = 0.001, batches = 1000):
print('Start training...')
self.alpha = alpha
count = 0
RARE = self.vocab['<>']
#print RARE
q = np.zeros(self.dim, np.float32)
#print q
delta_context = [np.zeros((self.dim, self.dim), np.float32) for i in range(self.context) ]
#print delta_context
delta_feature = np.zeros((len(self.vocab), self.dim), np.float32)
#print delta_feature
for sentence in sentences:
sentence = self.start_sen + sentence + self.end_sen
for pos in range(self.context, len(sentence) ):
count += 1
q.fill(0)
featureW = []
contextMatrix = []
indices = []
for i, r in enumerate(sentence[pos - self.context : pos]):
if r == '<_>':
continue
index = self.vocab.get(r, RARE)
print index
indices.append(index)
ri = self.featureVectors[index]
#print ri
ci = self.contextMatrix[i]
#print ci
featureW.append(ri)
contextMatrix.append(ci)
#Caculating predicted representation for the target word
q += np.dot(ci, ri)
#Computing energy function
energy = np.exp(np.dot(self.featureVectors, q) + self.biases)
#print energy
#Computing the conditional distribution
probs = energy / np.sum(energy)
#print probs
w_index = self.vocab.get(sentence[pos], RARE)
#Computing gradient
logProbs = T.log(probs[w_index])
print 'Gradient start...'
delta_context, delta_feature = T.grad(logProbs, [self.contextMatrix, self.featureVectors])
print 'Gradient completed!'
train = theano.function(
inputs = [self.vocab],
outputs = [logProbs],
updates=((self.featureVectors,self.featureVectors - self.alpha * delta_feature),
(self.contextMatrix,self.contextMatrix - self.alpha * delta_context)),
name="train"
)
print('Training is finished!')
I have just learnt about Python and neural probability language model, so it is quite difficult to me. Could you help me, please! Thank you!