Simple Q Learning Example in Python 3

697 views Asked by At

I am working on a simple q learning code in python. After running several iterations the program suggest a valid path, but not always the shortest -which is the point of the program. I am not sure what I am overlooking. I am using a jupyter notebook.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

iterations = 200
goalState = 5
gamma = 0.8
qValues = []

#actions

R = np.array([[-1, 0,-1,-1, 0,-1],
              [ 0,-1,-1,-1,-1,100],
              [-1,-1,-1,-1,-1,100],
              [-1,-1,-1,-1, 0,100],
              [ 0,-1,-1, 0,-1,-1],
              [-1,-1,-1,-1,-1,100]])

#inital Q matrix

Q = np.zeros(R.shape)

for i in range(iterations):

    state = np.random.randint(goalState + 1)

    while state != goalState:

        possibleActions = np.where(R[state] >= 0)[0]

        action = possibleActions[np.random.randint(len(possibleActions+1))]

        nextPossibleActions = np.where(R[action] >= 0)[0]

        for k in nextPossibleActions:
            qValues.append(Q[action][k])

        qMax = max(qValues)

        Q[state][action] += R[state][action] + gamma * qMax

        state = action

Q = Q/Q.max()  #normalising the matrix to percentage values

sns.set()
f, ax = plt.subplots(figsize=(8, 6))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(Q, cmap = cmap, annot=True, linewidths=.5, ax=ax)
0

There are 0 answers