I'm trying to convert my TensorFlow code to TensorFlow eager. The problem is that in eager mode the forward pass predicts essentially the same action probabilities for different input values, while the same code with graph execution works fine. I've only changed the network; the agent is the same one I used with normal TensorFlow. What could be the problem with the network? The forward pass is in the function get_probs(). Another issue is that the eager network is very slow; I think graph execution is 2-3 times faster.
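For reference, this is roughly how the agent calls the network each step (a simplified sketch; `network` and the random observation are placeholders here, the real agent code is unchanged from the graph version):

import numpy as np

network = PGEagerAtariNetwork(state_space=(84, 84, 4), action_space=3, lr=1e-4)
s = np.random.rand(84, 84, 4).astype(np.float32)     # one stacked-frame observation
probs = network.get_probs(s)                         # the forward pass in question
a = np.random.choice(network.action_space, p=probs)  # sample an action from the policy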

Example probs for one episode in eager mode:

               ...
[0.31471518 0.33622807 0.34905672]
[0.31472355 0.3363353  0.34894115]
[0.31482834 0.33600125 0.34917045]
[0.31461707 0.33643782 0.34894508]
[0.31466153 0.33620775 0.34913075]
[0.31461093 0.33637658 0.3490125 ]
[0.31452385 0.33623937 0.34923682]
[0.31438416 0.33645296 0.3491629 ]
[0.31471425 0.3363982  0.34888753]
[0.314866   0.33610862 0.34902537]
[0.31489033 0.33622313 0.34888652]
               ...

Example probs for one episode with the TensorFlow graph:

               ...
[0.25704077 0.46056205 0.28239718]
[0.20610097 0.49288744 0.30101162]
[0.24638997 0.5338215  0.2197885 ]
[0.22581507 0.51206875 0.2621162 ]
[0.19064051 0.5398092  0.26955026]
[0.24399564 0.4424694  0.313535  ]
[0.25321653 0.48051655 0.26626688]
[0.2241595  0.43447506 0.3413655 ]
[0.20665398 0.5128011  0.28054494]
[0.2943201  0.39530927 0.3103706 ]
               ...

Network

import tensorflow as tf
from keras.layers import *
import numpy as np

tf.enable_eager_execution()
print(tf.executing_eagerly())


class PGEagerAtariNetwork:
    def __init__(self, state_space, action_space, lr):
        self.state_space = state_space
        self.action_space = action_space

        self.model = tf.keras.Sequential()
        self.model.add(InputLayer(input_shape=(84, 84, 4)))
        # Conv
        self.model.add(Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4], activation='relu', name='conv1'))
        self.model.add(Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv2'))
        self.model.add(Conv2D(filters=128, kernel_size=[4, 4], strides=[2, 2], activation='relu', name='conv3'))
        # Flatten
        self.model.add(Flatten(name='flatten'))
        # Fully connected
        self.model.add(Dense(units=512, activation='relu', name='fc1'))
        # Logits
        self.model.add(Dense(units=self.action_space, activation=None, name='logits'))
        self.model.summary()
        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def get_probs(self, s):
        s = s[np.newaxis, :]  # add a batch dimension: (84, 84, 4) -> (1, 84, 84, 4)
        logits = self.model(s)
        probs = tf.nn.softmax(logits).numpy().squeeze()
        return probs

    def update_policy(self, s, r, a):
        with tf.GradientTape() as tape:  # record the forward pass for autodiff
            loss = self.calc_loss(s, r, a)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables),
                                       global_step=tf.train.get_or_create_global_step())

    def calc_loss(self, s, r, a):
        logits = self.model(s)
        # Policy-gradient loss: cross-entropy of the taken actions, weighted by the returns
        policy_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=a, logits=logits)
        loss = tf.reduce_mean(policy_loss * tf.stop_gradient(r))
        return loss
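
For completeness, a minimal sketch of how update_policy() gets called after an episode (the shapes and dummy data are my assumption of what calc_loss() expects: one-hot actions and per-step discounted returns):

import numpy as np

states = np.random.rand(10, 84, 84, 4).astype(np.float32)           # batch of observations
actions = np.eye(3, dtype=np.float32)[np.random.randint(0, 3, 10)]  # one-hot encoded actions
returns = np.random.rand(10).astype(np.float32)                     # discounted returns
network.update_policy(states, returns, actions)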
