TF-Agents TypeError: The two structures do not match, next_step_type, policy_info

157 views Asked by At

Hi and thank you in advance,

I'm trying to run a custom environment in TF-Agents and I'm getting the following error:

TypeError: The two structures do not match:
  Trajectory(
{'action': .,
 'discount': .,
 'next_step_type': .,
 'observation': {'Observation_1': .,
                 'Observation_2': .,
                 'Observation_3': .,
                 'Observation_4': .},
 'policy_info': (),
 'reward': .,
 'step_type': .})
vs.
  TimeStep(
{'discount': .,
 'observation': {'Observation_1': .,
                 'Observation_2': .,
                 'Observation_3': .,
                 'Observation_4': .},
 'reward': .,
 'step_type': .})
Values:
  Trajectory(
{'action': array([7.5856867, 5.9586177, 4.926343 ], dtype=float32),
 'discount': array(0.9, dtype=float32),
 'next_step_type': array(1),
 'observation': {'Observation_1': array([...]),
                 'Observation_2': array([...1]),
                 'Observation_3': array([...]),
                 'Observation_4': array([...])},
 'policy_info': (),
 'reward': array(0., dtype=float32),
 'step_type': array(0)})
vs.
  TimeStep(
{'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
 'observation': {'Observation_1': BoundedTensorSpec(shape=(48,), dtype=tf.float64, name='Observation_1', minimum=array(0.), maximum=array(1.79769313e+308)),
                 'Observation_2': BoundedTensorSpec(shape=(48,), dtype=tf.float64, name='Observation_2', minimum=array(0.), maximum=array(1.79769313e+308)),
                 'Observation_3': BoundedTensorSpec(shape=(21,), dtype=tf.float64, name='Observation_3', minimum=array(0.), maximum=array(1.79769313e+308)),
                 'Observation_4': BoundedTensorSpec(shape=(20,), dtype=tf.float64, name='Observation_4', minimum=array(0.), maximum=array(1.79769313e+308))},
 'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
 'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')}).

The only difference I could figure out is that my observations contain the values next_step_type and policy_info, which are not in my time_step_spec. I'm not sure where those values are generated. Because the code is quite long, I omitted some of it; please let me know what additional code is necessary to help solve the issue.


# Specs taken from the wrapped TF environment; both are passed to ActionNet.
action_spec=tf_env.action_spec()
input_tensor_spec=tf_env.observation_spec()


# Custom actor network (ActionNet is defined further down in this listing).
action_net=ActionNet(input_tensor_spec, action_spec, preprocessing_layers=preprocessing_layers, preprocessing_combiner=preprocessing_combiner)

# NOTE(review): data_spec is the environment's TimeStep spec, but the buffer
# built from it is later used via replay_buffer.add_batch as a driver observer.
# The error message above compares a Trajectory against this TimeStep spec, so
# the spec presumably has to describe trajectories instead -- confirm against
# the TF-Agents replay buffer documentation.
data_spec = tf_env.time_step_spec()
batch_size = 32
max_length = 1000

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec,
    batch_size=batch_size,
    max_length=max_length)


# Training reuses tf_env; evaluation wraps a separate CustomEnv instance.
train_env = tf_env
eval_env = tf_py_environment.TFPyEnvironment(CustomEnv(dp=dp))


optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Variable handed to the agent as its train_step_counter.
train_step_counter = tf.Variable(0)

def printBatch(items):
  """Print every element of ``items`` on its own line.

  Registered as a driver observer in ``collect_episode`` for debugging.
  """
  for entry in items:
    print(entry)

# REINFORCE agent built from the training environment's specs and the custom
# actor network; initialize() is called right after construction.
tf_agent = reinforce_agent.ReinforceAgent(
    train_env.time_step_spec(),
    train_env.action_spec(),
    actor_network=action_net,
    optimizer=optimizer,
    normalize_returns=True,
    train_step_counter=train_step_counter)
tf_agent.initialize()

class ActionNet(network.Network):
  """Actor network that feeds named observations into a pre-built model.

  The observations are indexed by the entries of ``names`` and passed, as a
  list in that order, to the model stored in ``self._model``; the model output
  is returned as the actions.

  NOTE(review): ``model`` and ``names`` are not defined in the code shown --
  presumably module-level objects from the omitted part of the script; confirm.
  """

  def __init__(self,observation_spec,
               action_spec,
               preprocessing_layers=None,
               preprocessing_combiner=None,
               conv_layer_params=None,
               fc_layer_params=None,
               dropout_layer_params=None,
               activation_fn=tf.keras.activations.relu,
               enable_last_layer_zero_initializer=False,
               name='ActorNetwork'):
    """Create the network.

    Args:
      observation_spec: Spec of the observations; used as the input tensor
        spec of this network and of the encoder.
      action_spec: Spec of the actions; stored as the output tensor spec.
      preprocessing_layers: Forwarded to the EncodingNetwork.
      preprocessing_combiner: Forwarded to the EncodingNetwork.
      conv_layer_params: Forwarded to the EncodingNetwork.
      fc_layer_params: Forwarded to the EncodingNetwork.
      dropout_layer_params: Forwarded to the EncodingNetwork.
      activation_fn: Forwarded to the EncodingNetwork.
      enable_last_layer_zero_initializer: Accepted but not used here.
      name: Accepted but not used; the network name is fixed to 'ActionNet'.
    """
    super(ActionNet, self).__init__(
        input_tensor_spec=observation_spec,
        state_spec=(),
        name='ActionNet')

    kernel_initializer = tf.keras.initializers.VarianceScaling(
        scale=1. / 3., mode='fan_in', distribution='uniform')
    # NOTE(review): the encoder is constructed but `call` never uses it.
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=False)
    self._output_tensor_spec = action_spec

    # `model` comes from outside this class (not visible in the listing).
    self._model = model

  def call(self, observations, step_type, network_state):
    """Compute actions for ``observations``.

    Args:
      observations: Mapping indexed by the entries of ``names``.
      step_type: Ignored.
      network_state: Returned unchanged.

    Returns:
      A tuple ``(actions, network_state)``.
    """
    del step_type

    model_inputs = []
    for name in names:
      feature = observations[name]
      if len(feature.shape) == 3:
        # Rank-3 inputs are reduced to their first slice along axis 0.
        print("len 3")
        feature = feature[0]
      model_inputs.append(feature)

    # Use the model stored in __init__ rather than re-reading the global.
    actions = self._model(model_inputs)

    # Scale and shift actions to the correct range if necessary.
    return actions, network_state
def collect_episode(environment, policy, num_episodes):
  """Run ``policy`` in ``environment`` for at most ``num_episodes`` episodes.

  The policy is wrapped in a PyTFEagerPolicy and stepped by a PyDriver whose
  observers are ``printBatch`` and ``replay_buffer.add_batch`` (in that order);
  both are module-level names.
  """
  eager_policy = py_tf_eager_policy.PyTFEagerPolicy(
      policy, use_tf_function=True, batch_time_steps=True)
  observers = [printBatch, replay_buffer.add_batch]
  episode_driver = py_driver.PyDriver(
      environment,
      eager_policy,
      observers,
      max_episodes=num_episodes)

  # The driver is started from the time step returned by reset().
  first_time_step = environment.reset()
  episode_driver.run(first_time_step)

# NOTE(review): `te` is not defined anywhere in the code shown -- presumably
# the training environment; confirm.
collect_episode(te, tf_agent.collect_policy, collect_episodes_per_iteration)

And the specs from the CustomEnv:

class CustomEnv(py_environment.PyEnvironment):
  """Custom environment with a 3-element action and four named observations."""

  # Keys of the observation dict; one spec is created per name.
  names = ["Observation_1", "Observation_2", "Observation_3", "Observation_4"]

  def __init__(self, dp=None):
    """Build the action spec and the per-name observation specs.

    Args:
      dp: Object whose ``getObservationShape()`` result is indexed by
        observation name to obtain that observation's length.
        NOTE(review): the default ``None`` cannot work, because
        ``dp.getObservationShape()`` is called unconditionally below.
    """
    self._action_spec = BoundedArraySpec(
        shape=([3]), dtype=np.float32, minimum=1.0, maximum=10.0, name='action')

    # Shapes are looked up dynamically from dp. (Original note, cut off in the
    # source: "if hours 0 stay zero if >0 a value of 1 must be added because")
    self._observation_spec = {}
    # Iterate this class's own `names`; the listing referenced `TradingEnv`,
    # which is not defined in the code shown.
    for name in self.names:
      self._observation_spec[name] = BoundedArraySpec(
          shape=[dp.getObservationShape()[name]], dtype=np.float64, minimum=0.0, name=name)

1

There are 1 answers

0
Chris_S On

Solution:

# Build the buffer from the agent's collect_data_spec instead of the
# environment's time_step_spec, and take the batch size from the environment.
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec= tf_agent.collect_data_spec,
    batch_size=train_env.batch_size,
    max_length=replay_buffer_capacity)

The issue was the line:

data_spec= data_spec

I had mixed my own, let's say, pre-converted definition with a TF component that was expecting a converted definition.