Hi, and thank you in advance.
I'm trying to get a custom environment running in tf-agents and I'm getting the following error:
TypeError: The two structures do not match:
Trajectory(
{'action': .,
'discount': .,
'next_step_type': .,
'observation': {'Observation_1': .,
'Observation_2': .,
'Observation_3': .,
'Observation_4': .},
'policy_info': (),
'reward': .,
'step_type': .})
vs.
TimeStep(
{'discount': .,
'observation': {'Observation_1': .,
'Observation_2': .,
'Observation_3': .,
'Observation_4': .},
'reward': .,
'step_type': .})
Values:
Trajectory(
{'action': array([7.5856867, 5.9586177, 4.926343 ], dtype=float32),
'discount': array(0.9, dtype=float32),
'next_step_type': array(1),
'observation': {'Observation_1': array([...]),
'Observation_2': array([...1]),
'Observation_3': array([...]),
'Observation_4': array([...])},
'policy_info': (),
'reward': array(0., dtype=float32),
'step_type': array(0)})
vs.
TimeStep(
{'discount': BoundedTensorSpec(shape=(), dtype=tf.float32, name='discount', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32)),
'observation': {'Observation_1': BoundedTensorSpec(shape=(48,), dtype=tf.float64, name='Observation_1', minimum=array(0.), maximum=array(1.79769313e+308)),
'Observation_2': BoundedTensorSpec(shape=(48,), dtype=tf.float64, name='Observation_2', minimum=array(0.), maximum=array(1.79769313e+308)),
'Observation_3': BoundedTensorSpec(shape=(21,), dtype=tf.float64, name='Observation_3', minimum=array(0.), maximum=array(1.79769313e+308)),
'Observation_4': BoundedTensorSpec(shape=(20,), dtype=tf.float64, name='Observation_4', minimum=array(0.), maximum=array(1.79769313e+308))},
'reward': TensorSpec(shape=(), dtype=tf.float32, name='reward'),
'step_type': TensorSpec(shape=(), dtype=tf.int32, name='step_type')}).
The only difference I could figure out is that the Trajectory contains the fields next_step_type and policy_info, which are not in my time_step_spec. I'm not sure where those values are generated. Because there is quite a lot of code, I omitted some of it; please let me know which parts are needed to help solve the issue.
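As far as I can tell, those two fields don't come from my observations at all but from the Trajectory structure the driver hands to its observers; a quick way to compare the two structures (this only assumes tf_agents is installed):

from tf_agents.trajectories import time_step as ts
from tf_agents.trajectories import trajectory

# Trajectory carries the extra fields the error complains about ...
print(trajectory.Trajectory._fields)
# ('step_type', 'observation', 'action', 'policy_info',
#  'next_step_type', 'reward', 'discount')

# ... while a TimeStep (and therefore time_step_spec()) does not.
print(ts.TimeStep._fields)
# ('step_type', 'reward', 'discount', 'observation')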
import numpy as np
import tensorflow as tf
from tf_agents.agents.reinforce import reinforce_agent
from tf_agents.drivers import py_driver
from tf_agents.environments import py_environment, tf_py_environment
from tf_agents.networks import encoding_network, network
from tf_agents.policies import py_tf_eager_policy
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.specs.array_spec import BoundedArraySpec

action_spec = tf_env.action_spec()
input_tensor_spec = tf_env.observation_spec()
action_net = ActionNet(input_tensor_spec, action_spec, preprocessing_layers=preprocessing_layers, preprocessing_combiner=preprocessing_combiner)
data_spec = tf_env.time_step_spec()
batch_size = 32
max_length = 1000
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec,
    batch_size=batch_size,
    max_length=max_length)
train_env = tf_env
eval_env = tf_py_environment.TFPyEnvironment(CustomEnv(dp=dp))
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
train_step_counter = tf.Variable(0)
def printBatch(items):
    for item in items:
        print(item)
tf_agent = reinforce_agent.ReinforceAgent(
    train_env.time_step_spec(),
    train_env.action_spec(),
    actor_network=action_net,
    optimizer=optimizer,
    normalize_returns=True,
    train_step_counter=train_step_counter)
tf_agent.initialize()
class ActionNet(network.Network):

    def __init__(self, observation_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 enable_last_layer_zero_initializer=False,
                 name='ActorNetwork'):
        super(ActionNet, self).__init__(
            input_tensor_spec=observation_spec,
            state_spec=(),
            name='ActionNet')
        kernel_initializer = tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=False)
        self._output_tensor_spec = action_spec
        self._model = model  # 'model' is a Keras model built elsewhere (omitted)

    def call(self, observations, step_type, network_state):
        del step_type
        output = observations
        modelInput = []
        for name in names:  # 'names' is the list of observation keys (omitted)
            if len(output[name].shape) == 3:
                print("len 3")
                modelInput.append(output[name][0])
            else:
                modelInput.append(output[name])
        output = model(modelInput)
        actions = output
        # Scale and shift actions to the correct range if necessary.
        return actions, network_state
def collect_episode(environment, policy, num_episodes):
    driver = py_driver.PyDriver(
        environment,
        py_tf_eager_policy.PyTFEagerPolicy(
            policy, use_tf_function=True, batch_time_steps=True),
        [printBatch, replay_buffer.add_batch],
        max_episodes=num_episodes)
    initial_time_step = environment.reset()
    driver.run(initial_time_step)

collect_episode(te, tf_agent.collect_policy, collect_episodes_per_iteration)
And the specs from the CustomEnv:
class CustomEnv(py_environment.PyEnvironment):

    names = ["Observation_1", "Observation_2", "Observation_3", "Observation_4"]

    def __init__(self, dp=None):
        self._action_spec = BoundedArraySpec(
            shape=[3], dtype=np.float32, minimum=1.0, maximum=10.0, name='action')
        # define shape dynamically, e.g. if hours 0 stay zero if >0 a value of 1 must be added because
        self._observation_spec = {}
        for name in CustomEnv.names:
            self._observation_spec[name] = BoundedArraySpec(
                shape=[dp.getObservationShape()[name]], dtype=np.float64, minimum=0.0, name=name)
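For completeness, a small sanity check one could run against the environment itself, to confirm the declared specs match what the environment actually returns (this uses tf_agents' environment utilities and the same dp object as above):

from tf_agents.environments import utils

# Runs a few episodes with random actions and raises if the returned
# TimeSteps do not match the declared observation/action specs.
utils.validate_py_environment(CustomEnv(dp=dp), episodes=2)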
Solution:
The issue was the line:
data_spec = tf_env.time_step_spec()
I mixed my own, let's say "pre-converted", definition (the environment's TimeStep spec) with a TF component that was expecting the converted definition: the driver writes Trajectory items into the replay buffer, so the buffer has to be built from the agent's collect_data_spec (a Trajectory spec, which includes next_step_type and policy_info) rather than from the environment's time_step_spec().
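A minimal sketch of the fix, assuming the rest of the setup stays as above (the agent has to be created before the buffer, since the Trajectory spec comes from the agent):

# Build the replay buffer from the agent's collect_data_spec (a Trajectory
# spec) instead of the environment's time_step_spec(), so it matches the
# items the driver writes.
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    tf_agent.collect_data_spec,
    batch_size=batch_size,
    max_length=max_length)

With that change, the spec the buffer validates against is the same Trajectory structure shown on the left side of the error above.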