I have a custom environment and the following standard code for training a PPO model (Stable Baselines3) with Weights & Biases logging:
import os

import torch
import wandb
from stable_baselines3 import PPO
from wandb.integration.sb3 import WandbCallback

if __name__ == "__main__":
    torch.set_num_threads(1)
    # wandb reads its credentials from environment variables
    os.environ["WANDB_NOTEBOOK_NAME"] = "name"  # CHANGE
    os.environ["WANDB_API_KEY"] = "code"        # CHANGE

    # CustomOfflineEnv is my own environment, defined elsewhere
    vec_env = CustomOfflineEnv(data="data.csv")
    verbosity = 2
    config = {
        "policy_type": "MlpPolicy",
        "total_timesteps": 66000,
    }
    name = "66000-ppo"

    run = wandb.init(
        project="projectName",
        config=config,
        sync_tensorboard=True,  # upload SB3's TensorBoard metrics to wandb
        monitor_gym=True,
        name=name,
    )

    model = PPO(
        policy=config["policy_type"],
        env=vec_env,
        batch_size=256,
        verbose=verbosity,
        tensorboard_log=f"wandb/runs/{name}{run.id}",
    )
    model.learn(
        total_timesteps=config["total_timesteps"],
        callback=WandbCallback(
            gradient_save_freq=100,
            model_save_freq=100,
            model_save_path=f"../../training/models/ppo/{name}{run.id}",
            verbose=verbosity,
        ),
    )
    run.finish()
When I open my wandb dashboard, I only see the loss-related plots. I would like to see the model's reward to understand how it performs, but I haven't found any useful material on this online.
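Is wrapping the environment in SB3's Monitor the right approach here? My understanding is that SB3 only logs episode statistics such as rollout/ep_rew_mean to TensorBoard when the env records them via a Monitor wrapper, and sync_tensorboard=True should then forward them to wandb. A minimal sketch of what I mean (assuming CustomOfflineEnv follows the Gym API):

from stable_baselines3.common.monitor import Monitor

# Wrap the custom env so episode rewards/lengths are recorded;
# SB3 then logs rollout/ep_rew_mean and rollout/ep_len_mean to
# TensorBoard, which sync_tensorboard=True syncs to wandb.
vec_env = Monitor(CustomOfflineEnv(data="data.csv"))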