Mujoco environment to gymnasium environment

791 views Asked by At

The problem I am facing is that when I am training my agent using PPO, the environment doesn't render using Pygame, but when I manually step through the environment using random actions, the rendering works fine. Here is my code for the environment,

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import mujoco
import glfw
import cv2
import pygame

xml = """  
  -->

  <mujoco model="Humanoid">
    <option timestep="0.005"/>

    <visual>
      <map force="0.1" zfar="30"/>
      <rgba haze="0.15 0.25 0.35 1"/>
      <global offwidth="2560" offheight="1440" elevation="-20" azimuth="120"/>
    </visual>

    <statistic center="0 0 0.7"/>

    <asset>
      <texture type="skybox" builtin="gradient" rgb1=".3 .5 .7" rgb2="0 0 0" width="32" height="512"/>
      <texture name="body" type="cube" builtin="flat" mark="cross" width="128" height="128" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01"/>
      <material name="body" texture="body" texuniform="true" rgba="0.8 0.6 .4 1"/>
      <texture name="grid" type="2d" builtin="checker" width="512" height="512" rgb1=".1 .2 .3" rgb2=".2 .3 .4"/>
      <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
    </asset>

    <default>
      <motor ctrlrange="-1 1" ctrllimited="true"/>
      <default class="body">

        <!-- geoms -->
        <geom type="capsule" condim="1" friction=".7" solimp=".9 .99 .003" solref=".015 1" material="body"/>
        <default class="thigh">
          <geom size=".06"/>
        </default>
        <default class="shin">
          <geom fromto="0 0 0 0 0 -.3"  size=".049"/>
        </default>
        <default class="foot">
          <geom size=".027"/>
          <default class="foot1">
            <geom fromto="-.07 -.01 0 .14 -.03 0"/>
          </default>
          <default class="foot2">
            <geom fromto="-.07 .01 0 .14  .03 0"/>
          </default>
        </default>
        <default class="arm_upper">
          <geom size=".04"/>
        </default>
        <default class="arm_lower">
          <geom size=".031"/>
        </default>
        <default class="hand">
          <geom type="sphere" size=".04"/>
        </default>

        <!-- joints -->
        <joint type="hinge" damping=".2" stiffness="1" armature=".01" limited="true" solimplimit="0 .99 .01"/>
        <default class="joint_big">
          <joint damping="5" stiffness="10"/>
          <default class="hip_x">
            <joint range="-30 10"/>
          </default>
          <default class="hip_z">
            <joint range="-60 35"/>
          </default>
          <default class="hip_y">
            <joint axis="0 1 0" range="-150 20"/>
          </default>
          <default class="joint_big_stiff">
            <joint stiffness="20"/>
          </default>
        </default>
        <default class="knee">
          <joint pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
        </default>
        <default class="ankle">
          <joint range="-50 50"/>
          <default class="ankle_y">
            <joint pos="0 0 .08" axis="0 1 0" stiffness="6"/>
          </default>
          <default class="ankle_x">
            <joint pos="0 0 .04" stiffness="3"/>
          </default>
        </default>
        <default class="shoulder">
          <joint range="-85 60"/>
        </default>
        <default class="elbow">
          <joint range="-100 50" stiffness="0"/>
        </default>
      </default>
    </default>

    <worldbody>
      <geom name="floor" size="0 0 .05" type="plane" material="grid" condim="3"/>
      <light name="spotlight" mode="targetbodycom" target="torso" diffuse=".8 .8 .8" specular="0.3 0.3 0.3" pos="0 -6 4" cutoff="30"/>
      <body name="torso" pos="0 0 1.282" childclass="body">
        <light name="top" pos="0 0 2" mode="trackcom"/>
        <camera name="back" pos="-3 0 1" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
        <camera name="side" pos="0 -3 1" xyaxes="1 0 0 0 1 2" mode="trackcom"/>
        <freejoint name="root"/>
        <geom name="torso" fromto="0 -.08 0 0 .08 0" size=".08"/>
        <geom name="waist_upper" fromto="-.01 -.06 -.12 -.01 .06 -.12" size=".06"/>
        <body name="head" pos="0 0 .19">
          <geom name="head" type="sphere" size=".09"/>
          <camera name="egocentric" pos=".09 0 0" xyaxes="0 -1 0 .1 0 1" fovy="80"/>
        </body>
        <body name="waist_lower" pos="-.01 0 -.26">
          <geom name="waist_lower" fromto="0 -.06 0 0 .06 0" size=".06"/>
          <joint name="abdomen_z" pos="0 0 .065" axis="0 0 1" range="-45 45" class="joint_big_stiff"/>
          <joint name="abdomen_y" pos="0 0 .065" axis="0 1 0" range="-75 30" class="joint_big"/>
          <body name="pelvis" pos="0 0 -.165">
            <joint name="abdomen_x" pos="0 0 .1" axis="1 0 0" range="-35 35" class="joint_big"/>
            <geom name="butt" fromto="-.02 -.07 0 -.02 .07 0" size=".09"/>
            <body name="thigh_right" pos="0 -.1 -.04">
              <joint name="hip_x_right" axis="1 0 0" class="hip_x"/>
              <joint name="hip_z_right" axis="0 0 1" class="hip_z"/>
              <joint name="hip_y_right" class="hip_y"/>
              <geom name="thigh_right" fromto="0 0 0 0 .01 -.44" class="thigh"/>
              <body name="shin_right" pos="0 .01 -.4">
                <joint name="knee_right" class="knee"/>
                <geom name="shin_right" class="shin"/>
                <body name="foot_right" pos="0 0 -.39">
                  <joint name="ankle_y_right" class="ankle_y"/>
                  <joint name="ankle_x_right" class="ankle_x" axis="1 0 .5"/>
                  <geom name="foot1_right" class="foot1"/>
                  <geom name="foot2_right" class="foot2"/>
                </body>
              </body>
            </body>
            <body name="thigh_left" pos="0 .1 -.04">
              <joint name="hip_x_left" axis="-1 0 0" class="hip_x"/>
              <joint name="hip_z_left" axis="0 0 -1" class="hip_z"/>
              <joint name="hip_y_left" class="hip_y"/>
              <geom name="thigh_left" fromto="0 0 0 0 -.01 -.44" class="thigh"/>
              <body name="shin_left" pos="0 -.01 -.4">
                <joint name="knee_left" class="knee"/>
                <geom name="shin_left" fromto="0 0 0 0 0 -.3" class="shin"/>
                <body name="foot_left" pos="0 0 -.39">
                  <joint name="ankle_y_left" class="ankle_y"/>
                  <joint name="ankle_x_left" class="ankle_x" axis="-1 0 -.5"/>
                  <geom name="foot1_left" class="foot1"/>
                  <geom name="foot2_left" class="foot2"/>
                </body>
              </body>
            </body>
          </body>
        </body>
        <body name="right_upper_arm" pos="0 -.17 .06">
          <joint name="right_shoulder1" axis="2 1 1"  class="shoulder"/>
          <joint name="right_shoulder2" axis="0 -1 1" class="shoulder"/>
          <geom name="right_upper_arm" fromto="0 0 0 .16 -.16 -.16" class="arm_upper"/>
          <body name="right_lower_arm" pos=".18 -.18 -.18">
            <joint name="right_elbow" axis="0 -1 1" class="elbow"/>
            <geom name="right_lower_arm" fromto=".01 .01 .01 .17 .17 .17" class="arm_lower"/>
            <body name="right_hand" pos=".18 .18 .18">
              <geom name="right_hand" zaxis="1 1 1" class="hand" size=".09"/>
            </body>
          </body>
        </body>
        <body name="left_upper_arm" pos="0 .17 .06">
          <joint name="left_shoulder1" axis="-2 1 -1" class="shoulder"/>
          <joint name="left_shoulder2" axis="0 -1 -1"  class="shoulder"/>
          <geom name="left_upper_arm" fromto="0 0 0 .16 .16 -.16" class="arm_upper"/>
          <body name="left_lower_arm" pos=".18 .18 -.18">
            <joint name="left_elbow" axis="0 -1 -1" class="elbow"/>
            <geom name="left_lower_arm" fromto=".01 -.01 .01 .17 -.17 .17" class="arm_lower"/>
            <body name="left_hand" pos=".18 -.18 .18">
              <geom name="left_hand" zaxis="1 -1 1" class="hand" size=".09"/>       
            </body>
          </body>
        </body>
      </body>

        <geom name="ring_floor" type="box" size="3 3 0.1" pos="0 0 -0.05" rgba="0.5 0.5 0.5 1"/>

        <!-- Front ropes -->
        <geom name="front_rope1" type="capsule" fromto="-3 3 0.5 3 3 0.5" size="0.05" rgba="1 1 1 1"/>
        <geom name="front_rope2" type="capsule" fromto="-3 3 1 3 3 1" size="0.05" rgba="1 1 1 1"/>
        <geom name="front_rope3" type="capsule" fromto="-3 3 1.5 3 3 1.5" size="0.05" rgba="1 1 1 1"/>

        <!-- Back ropes -->
        <geom name="back_rope1" type="capsule" fromto="-3 -3 0.5 3 -3 0.5" size="0.05" rgba="1 1 1 1"/>
        <geom name="back_rope2" type="capsule" fromto="-3 -3 1 3 -3 1" size="0.05" rgba="1 1 1 1"/>
        <geom name="back_rope3" type="capsule" fromto="-3 -3 1.5 3 -3 1.5" size="0.05" rgba="1 1 1 1"/>

        <!-- Left ropes -->
        <geom name="left_rope1" type="capsule" fromto="-3 -3 0.5 -3 3 0.5" size="0.05" rgba="1 1 1 1"/>
        <geom name="left_rope2" type="capsule" fromto="-3 -3 1 -3 3 1" size="0.05" rgba="1 1 1 1"/>
        <geom name="left_rope3" type="capsule" fromto="-3 -3 1.5 -3 3 1.5" size="0.05" rgba="1 1 1 1"/>

        <!-- Right ropes -->
        <geom name="right_rope1" type="capsule" fromto="3 -3 0.5 3 3 0.5" size="0.05" rgba="1 1 1 1"/>
        <geom name="right_rope2" type="capsule" fromto="3 -3 1 3 3 1" size="0.05" rgba="1 1 1 1"/>
        <geom name="right_rope3" type="capsule" fromto="3 -3 1.5 3 3 1.5" size="0.05" rgba="1 1 1 1"/>


        <geom name="corner1" type="cylinder" size="0.1 1" pos="3 3 1" rgba="0 0 0 1"/>
        <geom name="corner2" type="cylinder" size="0.1 1" pos="-3 3 1" rgba="0 0 0 1"/>
        <geom name="corner3" type="cylinder" size="0.1 1" pos="3 -3 1" rgba="0 0 0 1"/>
        <geom name="corner4" type="cylinder" size="0.1 1" pos="-3 -3 1" rgba="0 0 0 1"/>

    </worldbody>

    <contact>
      <exclude body1="waist_lower" body2="thigh_right"/>
      <exclude body1="waist_lower" body2="thigh_left"/>
    </contact>

    <tendon>
      <fixed name="hamstring_right" limited="true" range="-0.3 2">
        <joint joint="hip_y_right" coef=".5"/>
        <joint joint="knee_right" coef="-.5"/>
      </fixed>
      <fixed name="hamstring_left" limited="true" range="-0.3 2">
        <joint joint="hip_y_left" coef=".5"/>
        <joint joint="knee_left" coef="-.5"/>
      </fixed>
    </tendon>

    <actuator>
      <motor name="abdomen_y"       gear="40"  joint="abdomen_y"/>
      <motor name="abdomen_z"       gear="40"  joint="abdomen_z"/>
      <motor name="abdomen_x"       gear="40"  joint="abdomen_x"/>
      <motor name="hip_x_right"     gear="40"  joint="hip_x_right"/>
      <motor name="hip_z_right"     gear="40"  joint="hip_z_right"/>
      <motor name="hip_y_right"     gear="120" joint="hip_y_right"/>
      <motor name="knee_right"      gear="80"  joint="knee_right"/>
      <motor name="ankle_x_right"   gear="20"  joint="ankle_x_right"/>
      <motor name="ankle_y_right"   gear="20"  joint="ankle_y_right"/>
      <motor name="hip_x_left"      gear="40"  joint="hip_x_left"/>
      <motor name="hip_z_left"      gear="40"  joint="hip_z_left"/>
      <motor name="hip_y_left"      gear="120" joint="hip_y_left"/>
      <motor name="knee_left"       gear="80"  joint="knee_left"/>
      <motor name="ankle_x_left"    gear="20"  joint="ankle_x_left"/>
      <motor name="ankle_y_left"    gear="20"  joint="ankle_y_left"/>
      <motor name="right_shoulder1" gear="20"  joint="right_shoulder1"/>
      <motor name="right_shoulder2" gear="20"  joint="right_shoulder2"/>
      <motor name="right_elbow"     gear="40"  joint="right_elbow"/>
      <motor name="left_shoulder1"  gear="20"  joint="left_shoulder1"/> 
      <motor name="left_shoulder2"  gear="20"  joint="left_shoulder2"/>
      <motor name="left_elbow"      gear="40"  joint="left_elbow"/>
    </actuator>

    <keyframe>
      <!--
      The values below are split into rows for readibility:
        torso position
        torso orientation
        spinal
        right leg
        left leg
        arms
      -->
      <key name="squat" qpos="0 0 0.596
                              0.988015 0 0.154359 0
                              0 0.4 0
                              -0.25 -0.5 -2.5 -2.65 -0.8 0.56
                              -0.25 -0.5 -2.5 -2.65 -0.8 0.56
                              0 0 0 0 0 0"/>
      <key name="stand_on_left_leg" qpos="0 0 1.21948
                                          0.971588 -0.179973 0.135318 -0.0729076
                                          -0.0516 -0.202 0.23
                                          -0.24 -0.007 -0.34 -1.76 -0.466 -0.0415
                                          -0.08 -0.01 -0.37 -0.685 -0.35 -0.09
                                          0.109 -0.067 -0.7 -0.05 0.12 0.16"/>
    </keyframe>
  </mujoco>
"""

target_stance = [0, 0, 1.2, 
                0.5, 0, 0, 0,   # torso orientation
                0, 0, 0,        # spine
                -0.5, 0.2, -1.5, -1.7, -0.5, 0.3,   # right leg
                0.5, -0.2, -1.5, -1.7, -0.5, -0.3,  # left leg 
                0.1, -0.1, -0.7, -0.1, 0.2, 0.3]    # arms

class BoxingEnv(gym.Env):
    def __init__(self):
        # Load the MuJoCo model from XML string
        self.model = mujoco.MjModel.from_xml_string(xml)
        self.sim = mujoco.MjData(self.model)

        # Define action and observation spaces
        num_actuators = self.model.nu
        num_observations = self.model.nq  # This is just an example; adjust as needed
        self.action_space = spaces.Box(low=-1, high=1, shape=(num_actuators,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_observations,), dtype=np.float32)

        self.renderer = mujoco.Renderer(model = self.model, height=720, width=1366)
        self.done = False
        self.fallen = bool(self.sim.qpos[2] < 0.3)
        self.timestep = 0

        num_observations = len(self._get_observation())
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_observations,), dtype=np.float32)

        pygame.init()
        self.screen = pygame.display.set_mode((1366, 720))  # Adjust the resolution as needed


    def step(self, action):
        self.timestep += 1

        # Apply the action to the environment
        self.sim.ctrl[:] = action
        mujoco.mj_step(self.model, self.sim)

        # Get the observation, reward, done, and info
        observation = self._get_observation()
        reward = self._get_reward()
        done = self._get_done()
        self.done = done
        trunctuated = False
        info = {}

        return observation, reward, done, trunctuated, info

    def reset(self, seed = None, **kwargs):
        # Set initial conditions
        init_qpos = [0, 0, 1.2, 0, 0, 0, 0, ...] 
        init_qvel = [0, 0, 0, 0, 0, 0, 0, ...]

        # Reset MuJoCo
        self.sim = mujoco.MjData(self.model)
        mujoco.mj_forward(self.model, self.sim)

        # Get observation 
        obs = self._get_observation()
        mujoco.mj_forward(self.model, self.sim)

        reset_info = {}  # This can be populated with any reset-specific info if needed
        return obs, reset_info


    def render(self, mode='human'):
        self.renderer.update_scene(self.sim)
        
        img = self.renderer.render()
        pygame_img = pygame.surfarray.make_surface(np.transpose(img, (1, 0, 2)))
        self.screen.blit(pygame_img, (0, 0))
        pygame.display.flip()

        if self.timestep >= 20 * 60: # 20 secs at 60 fps    
            obs = self.reset()
            self.timestep = 0

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                exit()


    def _get_observation(self):
      # Joint positions
      qpos = self.sim.qpos
      
      # Joint velocities
      qvel = self.sim.qvel

      contact_forces = np.sum(np.sqrt(np.sum(np.square(np.array(self.sim.cfrc_ext)), axis=1)))
      
      # Concatenate and return as a single observation vector
      observation = np.concatenate([qpos, qvel, [contact_forces]])
      
      return observation


    def _get_reward(self):
        stance_diff = np.linalg.norm(self.sim.qpos - target_stance)
        if self.fallen:
            return -10
        else: 
            return 0
        return -stance_diff

    def _get_done(self):
        if self.fallen:
            return True
        else:
            return False
    
    def sample_random_action(self):

      # Get action bounds
      action_low = self.action_space.low
      action_high = self.action_space.high
      
      # Sample random action as numpy array
      action = np.random.uniform(low=action_low, high=action_high)
      
      return action

class BoxingEnvWrapper(gym.Wrapper):

  def __init__(self, env):
    super().__init__(env)
  
  def reset(self, seed=None, **kwargs):
    obs = self.env.reset()
    return obs
  
  def step(self, action):
    obs, reward, done, trunctuated, info = self.env.step(action)
    
    # Calculate stance reward
    stance_diff = np.linalg.norm(self.env.sim.qpos - target_stance)
    reward = -stance_diff
    
    return obs, reward, trunctuated, done, info
  
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

env = BoxingEnvWrapper(BoxingEnv())
from gymnasium.envs.registration import register

register(
    id='BoxingEnv-v2',
    entry_point='final_boxing:BoxingEnv', 
)


from stable_baselines3.common.vec_env import VecEnvWrapper

    
env = DummyVecEnv([lambda: BoxingEnvWrapper(BoxingEnv())])

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

  


I tried using other forms of rendering such as opencv but it doesnt even render at all unless I use pygame. I expect the code to render the mujoco environment while training so I can check in on the progress but the code just displays a black screen.

1

There are 1 answers

0
Luiz Afonso Marão On

You are rendering in human mode. As your env is a mujocoEnv type, this rendering mode should raise a mujoco rendering window. If you want an image to use as source for your pygame object, you should render the mujocoEnv using rgb_array mode, which will return you the environment's camera image in RGB format.