testGymDoublePendulumEnv.py

You can view and download this file on Github: testGymDoublePendulumEnv.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file serves as an input to testGymDoublePendulum.py
#
# Author:   Johannes Gerstmayr
# Date:     2022-05-18
# Update:   2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import exudyn as exu
from exudyn.utilities import *
import math

import math
from typing import Optional, Union

import numpy as np

import gym
from gym import logger, spaces, Env
from gym.error import DependencyNotInstalled


class DoublePendulumEnv(Env):

    #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
    metadata = {"render_modes": ["human"], "render_fps": 50}

    def __init__(self, thresholdFactor = 1.):

        self.SC = exu.SystemContainer()
        self.mbs = self.SC.AddSystem()


        #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.gravity = 9.81
        self.length = 1.
        self.masscart = 1.
        self.massarm = 0.1
        self.total_mass = self.massarm + self.masscart
        self.armInertia = self.length**2*0.5*self.massarm
        self.force_mag = 10.0
        self.stepUpdateTime = 0.02  # seconds between state updates

        # Angle at which to fail the episode
        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
        self.x_threshold = thresholdFactor*2.4

        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
        #for Env:
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.state = None
        self.rendererRunning=None
        self.useRenderer = False #turn this on if needed

        background = GraphicsDataCheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)

        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
                                           visualization=VObjectGround(graphicsData= [background])))
        nGround=self.mbs.AddNode(NodePointGround())

        gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
                                           color=color4dodgerblue)
        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
                                          physicsInertia=0.1*self.masscart, #not needed
                                          nodeNumber=self.nCart,
                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))

        gArm1 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
                                          physicsInertia=self.armInertia, #not included in original paper
                                          nodeNumber=self.nArm1,
                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1])))

        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))

        gArm2 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
                                          physicsInertia=self.armInertia, #not included in original paper
                                          nodeNumber=self.nArm2,
                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2])))

        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))

        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))

        #gravity
        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))

        #control force
        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))

        #joints and constraints:
        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))




        #%%++++++++++++++++++++++++
        self.mbs.Assemble() #computes initial vector

        self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values


        self.simulationSettings.timeIntegration.numberOfSteps = 1
        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
        self.simulationSettings.timeIntegration.verboseMode = 0
        self.simulationSettings.solutionSettings.writeSolutionToFile = False
        #self.simulationSettings.timeIntegration.simulateInRealtime = True

        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True

        self.SC.visualizationSettings.general.drawWorldBasis=True
        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz

        self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"

        self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data

    def integrateStep(self, force):
        #exudyn simulation part
        #index 2 solver
        self.mbs.SetLoadParameter(self.lControl, 'load', force)

        #progress integration time
        currentTime = self.simulationSettings.timeIntegration.endTime
        self.simulationSettings.timeIntegration.startTime = currentTime
        self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime

        # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
        #                  updateInitialValues=True) #use final value as new initial values
        self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
        currentState = self.mbs.systemData.GetSystemState() #get current values
        self.mbs.systemData.SetSystemState(systemStateList=currentState,
                                        configuration = exu.ConfigurationType.Initial)
        self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
                                                configuration = exu.ConfigurationType.Initial)



    def step(self, action):
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."
        self.setState2InitialValues()

        force = self.force_mag if action == 1 else -self.force_mag

        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        self.integrateStep(force)
        #+++++++++++++++++++++++++
        #compute some output:
        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]

        #finally write updated state:
        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #++++++++++++++++++++++++++++++++++++++++++++++++++

        done = bool(
            cartPosX < -self.x_threshold
            or cartPosX > self.x_threshold
            or arm1Angle < -self.theta_threshold_radians
            or arm1Angle > self.theta_threshold_radians
            or arm2Angle < -self.theta_threshold_radians
            or arm2Angle > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Arm1 just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def setState2InitialValues(self):
        #+++++++++++++++++++++++++++++++++++++++++++++
        #set specific initial state:
        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state

        initialValues = np.zeros(9) #model has 3*3 = 9  redundant states
        initialValues_t = np.zeros(9)

        #build redundant cordinates from self.state
        initialValues[0] = xCart
        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
        initialValues[3+2] = phiArm1
        initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
        initialValues[6+2] = phiArm2

        initialValues_t[0] = xCart_t
        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
        initialValues_t[3+2] = phiArm1_t
        initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
        initialValues_t[6+2] = phiArm2_t

        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)


    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        #super().reset(seed=seed)
        self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
        self.steps_beyond_done = None


        #+++++++++++++++++++++++++++++++++++++++++++++
        #set state into initial values:
        self.setState2InitialValues()

        self.simulationSettings.timeIntegration.endTime = 0
        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions

        #+++++++++++++++++++++++++++++++++++++++++++++

        if not return_info:
            return np.array(self.state, dtype=np.float32)
        else:
            return np.array(self.state, dtype=np.float32), {}

    def render(self, mode="human"):
        if self.rendererRunning==None and self.useRenderer:
            exu.StartRenderer()
            self.rendererRunning = True

    def close(self):
        self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
        if self.rendererRunning==True:
            # SC.WaitForRenderEngineStopFlag()
            exu.StopRenderer() #safely close rendering window!



# #+++++++++++++++++++++++++++++++++++++++++++++
# #reset:
# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)