testGymDoublePendulumEnv.py

You can view and download this file on Github: testGymDoublePendulumEnv.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  This file serves as an input to testGymDoublePendulum.py
  5#
  6# Author:   Johannes Gerstmayr
  7# Date:     2022-05-18
  8# Update:   2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
  9#
 10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 11#
 12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 13
 14import exudyn as exu
 15from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
 16import exudyn.graphics as graphics #only import if it does not conflict
 17import math
 18
 19import math
 20from typing import Optional, Union
 21
 22import numpy as np
 23
 24import gym
 25from gym import logger, spaces, Env
 26from gym.error import DependencyNotInstalled
 27
 28import stable_baselines3
 29useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
 30
 31class DoublePendulumEnv(Env):
 32
 33    #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
 34    metadata = {"render_modes": ["human"], "render_fps": 50}
 35
 36    def __init__(self, thresholdFactor = 1.):
 37
 38        self.SC = exu.SystemContainer()
 39        self.mbs = self.SC.AddSystem()
 40
 41
 42        #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 43        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 44        self.gravity = 9.81
 45        self.length = 1.
 46        self.masscart = 1.
 47        self.massarm = 0.1
 48        self.total_mass = self.massarm + self.masscart
 49        self.armInertia = self.length**2*0.5*self.massarm
 50        self.force_mag = 10.0
 51        self.stepUpdateTime = 0.02  # seconds between state updates
 52
 53        # Angle at which to fail the episode
 54        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
 55        self.x_threshold = thresholdFactor*2.4
 56
 57        high = np.array(
 58            [
 59                self.x_threshold * 2,
 60                np.finfo(np.float32).max,
 61                self.theta_threshold_radians * 2,
 62                np.finfo(np.float32).max,
 63                self.theta_threshold_radians * 2,
 64                np.finfo(np.float32).max,
 65            ],
 66            dtype=np.float32,
 67        )
 68
 69        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 70        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
 71        #for Env:
 72        self.action_space = spaces.Discrete(2)
 73        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
 74        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 75        self.state = None
 76        self.rendererRunning=None
 77        self.useRenderer = False #turn this on if needed
 78
 79        background = graphics.CheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
 80
 81        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
 82                                           visualization=VObjectGround(graphicsData= [background])))
 83        nGround=self.mbs.AddNode(NodePointGround())
 84
 85        gCart = graphics.Brick(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
 86                                           color=graphics.color.dodgerblue)
 87        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
 88        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
 89                                          physicsInertia=0.1*self.masscart, #not needed
 90                                          nodeNumber=self.nCart,
 91                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
 92        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
 93
 94        gArm1 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
 95        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
 96        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
 97                                          physicsInertia=self.armInertia, #not included in original paper
 98                                          nodeNumber=self.nArm1,
 99                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
100
101        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
102        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
103        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
104
105        gArm2 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
106        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
107        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
108                                          physicsInertia=self.armInertia, #not included in original paper
109                                          nodeNumber=self.nArm2,
110                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
111
112        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
113        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
114
115        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
116        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
117        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
118
119        #gravity
120        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
121        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
122        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
123
124        #control force
125        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
126
127        #joints and constraints:
128        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
129        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
130        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
131
132
133
134
135        #%%++++++++++++++++++++++++
136        self.mbs.Assemble() #computes initial vector
137
138        self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
139
140
141        self.simulationSettings.timeIntegration.numberOfSteps = 1
142        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
143        self.simulationSettings.timeIntegration.verboseMode = 0
144        self.simulationSettings.solutionSettings.writeSolutionToFile = False
145        #self.simulationSettings.timeIntegration.simulateInRealtime = True
146
147        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
148
149        self.SC.visualizationSettings.general.drawWorldBasis=True
150        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
151
152        self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
153
154        self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
155        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
156        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
157
158    def integrateStep(self, force):
159        #exudyn simulation part
160        #index 2 solver
161        self.mbs.SetLoadParameter(self.lControl, 'load', force)
162
163        #progress integration time
164        currentTime = self.simulationSettings.timeIntegration.endTime
165        self.simulationSettings.timeIntegration.startTime = currentTime
166        self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
167
168        # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
169        #                  updateInitialValues=True) #use final value as new initial values
170        self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
171        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
172        currentState = self.mbs.systemData.GetSystemState() #get current values
173        self.mbs.systemData.SetSystemState(systemStateList=currentState,
174                                        configuration = exu.ConfigurationType.Initial)
175        self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
176                                                configuration = exu.ConfigurationType.Initial)
177
178
179
180    def step(self, action):
181        err_msg = f"{action!r} ({type(action)}) invalid"
182        assert self.action_space.contains(action), err_msg
183        assert self.state is not None, "Call reset before using step method."
184        self.setState2InitialValues()
185
186        force = self.force_mag if action == 1 else -self.force_mag
187
188        #++++++++++++++++++++++++++++++++++++++++++++++++++
189        #++++++++++++++++++++++++++++++++++++++++++++++++++
190        self.integrateStep(force)
191        #+++++++++++++++++++++++++
192        #compute some output:
193        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
194        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
195        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
196        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
197        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
198        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
199
200        #finally write updated state:
201        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
202        #++++++++++++++++++++++++++++++++++++++++++++++++++
203        #++++++++++++++++++++++++++++++++++++++++++++++++++
204
205        done = bool(
206            cartPosX < -self.x_threshold
207            or cartPosX > self.x_threshold
208            or arm1Angle < -self.theta_threshold_radians
209            or arm1Angle > self.theta_threshold_radians
210            or arm2Angle < -self.theta_threshold_radians
211            or arm2Angle > self.theta_threshold_radians
212        )
213
214        if not done:
215            reward = 1.0
216        elif self.steps_beyond_done is None:
217            # Arm1 just fell!
218            self.steps_beyond_done = 0
219            reward = 1.0
220        else:
221            if self.steps_beyond_done == 0:
222                logger.warn(
223                    "You are calling 'step()' even though this "
224                    "environment has already returned done = True. You "
225                    "should always call 'reset()' once you receive 'done = "
226                    "True' -- any further steps are undefined behavior."
227                )
228            self.steps_beyond_done += 1
229            reward = 0.0
230
231        info = {}
232        terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
233        if useOldGym:
234            return np.array(self.state, dtype=np.float32), reward, terminated, info
235        else:
236            return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
237
238    def setState2InitialValues(self):
239        #+++++++++++++++++++++++++++++++++++++++++++++
240        #set specific initial state:
241        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
242
243        initialValues = np.zeros(9) #model has 3*3 = 9  redundant states
244        initialValues_t = np.zeros(9)
245
246        #build redundant cordinates from self.state
247        initialValues[0] = xCart
248        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
249        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
250        initialValues[3+2] = phiArm1
251        initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
252        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
253        initialValues[6+2] = phiArm2
254
255        initialValues_t[0] = xCart_t
256        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
257        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
258        initialValues_t[3+2] = phiArm1_t
259        initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
260        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
261        initialValues_t[6+2] = phiArm2_t
262
263        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
264        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
265
266
267    def reset(
268        self,
269        *,
270        seed: Optional[int] = None,
271        return_info: bool = False,
272        options: Optional[dict] = None,
273    ):
274        #super().reset(seed=seed)
275        self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
276        self.steps_beyond_done = None
277
278
279        #+++++++++++++++++++++++++++++++++++++++++++++
280        #set state into initial values:
281        self.setState2InitialValues()
282
283        self.simulationSettings.timeIntegration.endTime = 0
284        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
285
286        #+++++++++++++++++++++++++++++++++++++++++++++
287
288        if not return_info and useOldGym:
289            return np.array(self.state, dtype=np.float32)
290        else:
291            return np.array(self.state, dtype=np.float32), {}
292
293    def render(self, mode="human"):
294        if self.rendererRunning==None and self.useRenderer:
295            exu.StartRenderer()
296            self.rendererRunning = True
297
298    def close(self):
299        self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
300        if self.rendererRunning==True:
301            # SC.WaitForRenderEngineStopFlag()
302            exu.StopRenderer() #safely close rendering window!
303
304
305
306# #+++++++++++++++++++++++++++++++++++++++++++++
307# #reset:
308# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
309# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
310# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)