testGymDoublePendulumEnv.py

You can view and download this file on Github: testGymDoublePendulumEnv.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  This file serves as an input to testGymDoublePendulum.py
  5#
  6# Author:   Johannes Gerstmayr
  7# Date:     2022-05-18
  8# Update:   2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
  9#
 10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 11#
 12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 13
 14import exudyn as exu
 15from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
 16import exudyn.graphics as graphics #only import if it does not conflict
 17import math
 18
 19import math
 20from typing import Optional, Union
 21
 22import numpy as np
 23
 24import gym
 25from gym import logger, spaces, Env
 26from gym.error import DependencyNotInstalled
 27
 28import stable_baselines3
 29useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
 30
 31class DoublePendulumEnv(Env):
 32
 33    #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
 34    metadata = {"render_modes": ["human"], "render_fps": 50}
 35
 36    def __init__(self, thresholdFactor = 1.):
 37
 38        self.SC = exu.SystemContainer()
 39        self.mbs = self.SC.AddSystem()
 40
 41
 42        #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 43        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 44        self.gravity = 9.81
 45        self.length = 1.
 46        self.masscart = 1.
 47        self.massarm = 0.1
 48        self.total_mass = self.massarm + self.masscart
 49        self.armInertia = self.length**2*0.5*self.massarm
 50        self.force_mag = 10.0
 51        self.stepUpdateTime = 0.02  # seconds between state updates
 52
 53        # Angle at which to fail the episode
 54        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
 55        self.x_threshold = thresholdFactor*2.4
 56
 57        high = np.array(
 58            [
 59                self.x_threshold * 2,
 60                np.finfo(np.float32).max,
 61                self.theta_threshold_radians * 2,
 62                np.finfo(np.float32).max,
 63                self.theta_threshold_radians * 2,
 64                np.finfo(np.float32).max,
 65            ],
 66            dtype=np.float32,
 67        )
 68
 69        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 70        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
 71        #for Env:
 72        self.action_space = spaces.Discrete(2)
 73        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
 74        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 75        self.state = None
 76        self.rendererRunning=None
 77        self.useRenderer = False #turn this on if needed
 78
 79        background = graphics.CheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
 80
 81        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
 82                                           visualization=VObjectGround(graphicsData= [background])))
 83        nGround=self.mbs.AddNode(NodePointGround())
 84
 85        gCart = graphics.Brick(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
 86                                           color=graphics.color.dodgerblue)
 87        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
 88        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
 89                                          physicsInertia=0.1*self.masscart, #not needed
 90                                          nodeNumber=self.nCart,
 91                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
 92        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
 93
 94        gArm1 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
 95        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
 96        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
 97                                          physicsInertia=self.armInertia, #not included in original paper
 98                                          nodeNumber=self.nArm1,
 99                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
100
101        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
102        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
103        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
104
105        gArm2 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
106        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
107        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
108                                          physicsInertia=self.armInertia, #not included in original paper
109                                          nodeNumber=self.nArm2,
110                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
111
112        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
113        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
114
115        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
116        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
117        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
118
119        #gravity
120        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
121        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
122        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
123
124        #control force
125        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
126
127        #joints and constraints:
128        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
129        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
130        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
131
132
133
134
135        #%%++++++++++++++++++++++++
136        self.mbs.Assemble() #computes initial vector
137
138        self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
139
140
141        self.simulationSettings.timeIntegration.numberOfSteps = 1
142        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
143        self.simulationSettings.timeIntegration.verboseMode = 0
144        self.simulationSettings.solutionSettings.writeSolutionToFile = False
145        #self.simulationSettings.timeIntegration.simulateInRealtime = True
146
147        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
148
149        self.SC.visualizationSettings.general.drawWorldBasis=True
150        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
151
152        self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
153
154        self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
155        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
156        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
157
158    def integrateStep(self, force):
159        #exudyn simulation part
160        #index 2 solver
161        self.mbs.SetLoadParameter(self.lControl, 'load', force)
162
163        #progress integration time
164        currentTime = self.simulationSettings.timeIntegration.endTime
165        self.simulationSettings.timeIntegration.startTime = currentTime
166        self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
167
168        self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
169        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
170        currentState = self.mbs.systemData.GetSystemState() #get current values
171        self.mbs.systemData.SetSystemState(systemStateList=currentState,
172                                        configuration = exu.ConfigurationType.Initial)
173        self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
174                                                configuration = exu.ConfigurationType.Initial)
175
176
177
178    def step(self, action):
179        err_msg = f"{action!r} ({type(action)}) invalid"
180        assert self.action_space.contains(action), err_msg
181        assert self.state is not None, "Call reset before using step method."
182        self.setState2InitialValues()
183
184        force = self.force_mag if action == 1 else -self.force_mag
185
186        #++++++++++++++++++++++++++++++++++++++++++++++++++
187        #++++++++++++++++++++++++++++++++++++++++++++++++++
188        self.integrateStep(force)
189        #+++++++++++++++++++++++++
190        #compute some output:
191        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
192        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
193        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
194        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
195        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
196        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
197
198        #finally write updated state:
199        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
200        #++++++++++++++++++++++++++++++++++++++++++++++++++
201        #++++++++++++++++++++++++++++++++++++++++++++++++++
202
203        done = bool(
204            cartPosX < -self.x_threshold
205            or cartPosX > self.x_threshold
206            or arm1Angle < -self.theta_threshold_radians
207            or arm1Angle > self.theta_threshold_radians
208            or arm2Angle < -self.theta_threshold_radians
209            or arm2Angle > self.theta_threshold_radians
210        )
211
212        if not done:
213            reward = 1.0
214        elif self.steps_beyond_done is None:
215            # Arm1 just fell!
216            self.steps_beyond_done = 0
217            reward = 1.0
218        else:
219            if self.steps_beyond_done == 0:
220                logger.warn(
221                    "You are calling 'step()' even though this "
222                    "environment has already returned done = True. You "
223                    "should always call 'reset()' once you receive 'done = "
224                    "True' -- any further steps are undefined behavior."
225                )
226            self.steps_beyond_done += 1
227            reward = 0.0
228
229        info = {}
230        terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
231        if useOldGym:
232            return np.array(self.state, dtype=np.float32), reward, terminated, info
233        else:
234            return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
235
236    def setState2InitialValues(self):
237        #+++++++++++++++++++++++++++++++++++++++++++++
238        #set specific initial state:
239        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
240
241        initialValues = np.zeros(9) #model has 3*3 = 9  redundant states
242        initialValues_t = np.zeros(9)
243
244        #build redundant cordinates from self.state
245        initialValues[0] = xCart
246        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
247        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
248        initialValues[3+2] = phiArm1
249        initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
250        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
251        initialValues[6+2] = phiArm2
252
253        initialValues_t[0] = xCart_t
254        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
255        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
256        initialValues_t[3+2] = phiArm1_t
257        initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
258        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
259        initialValues_t[6+2] = phiArm2_t
260
261        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
262        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
263
264
265    def reset(
266        self,
267        *,
268        seed: Optional[int] = None,
269        return_info: bool = False,
270        options: Optional[dict] = None,
271    ):
272        #super().reset(seed=seed)
273        self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
274        self.steps_beyond_done = None
275
276
277        #+++++++++++++++++++++++++++++++++++++++++++++
278        #set state into initial values:
279        self.setState2InitialValues()
280
281        self.simulationSettings.timeIntegration.endTime = 0
282        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
283
284        #+++++++++++++++++++++++++++++++++++++++++++++
285
286        if not return_info and useOldGym:
287            return np.array(self.state, dtype=np.float32)
288        else:
289            return np.array(self.state, dtype=np.float32), {}
290
291    def render(self, mode="human"):
292        if self.rendererRunning==None and self.useRenderer:
293            SC.renderer.Start()
294            self.rendererRunning = True
295
296    def close(self):
297        self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
298        if self.rendererRunning==True:
299            # SC.renderer.DoIdleTasks()
300            SC.renderer.Stop() #safely close rendering window!
301
302
303
304# #+++++++++++++++++++++++++++++++++++++++++++++
305# #reset:
306# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
307# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
308# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)