testGymDoublePendulumEnv.py
You can view and download this file on Github: testGymDoublePendulumEnv.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file serves as an input to testGymDoublePendulum.py
5#
6# Author: Johannes Gerstmayr
7# Date: 2022-05-18
8# Update: 2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
9#
10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
11#
12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13
14import exudyn as exu
15from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
16import exudyn.graphics as graphics #only import if it does not conflict
17import math
18
19import math
20from typing import Optional, Union
21
22import numpy as np
23
24import gym
25from gym import logger, spaces, Env
26from gym.error import DependencyNotInstalled
27
28import stable_baselines3
29useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
30
31class DoublePendulumEnv(Env):
32
33 #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
34 metadata = {"render_modes": ["human"], "render_fps": 50}
35
36 def __init__(self, thresholdFactor = 1.):
37
38 self.SC = exu.SystemContainer()
39 self.mbs = self.SC.AddSystem()
40
41
42 #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
43 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
44 self.gravity = 9.81
45 self.length = 1.
46 self.masscart = 1.
47 self.massarm = 0.1
48 self.total_mass = self.massarm + self.masscart
49 self.armInertia = self.length**2*0.5*self.massarm
50 self.force_mag = 10.0
51 self.stepUpdateTime = 0.02 # seconds between state updates
52
53 # Angle at which to fail the episode
54 self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
55 self.x_threshold = thresholdFactor*2.4
56
57 high = np.array(
58 [
59 self.x_threshold * 2,
60 np.finfo(np.float32).max,
61 self.theta_threshold_radians * 2,
62 np.finfo(np.float32).max,
63 self.theta_threshold_radians * 2,
64 np.finfo(np.float32).max,
65 ],
66 dtype=np.float32,
67 )
68
69 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
70 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
71 #for Env:
72 self.action_space = spaces.Discrete(2)
73 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
74 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
75 self.state = None
76 self.rendererRunning=None
77 self.useRenderer = False #turn this on if needed
78
79 background = graphics.CheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
80
81 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
82 visualization=VObjectGround(graphicsData= [background])))
83 nGround=self.mbs.AddNode(NodePointGround())
84
85 gCart = graphics.Brick(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
86 color=graphics.color.dodgerblue)
87 self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
88 oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
89 physicsInertia=0.1*self.masscart, #not needed
90 nodeNumber=self.nCart,
91 visualization=VObjectRigidBody2D(graphicsData= [gCart])))
92 mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
93
94 gArm1 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
95 self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
96 oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
97 physicsInertia=self.armInertia, #not included in original paper
98 nodeNumber=self.nArm1,
99 visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
100
101 mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
102 mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
103 mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
104
105 gArm2 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
106 self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
107 oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
108 physicsInertia=self.armInertia, #not included in original paper
109 nodeNumber=self.nArm2,
110 visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
111
112 mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
113 mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
114
115 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
116 mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
117 mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
118
119 #gravity
120 self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
121 self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
122 self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
123
124 #control force
125 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
126
127 #joints and constraints:
128 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
129 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
130 self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
131
132
133
134
135 #%%++++++++++++++++++++++++
136 self.mbs.Assemble() #computes initial vector
137
138 self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
139
140
141 self.simulationSettings.timeIntegration.numberOfSteps = 1
142 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
143 self.simulationSettings.timeIntegration.verboseMode = 0
144 self.simulationSettings.solutionSettings.writeSolutionToFile = False
145 #self.simulationSettings.timeIntegration.simulateInRealtime = True
146
147 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
148
149 self.SC.visualizationSettings.general.drawWorldBasis=True
150 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
151
152 self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
153
154 self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
155 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
156 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
157
158 def integrateStep(self, force):
159 #exudyn simulation part
160 #index 2 solver
161 self.mbs.SetLoadParameter(self.lControl, 'load', force)
162
163 #progress integration time
164 currentTime = self.simulationSettings.timeIntegration.endTime
165 self.simulationSettings.timeIntegration.startTime = currentTime
166 self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
167
168 self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
169 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
170 currentState = self.mbs.systemData.GetSystemState() #get current values
171 self.mbs.systemData.SetSystemState(systemStateList=currentState,
172 configuration = exu.ConfigurationType.Initial)
173 self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
174 configuration = exu.ConfigurationType.Initial)
175
176
177
178 def step(self, action):
179 err_msg = f"{action!r} ({type(action)}) invalid"
180 assert self.action_space.contains(action), err_msg
181 assert self.state is not None, "Call reset before using step method."
182 self.setState2InitialValues()
183
184 force = self.force_mag if action == 1 else -self.force_mag
185
186 #++++++++++++++++++++++++++++++++++++++++++++++++++
187 #++++++++++++++++++++++++++++++++++++++++++++++++++
188 self.integrateStep(force)
189 #+++++++++++++++++++++++++
190 #compute some output:
191 cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
192 arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
193 arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
194 cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
195 arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
196 arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
197
198 #finally write updated state:
199 self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
200 #++++++++++++++++++++++++++++++++++++++++++++++++++
201 #++++++++++++++++++++++++++++++++++++++++++++++++++
202
203 done = bool(
204 cartPosX < -self.x_threshold
205 or cartPosX > self.x_threshold
206 or arm1Angle < -self.theta_threshold_radians
207 or arm1Angle > self.theta_threshold_radians
208 or arm2Angle < -self.theta_threshold_radians
209 or arm2Angle > self.theta_threshold_radians
210 )
211
212 if not done:
213 reward = 1.0
214 elif self.steps_beyond_done is None:
215 # Arm1 just fell!
216 self.steps_beyond_done = 0
217 reward = 1.0
218 else:
219 if self.steps_beyond_done == 0:
220 logger.warn(
221 "You are calling 'step()' even though this "
222 "environment has already returned done = True. You "
223 "should always call 'reset()' once you receive 'done = "
224 "True' -- any further steps are undefined behavior."
225 )
226 self.steps_beyond_done += 1
227 reward = 0.0
228
229 info = {}
230 terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
231 if useOldGym:
232 return np.array(self.state, dtype=np.float32), reward, terminated, info
233 else:
234 return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
235
236 def setState2InitialValues(self):
237 #+++++++++++++++++++++++++++++++++++++++++++++
238 #set specific initial state:
239 (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
240
241 initialValues = np.zeros(9) #model has 3*3 = 9 redundant states
242 initialValues_t = np.zeros(9)
243
244 #build redundant cordinates from self.state
245 initialValues[0] = xCart
246 initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
247 initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
248 initialValues[3+2] = phiArm1
249 initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
250 initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
251 initialValues[6+2] = phiArm2
252
253 initialValues_t[0] = xCart_t
254 initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
255 initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
256 initialValues_t[3+2] = phiArm1_t
257 initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
258 initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
259 initialValues_t[6+2] = phiArm2_t
260
261 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
262 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
263
264
265 def reset(
266 self,
267 *,
268 seed: Optional[int] = None,
269 return_info: bool = False,
270 options: Optional[dict] = None,
271 ):
272 #super().reset(seed=seed)
273 self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
274 self.steps_beyond_done = None
275
276
277 #+++++++++++++++++++++++++++++++++++++++++++++
278 #set state into initial values:
279 self.setState2InitialValues()
280
281 self.simulationSettings.timeIntegration.endTime = 0
282 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
283
284 #+++++++++++++++++++++++++++++++++++++++++++++
285
286 if not return_info and useOldGym:
287 return np.array(self.state, dtype=np.float32)
288 else:
289 return np.array(self.state, dtype=np.float32), {}
290
291 def render(self, mode="human"):
292 if self.rendererRunning==None and self.useRenderer:
293 SC.renderer.Start()
294 self.rendererRunning = True
295
296 def close(self):
297 self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
298 if self.rendererRunning==True:
299 # SC.renderer.DoIdleTasks()
300 SC.renderer.Stop() #safely close rendering window!
301
302
303
304# #+++++++++++++++++++++++++++++++++++++++++++++
305# #reset:
306# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
307# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
308# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)