testGymDoublePendulumEnv.py
You can view and download this file on Github: testGymDoublePendulumEnv.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file serves as an input to testGymDoublePendulum.py
5#
6# Author: Johannes Gerstmayr
7# Date: 2022-05-18
8# Update: 2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
9#
10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
11#
12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13
14import exudyn as exu
15from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
16import exudyn.graphics as graphics #only import if it does not conflict
17import math
18
19import math
20from typing import Optional, Union
21
22import numpy as np
23
24import gym
25from gym import logger, spaces, Env
26from gym.error import DependencyNotInstalled
27
28import stable_baselines3
29useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
30
31class DoublePendulumEnv(Env):
32
33 #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
34 metadata = {"render_modes": ["human"], "render_fps": 50}
35
36 def __init__(self, thresholdFactor = 1.):
37
38 self.SC = exu.SystemContainer()
39 self.mbs = self.SC.AddSystem()
40
41
42 #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
43 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
44 self.gravity = 9.81
45 self.length = 1.
46 self.masscart = 1.
47 self.massarm = 0.1
48 self.total_mass = self.massarm + self.masscart
49 self.armInertia = self.length**2*0.5*self.massarm
50 self.force_mag = 10.0
51 self.stepUpdateTime = 0.02 # seconds between state updates
52
53 # Angle at which to fail the episode
54 self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
55 self.x_threshold = thresholdFactor*2.4
56
57 high = np.array(
58 [
59 self.x_threshold * 2,
60 np.finfo(np.float32).max,
61 self.theta_threshold_radians * 2,
62 np.finfo(np.float32).max,
63 self.theta_threshold_radians * 2,
64 np.finfo(np.float32).max,
65 ],
66 dtype=np.float32,
67 )
68
69 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
70 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
71 #for Env:
72 self.action_space = spaces.Discrete(2)
73 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
74 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
75 self.state = None
76 self.rendererRunning=None
77 self.useRenderer = False #turn this on if needed
78
79 background = graphics.CheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
80
81 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
82 visualization=VObjectGround(graphicsData= [background])))
83 nGround=self.mbs.AddNode(NodePointGround())
84
85 gCart = graphics.Brick(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
86 color=graphics.color.dodgerblue)
87 self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
88 oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
89 physicsInertia=0.1*self.masscart, #not needed
90 nodeNumber=self.nCart,
91 visualization=VObjectRigidBody2D(graphicsData= [gCart])))
92 mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
93
94 gArm1 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
95 self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
96 oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
97 physicsInertia=self.armInertia, #not included in original paper
98 nodeNumber=self.nArm1,
99 visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
100
101 mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
102 mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
103 mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
104
105 gArm2 = graphics.Brick(size=[0.1*self.length, self.length, 0.1*self.length], color=graphics.color.red)
106 self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
107 oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
108 physicsInertia=self.armInertia, #not included in original paper
109 nodeNumber=self.nArm2,
110 visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
111
112 mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
113 mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
114
115 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
116 mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
117 mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
118
119 #gravity
120 self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
121 self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
122 self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
123
124 #control force
125 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
126
127 #joints and constraints:
128 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
129 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
130 self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
131
132
133
134
135 #%%++++++++++++++++++++++++
136 self.mbs.Assemble() #computes initial vector
137
138 self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
139
140
141 self.simulationSettings.timeIntegration.numberOfSteps = 1
142 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
143 self.simulationSettings.timeIntegration.verboseMode = 0
144 self.simulationSettings.solutionSettings.writeSolutionToFile = False
145 #self.simulationSettings.timeIntegration.simulateInRealtime = True
146
147 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
148
149 self.SC.visualizationSettings.general.drawWorldBasis=True
150 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
151
152 self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
153
154 self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
155 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
156 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
157
158 def integrateStep(self, force):
159 #exudyn simulation part
160 #index 2 solver
161 self.mbs.SetLoadParameter(self.lControl, 'load', force)
162
163 #progress integration time
164 currentTime = self.simulationSettings.timeIntegration.endTime
165 self.simulationSettings.timeIntegration.startTime = currentTime
166 self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
167
168 # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
169 # updateInitialValues=True) #use final value as new initial values
170 self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
171 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
172 currentState = self.mbs.systemData.GetSystemState() #get current values
173 self.mbs.systemData.SetSystemState(systemStateList=currentState,
174 configuration = exu.ConfigurationType.Initial)
175 self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
176 configuration = exu.ConfigurationType.Initial)
177
178
179
180 def step(self, action):
181 err_msg = f"{action!r} ({type(action)}) invalid"
182 assert self.action_space.contains(action), err_msg
183 assert self.state is not None, "Call reset before using step method."
184 self.setState2InitialValues()
185
186 force = self.force_mag if action == 1 else -self.force_mag
187
188 #++++++++++++++++++++++++++++++++++++++++++++++++++
189 #++++++++++++++++++++++++++++++++++++++++++++++++++
190 self.integrateStep(force)
191 #+++++++++++++++++++++++++
192 #compute some output:
193 cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
194 arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
195 arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
196 cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
197 arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
198 arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
199
200 #finally write updated state:
201 self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
202 #++++++++++++++++++++++++++++++++++++++++++++++++++
203 #++++++++++++++++++++++++++++++++++++++++++++++++++
204
205 done = bool(
206 cartPosX < -self.x_threshold
207 or cartPosX > self.x_threshold
208 or arm1Angle < -self.theta_threshold_radians
209 or arm1Angle > self.theta_threshold_radians
210 or arm2Angle < -self.theta_threshold_radians
211 or arm2Angle > self.theta_threshold_radians
212 )
213
214 if not done:
215 reward = 1.0
216 elif self.steps_beyond_done is None:
217 # Arm1 just fell!
218 self.steps_beyond_done = 0
219 reward = 1.0
220 else:
221 if self.steps_beyond_done == 0:
222 logger.warn(
223 "You are calling 'step()' even though this "
224 "environment has already returned done = True. You "
225 "should always call 'reset()' once you receive 'done = "
226 "True' -- any further steps are undefined behavior."
227 )
228 self.steps_beyond_done += 1
229 reward = 0.0
230
231 info = {}
232 terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
233 if useOldGym:
234 return np.array(self.state, dtype=np.float32), reward, terminated, info
235 else:
236 return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
237
238 def setState2InitialValues(self):
239 #+++++++++++++++++++++++++++++++++++++++++++++
240 #set specific initial state:
241 (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
242
243 initialValues = np.zeros(9) #model has 3*3 = 9 redundant states
244 initialValues_t = np.zeros(9)
245
246 #build redundant cordinates from self.state
247 initialValues[0] = xCart
248 initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
249 initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
250 initialValues[3+2] = phiArm1
251 initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
252 initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
253 initialValues[6+2] = phiArm2
254
255 initialValues_t[0] = xCart_t
256 initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
257 initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
258 initialValues_t[3+2] = phiArm1_t
259 initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
260 initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
261 initialValues_t[6+2] = phiArm2_t
262
263 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
264 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
265
266
267 def reset(
268 self,
269 *,
270 seed: Optional[int] = None,
271 return_info: bool = False,
272 options: Optional[dict] = None,
273 ):
274 #super().reset(seed=seed)
275 self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
276 self.steps_beyond_done = None
277
278
279 #+++++++++++++++++++++++++++++++++++++++++++++
280 #set state into initial values:
281 self.setState2InitialValues()
282
283 self.simulationSettings.timeIntegration.endTime = 0
284 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
285
286 #+++++++++++++++++++++++++++++++++++++++++++++
287
288 if not return_info and useOldGym:
289 return np.array(self.state, dtype=np.float32)
290 else:
291 return np.array(self.state, dtype=np.float32), {}
292
293 def render(self, mode="human"):
294 if self.rendererRunning==None and self.useRenderer:
295 exu.StartRenderer()
296 self.rendererRunning = True
297
298 def close(self):
299 self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
300 if self.rendererRunning==True:
301 # SC.WaitForRenderEngineStopFlag()
302 exu.StopRenderer() #safely close rendering window!
303
304
305
306# #+++++++++++++++++++++++++++++++++++++++++++++
307# #reset:
308# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
309# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
310# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)