openAIgymTriplePendulum.py
You can view and download this file on Github: openAIgymTriplePendulum.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file shows integration with OpenAI gym by testing a triple pendulum example
5#
6# Author: Johannes Gerstmayr
7# Date: 2022-05-18
8#
9# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
10#
11#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
12
13
14import exudyn as exu
15from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
16import exudyn.graphics as graphics #only import if it does not conflict
17from exudyn.artificialIntelligence import *
18import math
19import os
20
21class InvertedTriplePendulumEnv(OpenAIGymInterfaceEnv):
22
23 #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
24 # you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
25 def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
26
27 #%%++++++++++++++++++++++++++++++++++++++++++++++
28 #this model uses kwargs: thresholdFactor
29 thresholdFactor = 3
30 if 'thresholdFactor' in kwargs:
31 thresholdFactor = kwargs['thresholdFactor']
32
33 gravity = 9.81
34 self.length = 1.
35 width = 0.1*self.length
36 masscart = 1.
37 massarm = 0.1
38 total_mass = massarm + masscart
39 armInertia = self.length**2*0.5*massarm
40 self.force_mag = 10.0*2 #must be larger for triple pendulum to be more reactive ...
41 self.stepUpdateTime = 0.02 # seconds between state updates
42
43 background = graphics.CheckerBoard(point= [0,0.5*self.length,-0.5*width],
44 normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)
45
46 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
47 visualization=VObjectGround(graphicsData= [background])))
48 nGround=self.mbs.AddNode(NodePointGround())
49
50 gCart = graphics.Brick(size=[0.5*self.length, width, width],
51 color=graphics.color.dodgerblue)
52 self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
53 oCart = self.mbs.AddObject(RigidBody2D(physicsMass=masscart,
54 physicsInertia=0.1*masscart, #not needed
55 nodeNumber=self.nCart,
56 visualization=VObjectRigidBody2D(graphicsData= [gCart])))
57 mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
58
59 gArm1 = graphics.Brick(size=[width, self.length, width], color=graphics.color.red)
60 gArm1joint = graphics.Cylinder(pAxis=[0,-0.5*self.length,-0.6*width], vAxis=[0,0,1.2*width],
61 radius=0.0625*self.length, color=graphics.color.darkgrey)
62 self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
63 oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
64 physicsInertia=armInertia, #not included in original paper
65 nodeNumber=self.nArm1,
66 visualization=VObjectRigidBody2D(graphicsData= [gArm1, gArm1joint])))
67
68 mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
69 mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
70 mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
71
72 gArm2 = graphics.Brick(size=[width, self.length, width], color=graphics.color.red)
73 self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
74 oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
75 physicsInertia=armInertia, #not included in original paper
76 nodeNumber=self.nArm2,
77 visualization=VObjectRigidBody2D(graphicsData= [gArm2, gArm1joint])))
78
79 mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
80 mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
81 mArm2JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0, 0.5*self.length,0]))
82
83 gArm3 = graphics.Brick(size=[width, self.length, width], color=graphics.color.red)
84 self.nArm3 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,2.5*self.length,0]));
85 oArm3 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
86 physicsInertia=armInertia, #not included in original paper
87 nodeNumber=self.nArm3,
88 visualization=VObjectRigidBody2D(graphicsData= [gArm3, gArm1joint])))
89
90 mArm3COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm3))
91 mArm3Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm3, localPosition=[0,-0.5*self.length,0]))
92
93 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
94 mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
95 mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
96
97 #gravity
98 self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-masscart*gravity,0]))
99 self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-massarm*gravity,0]))
100 self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-massarm*gravity,0]))
101 self.mbs.AddLoad(Force(markerNumber=mArm3COM, loadVector=[0,-massarm*gravity,0]))
102
103 #control force
104 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
105
106 #joints and constraints:
107 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
108 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
109 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm2JointB, mArm3Joint]))
110
111 self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
112
113
114
115
116 #%%++++++++++++++++++++++++
117 self.mbs.Assemble() #computes initial vector
118
119 self.simulationSettings.timeIntegration.numberOfSteps = 1
120 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
121 self.simulationSettings.timeIntegration.verboseMode = 0
122 self.simulationSettings.solutionSettings.writeSolutionToFile = False
123 #self.simulationSettings.timeIntegration.simulateInRealtime = True
124
125 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
126
127 self.SC.visualizationSettings.general.drawWorldBasis=True
128 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
129 self.SC.visualizationSettings.openGL.multiSampling=4
130
131 #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
132
133 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
134 # Angle at which to fail the episode
135 # these parameters are used in subfunctions
136 self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
137 self.x_threshold = thresholdFactor*2.4
138
139 #must return state size
140 stateSize = 8 #the number of states (position/velocity that are used by learning algorithm)
141 return stateSize
142
143 #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
144 def SetupSpaces(self):
145
146 high = np.array(
147 [
148 self.x_threshold * 2,
149 np.finfo(np.float32).max,
150 self.theta_threshold_radians * 2,
151 np.finfo(np.float32).max,
152 self.theta_threshold_radians * 2,
153 np.finfo(np.float32).max,
154 self.theta_threshold_radians * 2,
155 np.finfo(np.float32).max,
156 ],
157 dtype=np.float32,
158 )
159
160 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
161 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
162 #for Env:
163 self.action_space = spaces.Discrete(2)
164 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
165 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
166
167
168 #**classFunction: OVERRIDE this function to map the action given by learning algorithm to the multibody system, e.g. as a load parameter
169 def MapAction2MBS(self, action):
170 force = self.force_mag if action == 1 else -self.force_mag
171 self.mbs.SetLoadParameter(self.lControl, 'load', force)
172
173 #**classFunction: OVERRIDE this function to collect output of simulation and map to self.state tuple
174 #**output: return bool done which contains information if system state is outside valid range
175 def Output2StateAndDone(self):
176
177 #+++++++++++++++++++++++++
178 #compute some output:
179 cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
180 arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
181 arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
182 arm3Angle = self.mbs.GetNodeOutput(self.nArm3, variableType=exu.OutputVariableType.Coordinates)[2]
183 cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
184 arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
185 arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
186 arm3Angle_t = self.mbs.GetNodeOutput(self.nArm3, variableType=exu.OutputVariableType.Coordinates_t)[2]
187
188 #finally write updated state:
189 self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t, arm3Angle, arm3Angle_t)
190 #++++++++++++++++++++++++++++++++++++++++++++++++++
191
192 done = bool(
193 cartPosX < -self.x_threshold
194 or cartPosX > self.x_threshold
195 or arm1Angle < -self.theta_threshold_radians
196 or arm1Angle > self.theta_threshold_radians
197 or arm2Angle < -self.theta_threshold_radians
198 or arm2Angle > self.theta_threshold_radians
199 or arm3Angle < -self.theta_threshold_radians
200 or arm3Angle > self.theta_threshold_radians
201 )
202 return done
203
204
205 #**classFunction: OVERRIDE this function to maps the current state to mbs initial values
206 #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
207 def State2InitialValues(self):
208 #+++++++++++++++++++++++++++++++++++++++++++++
209 #set specific initial state:
210 (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t, phiArm3, phiArm3_t) = self.state
211
212 initialValues = np.zeros(12) #model has 4*3 redundant states
213 initialValues_t = np.zeros(12)
214
215 #build redundant cordinates from self.state
216 initialValues[0] = xCart
217 initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
218 initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
219 initialValues[3+2] = phiArm1
220
221 initialValues[6+0] = xCart - self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
222 initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
223 initialValues[6+2] = phiArm2
224
225 initialValues[9+0] = xCart - self.length * sin(phiArm1) - self.length * sin(phiArm2) - 0.5*self.length * sin(phiArm3)
226 initialValues[9+1] = self.length * cos(phiArm1) + self.length * cos(phiArm2) + 0.5*self.length * cos(phiArm3) - 2.5*self.length
227 initialValues[9+2] = phiArm3
228
229 initialValues_t[0] = xCart_t
230 initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
231 initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
232 initialValues_t[3+2] = phiArm1_t
233
234 initialValues_t[6+0] = xCart_t - phiArm1_t*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
235 initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
236 initialValues_t[6+2] = phiArm2_t
237
238 initialValues_t[9+0] = xCart_t - phiArm1_t*self.length * cos(phiArm1) - phiArm2_t*self.length * cos(phiArm2) - phiArm3_t*0.5*self.length * cos(phiArm3)
239 initialValues_t[9+1] = -self.length * sin(phiArm1) * phiArm1_t - self.length * sin(phiArm2) * phiArm2_t - 0.5*self.length * sin(phiArm3) * phiArm3_t
240 initialValues_t[9+2] = phiArm3_t
241
242 return [initialValues,initialValues_t]
243
244
245
246
247
248
249
250
251
252#%%+++++++++++++++++++++++++++++++++++++++++++++
253if __name__ == '__main__': #this is only executed when file is direct called in Python
254 import time
255
256
257 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
258 #use some learning algorithm:
259 #pip install stable_baselines3
260 from stable_baselines3 import A2C
261
262
263 #create model and do reinforcement learning
264 if False: #'scalar' environment:
265 env = InvertedTriplePendulumEnv() #(thresholdFactor=2)
266 #check if model runs:
267 # env.TestModel(numberOfSteps=1000, seed=42)
268
269 #main learning task; 1e7 steps take 2-3 hours
270 model = A2C('MlpPolicy',
271 env,
272 device='cpu', #usually cpu is faster for this size of networks
273 #device='cuda', #usually cpu is faster for this size of networks
274 verbose=1)
275 ts = -time.time()
276 model.learn(total_timesteps=2000)
277 #model.learn(total_timesteps=2e7) #not sufficient ...
278 print('*** learning time total =',ts+time.time(),'***')
279
280 #save learned model
281 model.save("openAIgymTriplePendulum1e7d")
282 else:
283 #create vectorized environment, which is much faster for time
284 # consuming environments (otherwise learning algo may be the bottleneck)
285 # https://www.programcreek.com/python/example/121472/stable_baselines.common.vec_env.SubprocVecEnv
286 import torch #stable-baselines3 is based on pytorch
287 n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
288 #n_cores=14 #should be number of real cores (not threads)
289 torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
290
291 #test problem with nSteps=400 in time integration
292 #1 core: learning time total = 28.73 seconds
293 #4 core: learning time total = 8.10
294 #8 core: learning time total = 4.48
295 #14 core:learning time total = 3.77
296 #standard DummyVecEnv version: 15.14 seconds
297 print('using',n_cores,'cores')
298
299 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
300 vecEnv = SubprocVecEnv([InvertedTriplePendulumEnv for i in range(n_cores)])
301
302
303 #main learning task; 1e7 steps take 2-3 hours
304 model = A2C('MlpPolicy',
305 vecEnv,
306 device='cpu', #usually cpu is faster for this size of networks
307 #device='cuda', #optimal with 64 SubprocVecEnv, torch.set_num_threads(1)
308 verbose=1)
309 ts = -time.time()
310 print('start learning...')
311 #model.learn(total_timesteps=50000)
312 model.learn(total_timesteps=7e7) #not sufficient ...
313 print('*** learning time total =',ts+time.time(),'***')
314
315 #save learned model
316 model.save("openAIgymTriplePendulum1e7d")
317
318 if False:
319 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
320 #only load and test
321 model = A2C.load("openAIgymTriplePendulum1e7")
322 env = InvertedTriplePendulumEnv(thresholdFactor=15) #larger threshold for testing
323 solutionFile='solution/learningCoordinates.txt'
324 env.TestModel(numberOfSteps=2500, model=model, solutionFileName=solutionFile,
325 stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
326
327 #++++++++++++++++++++++++++++++++++++++++++++++
328 #visualize (and make animations) in exudyn:
329 from exudyn.interactive import SolutionViewer
330 env.SC.visualizationSettings.general.autoFitScene = False
331 solution = LoadSolutionFile(solutionFile)
332 SolutionViewer(env.mbs, solution) #loads solution file via name stored in mbs