openAIgymInterfaceTest.py
You can view and download this file on Github: openAIgymInterfaceTest.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file shows integration with OpenAI gym by testing a double pendulum example
5#
6# Author: Johannes Gerstmayr
7# Date: 2022-05-18
8#
9# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
10#
11#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
12
13#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
14#to run this example the following worked:
15#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
16#pip install gym[spaces]
17#pip install stable-baselines3==1.7.0
18#pip install exudyn
19
20import exudyn as exu
21from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
22import exudyn.graphics as graphics #only import if it does not conflict
23from exudyn.artificialIntelligence import *
24import math
25
26#%%++++++++++++++++++++++++++++++++++++++++++++++
27
28#**class: interface class to set up Exudyn model which can be used as model in open AI gym;
29# see specific class functions which contain 'OVERRIDE' to integrate your model;
30# in general, set up a model with CreateMBS(), map state to initial values, initial values to state and action to mbs;
31class InvertedDoublePendulumEnv(OpenAIGymInterfaceEnv):
32
33 #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
34 # you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
35 def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
36
37 #this model uses kwargs: thresholdFactor
38 thresholdFactor = kwargs['thresholdFactor']
39 gravity = 9.81
40 self.length = 1.
41 width = 0.1*self.length
42 masscart = 1.
43 massarm = 0.1
44 total_mass = massarm + masscart
45 armInertia = self.length**2*0.5*massarm
46 self.force_mag = 10.0
47 self.stepUpdateTime = 0.02 # seconds between state updates
48
49 background = graphics.CheckerBoard(point= [0,0.5*self.length,-0.5*width],
50 normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)
51
52 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
53 visualization=VObjectGround(graphicsData= [background])))
54 nGround=self.mbs.AddNode(NodePointGround())
55
56 gCart = graphics.Brick(size=[0.5*self.length, width, width],
57 color=graphics.color.dodgerblue)
58 self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
59 oCart = self.mbs.AddObject(RigidBody2D(physicsMass=masscart,
60 physicsInertia=0.1*masscart, #not needed
61 nodeNumber=self.nCart,
62 visualization=VObjectRigidBody2D(graphicsData= [gCart])))
63 mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
64
65 gArm1 = graphics.Brick(size=[width, self.length, width], color=graphics.color.red)
66 gArm1joint = graphics.Cylinder(pAxis=[0,-0.5*self.length,-0.6*width], vAxis=[0,0,1.2*width],
67 radius=0.0625*self.length, color=graphics.color.darkgrey)
68 self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
69 oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
70 physicsInertia=armInertia, #not included in original paper
71 nodeNumber=self.nArm1,
72 visualization=VObjectRigidBody2D(graphicsData= [gArm1, gArm1joint])))
73
74 mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
75 mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
76 mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
77
78 gArm2 = graphics.Brick(size=[width, self.length, width], color=graphics.color.red)
79 self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
80 oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
81 physicsInertia=armInertia, #not included in original paper
82 nodeNumber=self.nArm2,
83 visualization=VObjectRigidBody2D(graphicsData= [gArm2, gArm1joint])))
84
85 mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
86 mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
87
88 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
89 mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
90 mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
91
92 #gravity
93 self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-masscart*gravity,0]))
94 self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-massarm*gravity,0]))
95 self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-massarm*gravity,0]))
96
97 #control force
98 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
99
100 #joints and constraints:
101 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
102 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
103 self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
104
105
106
107
108 #%%++++++++++++++++++++++++
109 self.mbs.Assemble() #computes initial vector
110
111 self.simulationSettings.timeIntegration.numberOfSteps = 1
112 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
113 self.simulationSettings.timeIntegration.verboseMode = 0
114 self.simulationSettings.solutionSettings.writeSolutionToFile = False
115 #self.simulationSettings.timeIntegration.simulateInRealtime = True
116
117 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
118
119 self.SC.visualizationSettings.general.drawWorldBasis=True
120 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
121 self.SC.visualizationSettings.openGL.multiSampling=4
122
123 #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
124
125 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
126 # Angle at which to fail the episode
127 # these parameters are used in subfunctions
128 self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
129 self.x_threshold = thresholdFactor*2.4
130
131 #must return state size
132 stateSize = 6 #the number of states (position/velocity that are used by learning algorithm)
133 return stateSize
134
135 #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
136 def SetupSpaces(self):
137
138 high = np.array(
139 [
140 self.x_threshold * 2,
141 np.finfo(np.float32).max,
142 self.theta_threshold_radians * 2,
143 np.finfo(np.float32).max,
144 self.theta_threshold_radians * 2,
145 np.finfo(np.float32).max,
146 ],
147 dtype=np.float32,
148 )
149
150 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
151 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
152 #for Env:
153 self.action_space = spaces.Discrete(2)
154 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
155 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
156
157
158 #**classFunction: OVERRIDE this function to map the action given by learning algorithm to the multibody system, e.g. as a load parameter
159 def MapAction2MBS(self, action):
160 force = self.force_mag if action == 1 else -self.force_mag
161 self.mbs.SetLoadParameter(self.lControl, 'load', force)
162
163 #**classFunction: OVERRIDE this function to collect output of simulation and map to self.state tuple
164 #**output: return bool done which contains information if system state is outside valid range
165 def Output2StateAndDone(self):
166
167 #+++++++++++++++++++++++++
168 #compute some output:
169 cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
170 arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
171 arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
172 cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
173 arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
174 arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
175
176 #finally write updated state:
177 self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
178 #++++++++++++++++++++++++++++++++++++++++++++++++++
179
180 done = bool(
181 cartPosX < -self.x_threshold
182 or cartPosX > self.x_threshold
183 or arm1Angle < -self.theta_threshold_radians
184 or arm1Angle > self.theta_threshold_radians
185 or arm2Angle < -self.theta_threshold_radians
186 or arm2Angle > self.theta_threshold_radians
187 )
188 return done
189
190
191 #**classFunction: OVERRIDE this function to maps the current state to mbs initial values
192 #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
193 def State2InitialValues(self):
194 #+++++++++++++++++++++++++++++++++++++++++++++
195 #set specific initial state:
196 (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
197
198 initialValues = np.zeros(9) #model has 3*3 = 9 redundant states
199 initialValues_t = np.zeros(9)
200
201 #build redundant cordinates from self.state
202 initialValues[0] = xCart
203 initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
204 initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
205 initialValues[3+2] = phiArm1
206 initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
207 initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
208 initialValues[6+2] = phiArm2
209
210 initialValues_t[0] = xCart_t
211 initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
212 initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
213 initialValues_t[3+2] = phiArm1_t
214 initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
215 initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
216 initialValues_t[6+2] = phiArm2_t
217
218 return [initialValues,initialValues_t]
219
220
221
222
223
224
225
226
227
228#%%+++++++++++++++++++++++++++++++++++++++++++++
229if __name__ == '__main__': #this is only executed when file is direct called in Python
230 import time
231
232
233 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
234 #create model and do reinforcement learning
235 env = InvertedDoublePendulumEnv(thresholdFactor=1)
236
237 #check if model runs:
238 #env.TestModel(numberOfSteps=1000, seed=42)
239
240 #use some learning algorithm:
241 from stable_baselines3 import A2C
242
243 #main learning task; 1e7 steps take 2-3 hours
244 ts = -time.time()
245 model = A2C('MlpPolicy', env, verbose=1)
246 model.learn(total_timesteps=1e4) #1e6 may work, 2e7 is quite robust!
247 print('*** learning time total =',ts+time.time(),'***')
248
249 #save learned model
250 model.save("openAIgymDoublePendulum1e4")
251 del model
252
253 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
254 #only load and test
255 model = A2C.load("openAIgymDoublePendulum1e5")
256 env = InvertedDoublePendulumEnv(thresholdFactor=15) #larger threshold for testing
257 solutionFile='solution/learningCoordinates.txt'
258 env.TestModel(numberOfSteps=2500, model=model, solutionFileName=solutionFile,
259 stopIfDone=False, useRenderer=True, sleepTime=0) #just compute solution file
260
261 #++++++++++++++++++++++++++++++++++++++++++++++
262 #visualize (and make animations) in exudyn:
263 from exudyn.interactive import SolutionViewer
264 env.SC.visualizationSettings.general.autoFitScene = False
265 solution = LoadSolutionFile(solutionFile)
266 env.mbs.SolutionViewer(solution) #loads solution file via name stored in mbs