reinforcementLearningRobot.py
You can view and download this file on Github: reinforcementLearningRobot.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: Reinforcement learning example with stable-baselines3;
5# training a mobile platform with differential drives to meet target points
6# NOTE: frictional contact requires small enough step size to avoid artifacts!
7# GeneralContact works less stable than RollingDisc objects
8#
9# Author: Johannes Gerstmayr
10# Date: 2024-04-27
11#
12# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
13#
14#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
15
16#NOTE: this model is using the stable-baselines3 version 1.7.0, which requires:
17#pip install exudyn
18#pip install pip install wheel==0.38.4 setuptools==66.0.0
19# => this downgrades setuptools to be able to install gym==0.21
20#pip install stable-baselines3==1.7.0
21#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y
22
23import sys
24sys.exudynFast = True
25
26import exudyn as exu
27from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
28import exudyn.graphics as graphics #only import if it does not conflict
29from exudyn.robotics import *
30from exudyn.artificialIntelligence import *
31import math
32
33import copy
34import os
35os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
36import torch
37
38import stable_baselines3
39useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
40
41##%% here the number of links can be changed. Note that for n < 3 the actuator
42
43#**function: Add model of differential drive robot (two wheels which can be actuated independently);
44# model is parameterized for kinematics, inertia parameters as well as for graphics;
45# the model is created as a minimum coordinate model to use it together with explicit integration;
46# a contact model is added if it does not exist;
47def DifferentialDriveRobot(SC, mbs,
48 platformInertia = None,
49 wheelInertia = None,
50 platformPosition = [0,0,0], #this is the location of the platform ground centerpoint
51 wheelDistance = 0.4, #wheel midpoint-to-midpoint distance
52 platformHeight = 0.1,
53 platformRadius = 0.22,
54 platformMass = 5,
55 platformGroundOffset = 0.02,
56 planarPlatform = True,
57 dimGroundX = 8, dimGroundY = 8,
58 gravity = [0,0,-9.81],
59 wheelRadius = 0.04,
60 wheelThickness = 0.01,
61 wheelMass = 0.05,
62 pControl = 0,
63 dControl = 0.02,
64 usePenalty = True, #use penalty formulation in case useGeneralContact=False
65 frictionProportionalZone = 0.025,
66 frictionCoeff = 1, stiffnessGround = 1e5,
67 gContact = None,
68 frictionIndexWheel = None, frictionIndexFree = None,
69 useGeneralContact = False #generalcontact shows large errors currently
70 ):
71
72 #add class which can be returned to enable user to access parameters
73 class ddr: pass
74
75 #+++++++++++++++++++++++++++++++++++++++++++
76 #create contact (if not provided)
77 ddr.gGround = graphics.CheckerBoard(normal= [0,0,1],
78 size=dimGroundX, size2=dimGroundY, nTiles=8)
79
80 ddr.oGround= mbs.AddObject(ObjectGround(referencePosition= [0,0,0],
81 visualization=VObjectGround(graphicsData= [ddr.gGround])))
82 ddr.mGround = mbs.AddMarker(MarkerBodyRigid(bodyNumber=ddr.oGround))
83
84
85 ddr.frictionCoeff = frictionCoeff
86 ddr.stiffnessGround = stiffnessGround
87 ddr.dampingGround = ddr.stiffnessGround*0.01
88 if gContact == None and useGeneralContact:
89 frictionIndexGround = 0
90 frictionIndexWheel = 0
91 frictionIndexFree = 1
92
93 ddr.gContact = mbs.AddGeneralContact()
94 ddr.gContact.frictionProportionalZone = frictionProportionalZone
95 #ddr.gContact.frictionVelocityPenalty = 1e4
96
97 ddr.gContact.SetFrictionPairings(np.diag([ddr.frictionCoeff,0])) #second index is for frictionless contact
98 ddr.gContact.SetSearchTreeCellSize(numberOfCells=[4,4,1]) #just a few contact cells
99
100 #add ground to contact
101 [meshPoints, meshTrigs] = graphics.ToPointsAndTrigs(ddr.gGround) #could also use only 1 quad ...
102
103 ddr.gContact.AddTrianglesRigidBodyBased( rigidBodyMarkerIndex = ddr.mGround,
104 contactStiffness = ddr.stiffnessGround, contactDamping = ddr.dampingGround,
105 frictionMaterialIndex = frictionIndexGround,
106 pointList=meshPoints, triangleList=meshTrigs)
107
108
109 #+++++++++++++++++++++++++++++++++++++++++++
110 #create inertias (if not provided)
111 if wheelInertia == None:
112 ddr.iWheel = InertiaCylinder(wheelMass/(wheelRadius**2*pi*wheelThickness),
113 wheelThickness, wheelRadius, axis=0) #rotation about local X-axis
114 else:
115 ddr.iWheel = RigidBodyInertia(mass=wheelMass, inertiaTensorAtCOM=np.diag(wheelInertia))
116
117 if platformInertia == None:
118 ddr.iPlatform = InertiaCylinder(platformMass/(platformRadius**2*pi*platformHeight),
119 platformHeight, platformRadius, axis=0) #rotation about local X-axis
120 ddr.iPlatform = ddr.iPlatform.Translated([0,0,0.5*platformHeight+platformGroundOffset]) #put COM at mid of platform; but referencepoint is at ground level!
121 else:
122 ddr.iPlatform = RigidBodyInertia(mass=platformMass, inertiaTensorAtCOM=np.diag(platformInertia))
123
124 #+++++++++++++++++++++++++++++++++++++++++++
125 #create kinematic tree for wheeled robot
126 ddr.gPlatform = [graphics.Cylinder([0,0,platformGroundOffset], [0,0,platformHeight], platformRadius, color=graphics.color.steelblue, nTiles=64, addEdges=True, addFaces=False)]
127 ddr.gPlatform += [graphics.Cylinder([0,platformRadius*0.8,platformGroundOffset*1.5], [0,0,platformHeight], platformRadius*0.2, color=graphics.color.grey)]
128 ddr.gPlatform += [graphics.Basis(length=0.1)]
129 ddr.gWheel = [graphics.Cylinder([-wheelThickness*0.5,0,0], [wheelThickness,0,0], wheelRadius, color=graphics.color.red, nTiles=32)]
130 ddr.gWheel += [graphics.Brick([0,0,0],[wheelThickness*1.1,wheelRadius*1.3,wheelRadius*1.3], color=graphics.color.grey)]
131 ddr.gWheel += [graphics.Basis(length=0.075)]
132
133 #create node for unknowns of KinematicTree
134 ddr.nJoints = 3+3+2 - 3*planarPlatform #6 (3 in planar case) for the platform and 2 for the wheels;
135 referenceCoordinates=[0.]*ddr.nJoints
136 referenceCoordinates[0:len(platformPosition)] = platformPosition
137 ddr.nKT = mbs.AddNode(NodeGenericODE2(referenceCoordinates=referenceCoordinates,
138 initialCoordinates=[0.]*ddr.nJoints,
139 initialCoordinates_t=[0.]*ddr.nJoints,
140 numberOfODE2Coordinates=ddr.nJoints))
141
142 ddr.linkMasses = []
143 ddr.gList = [] #list of graphics objects for links
144 ddr.linkCOMs = exu.Vector3DList()
145 ddr.linkInertiasCOM=exu.Matrix3DList()
146 ddr.jointTransformations=exu.Matrix3DList()
147 ddr.jointOffsets = exu.Vector3DList()
148 ddr.jointTypes = [exu.JointType.PrismaticX,exu.JointType.PrismaticY,exu.JointType.RevoluteZ]
149 ddr.linkParents = list(np.arange(3)-1)
150
151 ddr.platformIndex = 2
152 if not planarPlatform:
153 ddr.jointTypes+=[exu.JointType.PrismaticZ,exu.JointType.RevoluteY,exu.JointType.RevoluteX]
154 ddr.linkParents+=[2,3,4]
155 ddr.platformIndex = 5
156
157 #add data for wheels:
158 ddr.jointTypes += [exu.JointType.RevoluteX]*2
159 ddr.linkParents += [ddr.platformIndex]*2
160
161 #now create offsets, graphics list and inertia for all links
162 for i in range(len(ddr.jointTypes)):
163 ddr.jointTransformations.Append(np.eye(3))
164
165 if i < ddr.platformIndex:
166 ddr.gList += [[]]
167 ddr.jointOffsets.Append([0,0,0])
168 ddr.linkInertiasCOM.Append(np.zeros([3,3]))
169 ddr.linkCOMs.Append([0,0,0])
170 ddr.linkMasses.append(0)
171 elif i == ddr.platformIndex:
172 ddr.gList += [ddr.gPlatform]
173 ddr.jointOffsets.Append([0,0,0])
174 ddr.linkInertiasCOM.Append(ddr.iPlatform.InertiaCOM())
175 ddr.linkCOMs.Append(ddr.iPlatform.COM())
176 ddr.linkMasses.append(ddr.iPlatform.Mass())
177 else:
178 ddr.gList += [ddr.gWheel]
179 sign = -1+(i>ddr.platformIndex+1)*2
180 offZ = wheelRadius
181 if planarPlatform and (useGeneralContact or usePenalty):
182 offZ *= 0.999 #to ensure contact
183 ddr.jointOffsets.Append([sign*wheelDistance*0.5,0,offZ])
184 ddr.linkInertiasCOM.Append(ddr.iWheel.InertiaCOM())
185 ddr.linkCOMs.Append(ddr.iWheel.COM())
186 ddr.linkMasses.append(ddr.iWheel.Mass())
187
188
189 ddr.jointDControlVector = [0]*ddr.nJoints
190 ddr.jointPControlVector = [0]*ddr.nJoints
191 ddr.jointPositionOffsetVector = [0]*ddr.nJoints
192 ddr.jointVelocityOffsetVector = [0]*ddr.nJoints
193
194 ddr.jointPControlVector[-2:] = [pControl]*2
195 ddr.jointDControlVector[-2:] = [dControl]*2
196
197
198 #create KinematicTree
199 ddr.oKT = mbs.AddObject(ObjectKinematicTree(nodeNumber=ddr.nKT,
200 jointTypes=ddr.jointTypes,
201 linkParents=ddr.linkParents,
202 jointTransformations=ddr.jointTransformations,
203 jointOffsets=ddr.jointOffsets,
204 linkInertiasCOM=ddr.linkInertiasCOM,
205 linkCOMs=ddr.linkCOMs,
206 linkMasses=ddr.linkMasses,
207 baseOffset = [0.,0.,0.], gravity=gravity,
208 jointPControlVector=ddr.jointPControlVector,
209 jointDControlVector=ddr.jointDControlVector,
210 jointPositionOffsetVector=ddr.jointPositionOffsetVector,
211 jointVelocityOffsetVector=ddr.jointVelocityOffsetVector,
212 visualization=VObjectKinematicTree(graphicsDataList = ddr.gList)))
213
214 ddr.sPlatformPos = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
215 storeInternal=True, outputVariableType=exu.OutputVariableType.Position))
216 ddr.sPlatformVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
217 storeInternal=True, outputVariableType=exu.OutputVariableType.Velocity))
218 ddr.sPlatformAng = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
219 storeInternal=True, outputVariableType=exu.OutputVariableType.Rotation))
220 ddr.sPlatformAngVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
221 storeInternal=True, outputVariableType=exu.OutputVariableType.AngularVelocity))
222
223 #create markers for wheels and add contact
224 ddr.mWheels = []
225 for i in range(2):
226 mWheel = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
227 linkNumber=ddr.platformIndex+1+i,
228 localPosition=[0,0,0]))
229 ddr.mWheels.append(mWheel)
230 if useGeneralContact:
231 ddr.gContact.AddSphereWithMarker(mWheel,
232 radius=wheelRadius,
233 contactStiffness=ddr.stiffnessGround,
234 contactDamping=ddr.dampingGround,
235 frictionMaterialIndex=frictionIndexWheel)
236 #for 3D platform, we need additional support points:
237 if not planarPlatform:
238 rY = platformRadius-platformGroundOffset
239 mPlatformFront = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
240 linkNumber=ddr.platformIndex,
241 localPosition=[0,rY,1.01*platformGroundOffset]))
242 mPlatformBack = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
243 linkNumber=ddr.platformIndex,
244 localPosition=[0,-rY,1.01*platformGroundOffset]))
245
246 fact = 1
247 ddr.gContact.AddSphereWithMarker(mPlatformFront,
248 radius=platformGroundOffset,
249 contactStiffness=ddr.stiffnessGround*fact,
250 contactDamping=ddr.dampingGround*fact,
251 frictionMaterialIndex=frictionIndexFree)
252 ddr.gContact.AddSphereWithMarker(mPlatformBack,
253 radius=platformGroundOffset,
254 contactStiffness=ddr.stiffnessGround*fact,
255 contactDamping=ddr.dampingGround*fact,
256 frictionMaterialIndex=frictionIndexFree)
257 else:
258 if not planarPlatform:
259 raise ValueError('DifferentialDriveRobot: if useGeneralContact==False then planarPlatform must be True!')
260 if not usePenalty:
261 ddr.oRollingDisc = mbs.AddObject(ObjectJointRollingDisc(markerNumbers=[ddr.mGround , mWheel],
262 constrainedAxes=[i,1,1-planarPlatform], discRadius=wheelRadius,
263 visualization=VObjectJointRollingDisc(discWidth=wheelThickness,color=graphics.color.blue)))
264 else:
265 nGeneric = mbs.AddNode(NodeGenericData(initialCoordinates=[0,0,0], numberOfDataCoordinates=3))
266 ddr.oRollingDisc = mbs.AddObject(ObjectConnectorRollingDiscPenalty(markerNumbers=[ddr.mGround , mWheel],
267 nodeNumber = nGeneric,
268 discRadius=wheelRadius,
269 useLinearProportionalZone=True,
270 dryFrictionProportionalZone=0.05,
271 contactStiffness=ddr.stiffnessGround,
272 contactDamping=ddr.dampingGround,
273 dryFriction=[ddr.frictionCoeff]*2,
274 visualization=VObjectConnectorRollingDiscPenalty(discWidth=wheelThickness,color=graphics.color.blue)))
275
276
277
278 #compute wheel velocities for given forward and rotation velocity
279 def WheelVelocities(forwardVelocity, vRotation, wheelRadius, wheelDistance):
280 vLeft = -forwardVelocity/wheelRadius
281 vRight = vLeft
282 vOff = vRotation*wheelDistance*0.5/wheelRadius
283 vLeft += vOff
284 vRight -= vOff
285 return [vLeft, vRight]
286
287 ddr.WheelVelocities = WheelVelocities
288
289 #add some useful graphics settings
290
291 SC.visualizationSettings.general.circleTiling=200
292 SC.visualizationSettings.general.drawCoordinateSystem=True
293 SC.visualizationSettings.loads.show=False
294 SC.visualizationSettings.bodies.show=True
295 SC.visualizationSettings.markers.show=False
296 SC.visualizationSettings.bodies.kinematicTree.frameSize = 0.1
297 SC.visualizationSettings.bodies.kinematicTree.showJointFrames = False
298
299 SC.visualizationSettings.nodes.show=True
300 # SC.visualizationSettings.nodes.showBasis =True
301 SC.visualizationSettings.nodes.drawNodesAsPoint = False
302 SC.visualizationSettings.nodes.defaultSize = 0 #must not be -1, otherwise uses autocomputed size
303
304 SC.visualizationSettings.openGL.multiSampling = 4
305 # SC.visualizationSettings.openGL.shadow = 0.25
306 #SC.visualizationSettings.openGL.light0position = [-3,3,10,0]
307 # SC.visualizationSettings.contact.showBoundingBoxes = True
308 SC.visualizationSettings.contact.showTriangles = True
309 SC.visualizationSettings.contact.showSpheres = True
310
311 return ddr
312
313#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
314#for testing with a simple trajectory:
315if False:
316 SC = exu.SystemContainer()
317 mbs = SC.AddSystem()
318
319 useGeneralContact = False
320 usePenalty = True
321 wheelRadius = 0.04
322 wheelDistance = 0.4
323 ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
324 usePenalty=usePenalty, planarPlatform=True,
325 wheelRadius=wheelRadius, wheelDistance=wheelDistance)
326 mbs.Assemble()
327
328 #create some nice trajectory
329 def PreStepUserFunction(mbs, t):
330 vSet = ddr.jointVelocityOffsetVector #nominal values
331 vSet[-2:] = [0,0]
332
333 if t < 2:
334 vSet[-2:] = ddr.WheelVelocities(0.5, 0, wheelRadius, wheelDistance)
335 elif t < 3: pass
336 elif t < 4:
337 vSet[-2:] = ddr.WheelVelocities(0, 0.5*pi, wheelRadius, wheelDistance)
338 elif t < 5: pass
339 elif t < 7:
340 vSet[-2:] = ddr.WheelVelocities(-1, 0, wheelRadius, wheelDistance)
341 elif t < 8: pass
342 elif t < 9:
343 vSet[-2:] = ddr.WheelVelocities(0.5, 0.5*pi, wheelRadius, wheelDistance)
344
345 mbs.SetObjectParameter(ddr.oKT, "jointVelocityOffsetVector", vSet)
346
347 return True
348
349 mbs.SetPreStepUserFunction(PreStepUserFunction)
350
351 tEnd = 12 #tEnd = 0.8 for test suite
352 stepSize = 0.002 #h= 0.0002 for test suite
353 if useGeneralContact or usePenalty:
354 stepSize = 2e-4
355 # h*=0.1
356 # tEnd*=3
357 simulationSettings = exu.SimulationSettings()
358 simulationSettings.solutionSettings.solutionWritePeriod = 0.01
359 simulationSettings.solutionSettings.writeSolutionToFile = False
360 simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
361
362 simulationSettings.solutionSettings.sensorsWritePeriod = stepSize*10
363 # simulationSettings.displayComputationTime = True
364 # simulationSettings.displayStatistics = True
365 # simulationSettings.timeIntegration.verboseMode = 1
366 #simulationSettings.timeIntegration.simulateInRealtime = True
367 simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
368 #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
369
370
371 exu.StartRenderer()
372 if 'renderState' in exu.sys:
373 SC.SetRenderState(exu.sys['renderState'])
374 mbs.WaitForUserToContinue()
375
376 simulationSettings.timeIntegration.numberOfSteps = int(tEnd/stepSize)
377 simulationSettings.timeIntegration.endTime = tEnd
378 simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
379
380 SC.visualizationSettings.window.renderWindowSize=[1600,1024]
381 SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
382
383 if useGeneralContact or usePenalty:
384 mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitEuler)
385 # mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitMidpoint)
386 else:
387 mbs.SolveDynamic(simulationSettings)
388
389 SC.WaitForRenderEngineStopFlag()
390 exu.StopRenderer() #safely close rendering window!
391
392 if True:
393 mbs.PlotSensor(ddr.sPlatformVel, components=[0,1],closeAll=True)
394 mbs.PlotSensor(ddr.sPlatformAngVel, components=[0,1,2])
395
396def Rot2D(phi):
397 return np.array([[np.cos(phi),-np.sin(phi)],
398 [np.sin(phi), np.cos(phi)]])
399
400
401#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
402class RobotEnv(OpenAIGymInterfaceEnv):
403
404 #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
405 # you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
406 def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
407
408 #%%++++++++++++++++++++++++++++++++++++++++++++++
409 self.mbs = mbs
410 self.SC = SC
411
412 self.dimGroundX = 4 #dimension of ground
413 self.dimGroundY = 4
414 self.maxRotations = 0.6 #maximum number before learning stops
415
416 self.maxWheelSpeed = 2*pi #2*pi = 1 revolution/second
417 self.wheelRadius = 0.04
418 self.wheelDistance = 0.4
419 self.maxVelocity = self.wheelRadius * self.maxWheelSpeed
420 self.maxPlatformAngVel = self.wheelRadius/(self.wheelDistance*0.5)*self.maxWheelSpeed
421
422 useGeneralContact = False
423 usePenalty = True
424 ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
425 dimGroundX=self.dimGroundY, dimGroundY=self.dimGroundY,
426 usePenalty=usePenalty,
427 planarPlatform=True,
428 stiffnessGround=1e4,
429 wheelRadius=self.wheelRadius,
430 wheelDistance=self.wheelDistance)
431
432 self.ddr = ddr
433 self.oKT = ddr.oKT
434 self.nKT = ddr.nKT
435
436 #add graphics for desination
437 gDestination = graphics.Sphere(point=[0,0,0.05],radius = 0.02, color=graphics.color.red, nTiles=16)
438 self.oDestination = mbs.CreateGround(graphicsDataList=[gDestination])
439
440 mbs.Assemble()
441 self.stepSize = 1e-3
442 self.stepUpdateTime = 0.05
443
444 simulationSettings.solutionSettings.solutionWritePeriod = 0.1
445
446 writeSolutionToFile = False
447 if 'writeSolutionToFile' in kwargs:
448 writeSolutionToFile = kwargs['writeSolutionToFile']
449
450 useGraphics = False
451 if 'useGraphics' in kwargs:
452 useGraphics = kwargs['useGraphics']
453
454 simulationSettings.solutionSettings.writeSolutionToFile = writeSolutionToFile
455 simulationSettings.solutionSettings.writeSolutionToFile = False
456
457 simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
458
459 # simulationSettings.displayComputationTime = True
460 #simulationSettings.displayStatistics = True
461 #simulationSettings.timeIntegration.verboseMode = 1
462 #simulationSettings.timeIntegration.simulateInRealtime = True
463 simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
464 #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
465
466
467 simulationSettings.timeIntegration.numberOfSteps = int(self.stepUpdateTime/self.stepSize)
468 simulationSettings.timeIntegration.endTime = self.stepUpdateTime
469 simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
470
471 SC.visualizationSettings.window.renderWindowSize=[1600,1024]
472 SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
473
474 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
475 self.randomInitializationValue = [0.4*self.dimGroundX, 0.4*self.dimGroundY, self.maxRotations*2*pi*0.99,
476 self.maxVelocity*0,self.maxVelocity*0,self.maxPlatformAngVel*0,
477 0.3*self.dimGroundX, 0.3*self.dimGroundY, #destination points
478 ]
479
480 #must return state size
481 self.numberOfStates = 3 #posx, posy, rot
482 self.destinationStates = 2 #define here, if destination is included in states
483 self.destination = [0.,0.] #default value for destination
484
485 return self.destinationStates + self.numberOfStates * 2 #the number of states (position/velocity that are used by learning algorithm)
486
487 #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
488 def SetupSpaces(self):
489
490 high = np.array(
491 [
492 self.dimGroundX*0.5,
493 self.dimGroundY*0.5,
494 2*pi*self.maxRotations #10 full revolutions; no more should be needed for any task
495 ] +
496 [
497 np.finfo(np.float32).max,
498 ] * self.numberOfStates +
499 [self.dimGroundX*0.5,
500 self.dimGroundY*0.5]*(self.destinationStates>0)
501 ,
502 dtype=np.float32,
503 )
504
505
506 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
507 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
508 #for Env:
509
510 self.action_space = spaces.Box(low=np.array([-self.maxWheelSpeed,-self.maxWheelSpeed], dtype=np.float32),
511 high=np.array([self.maxWheelSpeed,self.maxWheelSpeed], dtype=np.float32), dtype=np.float32)
512
513 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
514 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
515
516
517 #**classFunction: this function is overwritten to map the action given by learning algorithm to the multibody system (environment)
518 def MapAction2MBS(self, action):
519 # force = action[0] * self.force_mag
520 # self.mbs.SetLoadParameter(self.lControl, 'load', force)
521 vSet = self.ddr.jointVelocityOffsetVector #nominal values
522 vSet[-2:] = action
523 # vSet[-1] = vSet[-2]
524 # vSet[-2:] = [2,2.5]
525 # print('action:', action)
526
527 self.mbs.SetObjectParameter(self.oKT, "jointVelocityOffsetVector", vSet)
528
529
530 #**classFunction: this function is overwrritten to collect output of simulation and map to self.state tuple
531 #**output: return bool done which contains information if system state is outside valid range
532 def Output2StateAndDone(self):
533
534 #+++++++++++++++++++++++++
535 #implemented for planar model only!
536 statesVector = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)[0:self.numberOfStates]
537 statesVectorGlob_t = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)[0:self.numberOfStates]
538
539 # vLoc = Rot2D(statesVector[2]).T @ statesVectorGlob_t[0:2]
540 # print('vLoc=',vLoc)
541 # statesVector_t = np.array([vLoc[1], statesVectorGlob_t[2]])
542 statesVector_t = statesVectorGlob_t #change to local in future!
543
544 self.state = list(statesVector) + list(statesVector_t)
545 if self.destinationStates:
546 self.state += list(self.destination)
547 self.state = tuple(self.state)
548
549 done = bool(
550 statesVector[0] < -self.dimGroundX
551 or statesVector[0] > self.dimGroundX
552 or statesVector[1] < -self.dimGroundY
553 or statesVector[1] > self.dimGroundY
554 or statesVector[2] < -self.maxRotations*2*pi
555 or statesVector[2] > self.maxRotations*2*pi
556 )
557
558 return done
559
560
561 #**classFunction: OVERRIDE this function to map the current state to mbs initial values
562 #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
563 def State2InitialValues(self):
564 #+++++++++++++++++++++++++++++++++++++++++++++
565 #states: x, y, phi, x_t, y_t, phi_t
566 initialValues = list(self.state[0:self.numberOfStates])+[0,0] #wheels do not initialize
567 initialValues_t = list(self.state[self.numberOfStates:2*self.numberOfStates])+[0,0]
568
569 if self.destinationStates:
570 if self.destination[0] != self.state[-2] or self.destination[1] != self.state[-1]:
571 # print('set new destination:', self.destination)
572 self.destination = self.state[-2:] #last two values are destination
573 self.mbs.SetObjectParameter(self.oDestination, 'referencePosition',
574 list(self.destination)+[0])
575
576 return [initialValues,initialValues_t]
577
578 def getReward(self):
579 X = self.dimGroundX
580 Y = self.dimGroundY
581 v = np.array([self.destination[0] - self.state[0], self.destination[1] - self.state[1]])
582 dist = NormL2(v)
583
584 phi = self.state[2]
585 localSpeed = Rot2D(phi).T @ [self.state[3],self.state[4]]
586 forwardSpeed = localSpeed[1]
587
588 reward = 1
589 #take power of 0.5 of dist to penalize small distances
590 #reward -= (dist/(0.5*NormL2([X,Y])))**0.5
591
592 #add penalty on rotations at a certain time (at beginning rotation may be needed...)
593 #reward -= 0.2*abs(self.state[5])/self.maxPlatformAngVel
594 # t = self.mbs.systemData.GetTime()
595 # if t > 4:
596 # fact = 1
597 # if t < 5: fact = 5-t
598 # reward -= fact*0.1*abs(self.state[5])/self.maxPlatformAngVel
599
600 #add penalty on reverse velocity: this supports solutions in forward direction!
601 # backwardMaxSpeed = 0.1
602 # if forwardSpeed < -backwardMaxSpeed*self.maxVelocity:
603 # reward -= abs(forwardSpeed)/self.maxVelocity+backwardMaxSpeed
604
605 reward -= 0.5*abs(forwardSpeed)
606
607 if dist > 0:
608 v0 = v*(1/dist)
609 vDir = Rot2D(phi) @ [0,1]
610 # print('v0=',v0,', dir=',vDir)
611 reward -= NormL2(vDir-v0)*0.5
612
613 # print('rew=', round(reward,3), ', vF=', round(0.5*abs(forwardSpeed),3),
614 # ', dir=', round(NormL2(vDir-v0)*0.5,4),
615 # 'v0=', v0, 'vDir=',vDir)
616
617 #reward -= max(0,abs(self.state[2])-pi)/(4*pi)
618 if reward < 0: reward = 0
619
620 # print('forwardSpeed',round(forwardSpeed/self.maxVelocity,3),
621 # ', reward',round(reward,3))
622
623 # print('reward=',reward, ', t=', self.mbs.systemData.GetTime())
624 return reward
625
626 #**classFunction: openAI gym interface function which is called to compute one step
627 def step(self, action):
628 err_msg = f"{action!r} ({type(action)}) invalid"
629 assert self.action_space.contains(action), err_msg
630 assert self.state is not None, "Call reset before using step method."
631
632 #++++++++++++++++++++++++++++++++++++++++++++++++++
633 #main steps:
634 [initialValues,initialValues_t] = self.State2InitialValues()
635 # print('initialValues_t:',initialValues_t)
636 # print(self.mbs)
637 qOriginal = self.mbs.systemData.GetODE2Coordinates(exu.ConfigurationType.Initial)
638 q_tOriginal = self.mbs.systemData.GetODE2Coordinates_t(exu.ConfigurationType.Initial)
639 initialValues[self.numberOfStates:] = qOriginal[self.numberOfStates:]
640 initialValues_t[self.numberOfStates:] = q_tOriginal[self.numberOfStates:]
641
642 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
643 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
644
645 self.MapAction2MBS(action)
646
647 #this may be time consuming for larger models!
648 self.IntegrateStep()
649
650 done = self.Output2StateAndDone()
651 if self.mbs.systemData.GetTime() > 16: #if it is too long, stop for now!
652 done = True
653
654 # print('state:', self.state, 'done: ', done)
655 #++++++++++++++++++++++++++++++++++++++++++++++++++
656 if not done:
657 reward = self.getReward()
658 elif self.steps_beyond_done is None:
659 self.steps_beyond_done = 0
660 reward = self.getReward()
661 else:
662
663 if self.steps_beyond_done == 0:
664 logger.warn(
665 "You are calling 'step()' even though this "
666 "environment has already returned done = True. You "
667 "should always call 'reset()' once you receive 'done = "
668 "True' -- any further steps are undefined behavior."
669 )
670 self.steps_beyond_done += 1
671 reward = 0.0
672
673 info = {}
674 terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
675 if useOldGym:
676 return np.array(self.state, dtype=np.float32), reward, terminated, info
677 else:
678 return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
679
680
681
682
683# sys.exit()
684
685#%%+++++++++++++++++++++++++++++++++++++++++++++
686if __name__ == '__main__': #this is only executed when file is direct called in Python
687 import time
688
689 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
690 #use some learning algorithm:
691 #pip install stable_baselines3
692 from stable_baselines3 import A2C, SAC
693
694
695 # here the model is loaded (either for vectorized or scalar environment´using SAC or A2C).
696 def GetModel(myEnv, modelType='SAC'):
697 if modelType=='SAC':
698 model = SAC('MlpPolicy',
699 env=myEnv,
700 #learning_rate=8e-4,
701 device='cpu', #usually cpu is faster for this size of networks
702 #batch_size=128,
703 verbose=1)
704 elif modelType == 'A2C':
705 model = A2C('MlpPolicy',
706 myEnv,
707 device='cpu',
708 #n_steps=5,
709 # policy_kwargs = dict(activation_fn=torch.nn.ReLU,
710 # net_arch=dict(pi=[8]*2, vf=[8]*2)),
711 verbose=1)
712 else:
713 print('Please specify the modelType.')
714 raise ValueError
715
716 return model
717
718 # sys.exit()
719 #create model and do reinforcement learning
720 modelType='A2C'
721 modelName = 'openAIgymDDrobot_'+modelType
722 if True: #'scalar' environment:
723 env = RobotEnv()
724 #check if model runs:
725 #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
726 #env.SetSolver(exu.DynamicSolverType.RK44) #very acurate
727 # env.TestModel(numberOfSteps=2000, seed=42, sleepTime=0.02*0, useRenderer=True)
728 model = GetModel(env, modelType=modelType)
729 env.useRenderer = True
730 # env.render()
731 # exu.StartRenderer()
732
733 ts = -time.time()
734 model.learn(total_timesteps=200000)
735
736 print('*** learning time total =',ts+time.time(),'***')
737
738 #save learned model
739
740 model.save("solution/" + modelName)
741 else:
742 import torch #stable-baselines3 is based on pytorch
743 n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
744 #n_cores = 8 #vecEnv can handle number of threads, while torch should rather use real cores
745 #torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
746 torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
747
748 print('using',n_cores,'cores')
749
750 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
751 vecEnv = SubprocVecEnv([RobotEnv for i in range(n_cores)])
752
753
754 #main learning task; training of double pendulum: with 20 cores 800 000 steps take in the continous case approximatly 18 minutes (SAC), discrete (A2C) takes 2 minutes.
755 model = GetModel(vecEnv, modelType=modelType)
756
757 ts = -time.time()
758 print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
759 # model.learn(total_timesteps=50000)
760 model.learn(total_timesteps=int(500_000),log_interval=500)
761 print('*** learning time total =',ts+time.time(),'***')
762
763 #save learned model
764 model.save("solution/" + modelName)
765
766 if False: #set True to visualize results
767 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
768 #only load and test
769 if False:
770 modelName = 'openAIgymDDrobot_A2C_16M'
771 modelType='A2C'
772 if modelType == 'SAC':
773 model = SAC.load("solution/" + modelName)
774 else:
775 model = A2C.load("solution/" + modelName)
776
777 env = RobotEnv() #larger threshold for testing
778 solutionFile='solution/learningCoordinates.txt'
779 env.TestModel(numberOfSteps=800, seed=3, model=model, solutionFileName=solutionFile,
780 stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
781
782 #++++++++++++++++++++++++++++++++++++++++++++++
783 #visualize (and make animations) in exudyn:
784 from exudyn.interactive import SolutionViewer
785 env.SC.visualizationSettings.general.autoFitScene = False
786 solution = LoadSolutionFile(solutionFile)
787 SolutionViewer(env.mbs, solution, timeout = 0.01, rowIncrement=2) #loads solution file via name stored in mbs