reinforcementLearningRobot.py

You can view and download this file on Github: reinforcementLearningRobot.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  Reinforcement learning example with stable-baselines3;
  5#           training a mobile platform with differential drives to meet target points
  6#           NOTE: frictional contact requires small enough step size to avoid artifacts!
  7#           GeneralContact works less stable than RollingDisc objects
  8#
  9# Author:    Johannes Gerstmayr
 10# Date:      2024-04-27
 11#
 12# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 13#
 14#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 15
 16#NOTE: this model is using the stable-baselines3 version 1.7.0, which requires:
 17#pip install exudyn
 18#pip install pip install wheel==0.38.4 setuptools==66.0.0
 19#      => this downgrades setuptools to be able to install gym==0.21
 20#pip install stable-baselines3==1.7.0
 21#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y
 22
 23import sys
 24sys.exudynFast = True
 25
 26import exudyn as exu
 27from exudyn.utilities import * #includes itemInterface and rigidBodyUtilities
 28import exudyn.graphics as graphics #only import if it does not conflict
 29from exudyn.robotics import *
 30from exudyn.artificialIntelligence import *
 31import math
 32
 33import copy
 34import os
 35os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
 36import torch
 37
 38import stable_baselines3
 39useOldGym = tuple(map(int, stable_baselines3.__version__.split('.'))) <= tuple(map(int, '1.8.0'.split('.')))
 40
 41##%% here the number of links can be changed. Note that for n < 3 the actuator
 42
 43#**function: Add model of differential drive robot (two wheels which can be actuated independently);
 44#            model is parameterized for kinematics, inertia parameters as well as for graphics;
 45#            the model is created as a minimum coordinate model to use it together with explicit integration;
 46#            a contact model is added if it does not exist;
 47def DifferentialDriveRobot(SC, mbs,
 48                           platformInertia = None,
 49                           wheelInertia = None,
 50                           platformPosition = [0,0,0], #this is the location of the platform ground centerpoint
 51                           wheelDistance = 0.4,        #wheel midpoint-to-midpoint distance
 52                           platformHeight = 0.1,
 53                           platformRadius = 0.22,
 54                           platformMass = 5,
 55                           platformGroundOffset = 0.02,
 56                           planarPlatform = True,
 57                           dimGroundX = 8, dimGroundY = 8,
 58                           gravity = [0,0,-9.81],
 59                           wheelRadius = 0.04,
 60                           wheelThickness = 0.01,
 61                           wheelMass = 0.05,
 62                           pControl = 0,
 63                           dControl = 0.02,
 64                           usePenalty = True, #use penalty formulation in case useGeneralContact=False
 65                           frictionProportionalZone = 0.025,
 66                           frictionCoeff = 1, stiffnessGround = 1e5,
 67                           gContact = None,
 68                           frictionIndexWheel = None, frictionIndexFree = None,
 69                           useGeneralContact = False #generalcontact shows large errors currently
 70                           ):
 71
 72    #add class which can be returned to enable user to access parameters
 73    class ddr: pass
 74
 75    #+++++++++++++++++++++++++++++++++++++++++++
 76    #create contact (if not provided)
 77    ddr.gGround = graphics.CheckerBoard(normal= [0,0,1],
 78                                           size=dimGroundX, size2=dimGroundY, nTiles=8)
 79
 80    ddr.oGround= mbs.AddObject(ObjectGround(referencePosition= [0,0,0],
 81                                            visualization=VObjectGround(graphicsData= [ddr.gGround])))
 82    ddr.mGround = mbs.AddMarker(MarkerBodyRigid(bodyNumber=ddr.oGround))
 83
 84
 85    ddr.frictionCoeff = frictionCoeff
 86    ddr.stiffnessGround = stiffnessGround
 87    ddr.dampingGround = ddr.stiffnessGround*0.01
 88    if gContact == None and useGeneralContact:
 89        frictionIndexGround = 0
 90        frictionIndexWheel = 0
 91        frictionIndexFree = 1
 92
 93        ddr.gContact = mbs.AddGeneralContact()
 94        ddr.gContact.frictionProportionalZone = frictionProportionalZone
 95        #ddr.gContact.frictionVelocityPenalty = 1e4
 96
 97        ddr.gContact.SetFrictionPairings(np.diag([ddr.frictionCoeff,0])) #second index is for frictionless contact
 98        ddr.gContact.SetSearchTreeCellSize(numberOfCells=[4,4,1]) #just a few contact cells
 99
100        #add ground to contact
101        [meshPoints, meshTrigs] = graphics.ToPointsAndTrigs(ddr.gGround) #could also use only 1 quad ...
102
103        ddr.gContact.AddTrianglesRigidBodyBased( rigidBodyMarkerIndex = ddr.mGround,
104                                            contactStiffness = ddr.stiffnessGround, contactDamping = ddr.dampingGround,
105                                            frictionMaterialIndex = frictionIndexGround,
106                                            pointList=meshPoints, triangleList=meshTrigs)
107
108
109    #+++++++++++++++++++++++++++++++++++++++++++
110    #create inertias (if not provided)
111    if wheelInertia == None:
112        ddr.iWheel = InertiaCylinder(wheelMass/(wheelRadius**2*pi*wheelThickness),
113                                 wheelThickness, wheelRadius, axis=0) #rotation about local X-axis
114    else:
115        ddr.iWheel = RigidBodyInertia(mass=wheelMass, inertiaTensorAtCOM=np.diag(wheelInertia))
116
117    if platformInertia == None:
118        ddr.iPlatform = InertiaCylinder(platformMass/(platformRadius**2*pi*platformHeight),
119                                 platformHeight, platformRadius, axis=0) #rotation about local X-axis
120        ddr.iPlatform = ddr.iPlatform.Translated([0,0,0.5*platformHeight+platformGroundOffset]) #put COM at mid of platform; but referencepoint is at ground level!
121    else:
122        ddr.iPlatform = RigidBodyInertia(mass=platformMass, inertiaTensorAtCOM=np.diag(platformInertia))
123
124    #+++++++++++++++++++++++++++++++++++++++++++
125    #create kinematic tree for wheeled robot
126    ddr.gPlatform = [graphics.Cylinder([0,0,platformGroundOffset], [0,0,platformHeight], platformRadius, color=graphics.color.steelblue, nTiles=64, addEdges=True, addFaces=False)]
127    ddr.gPlatform += [graphics.Cylinder([0,platformRadius*0.8,platformGroundOffset*1.5], [0,0,platformHeight], platformRadius*0.2, color=graphics.color.grey)]
128    ddr.gPlatform += [graphics.Basis(length=0.1)]
129    ddr.gWheel = [graphics.Cylinder([-wheelThickness*0.5,0,0], [wheelThickness,0,0], wheelRadius, color=graphics.color.red, nTiles=32)]
130    ddr.gWheel += [graphics.Brick([0,0,0],[wheelThickness*1.1,wheelRadius*1.3,wheelRadius*1.3], color=graphics.color.grey)]
131    ddr.gWheel += [graphics.Basis(length=0.075)]
132
133    #create node for unknowns of KinematicTree
134    ddr.nJoints = 3+3+2 - 3*planarPlatform #6 (3 in planar case) for the platform and 2 for the wheels;
135    referenceCoordinates=[0.]*ddr.nJoints
136    referenceCoordinates[0:len(platformPosition)] = platformPosition
137    ddr.nKT = mbs.AddNode(NodeGenericODE2(referenceCoordinates=referenceCoordinates,
138                                               initialCoordinates=[0.]*ddr.nJoints,
139                                               initialCoordinates_t=[0.]*ddr.nJoints,
140                                               numberOfODE2Coordinates=ddr.nJoints))
141
142    ddr.linkMasses = []
143    ddr.gList = [] #list of graphics objects for links
144    ddr.linkCOMs = exu.Vector3DList()
145    ddr.linkInertiasCOM=exu.Matrix3DList()
146    ddr.jointTransformations=exu.Matrix3DList()
147    ddr.jointOffsets = exu.Vector3DList()
148    ddr.jointTypes = [exu.JointType.PrismaticX,exu.JointType.PrismaticY,exu.JointType.RevoluteZ]
149    ddr.linkParents = list(np.arange(3)-1)
150
151    ddr.platformIndex = 2
152    if not planarPlatform:
153        ddr.jointTypes+=[exu.JointType.PrismaticZ,exu.JointType.RevoluteY,exu.JointType.RevoluteX]
154        ddr.linkParents+=[2,3,4]
155        ddr.platformIndex = 5
156
157    #add data for wheels:
158    ddr.jointTypes += [exu.JointType.RevoluteX]*2
159    ddr.linkParents += [ddr.platformIndex]*2
160
161    #now create offsets, graphics list and inertia for all links
162    for i in range(len(ddr.jointTypes)):
163        ddr.jointTransformations.Append(np.eye(3))
164
165        if i < ddr.platformIndex:
166            ddr.gList += [[]]
167            ddr.jointOffsets.Append([0,0,0])
168            ddr.linkInertiasCOM.Append(np.zeros([3,3]))
169            ddr.linkCOMs.Append([0,0,0])
170            ddr.linkMasses.append(0)
171        elif i == ddr.platformIndex:
172            ddr.gList += [ddr.gPlatform]
173            ddr.jointOffsets.Append([0,0,0])
174            ddr.linkInertiasCOM.Append(ddr.iPlatform.InertiaCOM())
175            ddr.linkCOMs.Append(ddr.iPlatform.COM())
176            ddr.linkMasses.append(ddr.iPlatform.Mass())
177        else:
178            ddr.gList += [ddr.gWheel]
179            sign = -1+(i>ddr.platformIndex+1)*2
180            offZ = wheelRadius
181            if planarPlatform and (useGeneralContact or usePenalty):
182                offZ *= 0.999 #to ensure contact
183            ddr.jointOffsets.Append([sign*wheelDistance*0.5,0,offZ])
184            ddr.linkInertiasCOM.Append(ddr.iWheel.InertiaCOM())
185            ddr.linkCOMs.Append(ddr.iWheel.COM())
186            ddr.linkMasses.append(ddr.iWheel.Mass())
187
188
189    ddr.jointDControlVector = [0]*ddr.nJoints
190    ddr.jointPControlVector = [0]*ddr.nJoints
191    ddr.jointPositionOffsetVector = [0]*ddr.nJoints
192    ddr.jointVelocityOffsetVector = [0]*ddr.nJoints
193
194    ddr.jointPControlVector[-2:] = [pControl]*2
195    ddr.jointDControlVector[-2:] = [dControl]*2
196
197
198    #create KinematicTree
199    ddr.oKT = mbs.AddObject(ObjectKinematicTree(nodeNumber=ddr.nKT,
200                                      jointTypes=ddr.jointTypes,
201                                      linkParents=ddr.linkParents,
202                                      jointTransformations=ddr.jointTransformations,
203                                      jointOffsets=ddr.jointOffsets,
204                                      linkInertiasCOM=ddr.linkInertiasCOM,
205                                      linkCOMs=ddr.linkCOMs,
206                                      linkMasses=ddr.linkMasses,
207                                      baseOffset = [0.,0.,0.], gravity=gravity,
208                                      jointPControlVector=ddr.jointPControlVector,
209                                      jointDControlVector=ddr.jointDControlVector,
210                                      jointPositionOffsetVector=ddr.jointPositionOffsetVector,
211                                      jointVelocityOffsetVector=ddr.jointVelocityOffsetVector,
212                                      visualization=VObjectKinematicTree(graphicsDataList = ddr.gList)))
213
214    ddr.sPlatformPos = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
215                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Position))
216    ddr.sPlatformVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
217                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Velocity))
218    ddr.sPlatformAng = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
219                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Rotation))
220    ddr.sPlatformAngVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
221                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.AngularVelocity))
222
223    #create markers for wheels and add contact
224    ddr.mWheels = []
225    for i in range(2):
226        mWheel = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
227                                                        linkNumber=ddr.platformIndex+1+i,
228                                                        localPosition=[0,0,0]))
229        ddr.mWheels.append(mWheel)
230        if useGeneralContact:
231            ddr.gContact.AddSphereWithMarker(mWheel,
232                                             radius=wheelRadius,
233                                             contactStiffness=ddr.stiffnessGround,
234                                             contactDamping=ddr.dampingGround,
235                                             frictionMaterialIndex=frictionIndexWheel)
236            #for 3D platform, we need additional support points:
237            if not planarPlatform:
238                rY = platformRadius-platformGroundOffset
239                mPlatformFront = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
240                                                                linkNumber=ddr.platformIndex,
241                                                                localPosition=[0,rY,1.01*platformGroundOffset]))
242                mPlatformBack = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
243                                                                linkNumber=ddr.platformIndex,
244                                                                localPosition=[0,-rY,1.01*platformGroundOffset]))
245
246                fact = 1
247                ddr.gContact.AddSphereWithMarker(mPlatformFront,
248                                                 radius=platformGroundOffset,
249                                                 contactStiffness=ddr.stiffnessGround*fact,
250                                                 contactDamping=ddr.dampingGround*fact,
251                                                 frictionMaterialIndex=frictionIndexFree)
252                ddr.gContact.AddSphereWithMarker(mPlatformBack,
253                                                 radius=platformGroundOffset,
254                                                 contactStiffness=ddr.stiffnessGround*fact,
255                                                 contactDamping=ddr.dampingGround*fact,
256                                                 frictionMaterialIndex=frictionIndexFree)
257        else:
258            if not planarPlatform:
259                raise ValueError('DifferentialDriveRobot: if useGeneralContact==False then planarPlatform must be True!')
260            if not usePenalty:
261                ddr.oRollingDisc = mbs.AddObject(ObjectJointRollingDisc(markerNumbers=[ddr.mGround , mWheel],
262                                                     constrainedAxes=[i,1,1-planarPlatform], discRadius=wheelRadius,
263                                                     visualization=VObjectJointRollingDisc(discWidth=wheelThickness,color=graphics.color.blue)))
264            else:
265                nGeneric = mbs.AddNode(NodeGenericData(initialCoordinates=[0,0,0], numberOfDataCoordinates=3))
266                ddr.oRollingDisc = mbs.AddObject(ObjectConnectorRollingDiscPenalty(markerNumbers=[ddr.mGround , mWheel],
267                                                nodeNumber = nGeneric,
268                                                discRadius=wheelRadius,
269                                                useLinearProportionalZone=True,
270                                                dryFrictionProportionalZone=0.05,
271                                                contactStiffness=ddr.stiffnessGround,
272                                                contactDamping=ddr.dampingGround,
273                                                dryFriction=[ddr.frictionCoeff]*2,
274                                                visualization=VObjectConnectorRollingDiscPenalty(discWidth=wheelThickness,color=graphics.color.blue)))
275
276
277
278    #compute wheel velocities for given forward and rotation velocity
279    def WheelVelocities(forwardVelocity, vRotation, wheelRadius, wheelDistance):
280        vLeft = -forwardVelocity/wheelRadius
281        vRight = vLeft
282        vOff = vRotation*wheelDistance*0.5/wheelRadius
283        vLeft += vOff
284        vRight -= vOff
285        return [vLeft, vRight]
286
287    ddr.WheelVelocities = WheelVelocities
288
289    #add some useful graphics settings
290
291    SC.visualizationSettings.general.circleTiling=200
292    SC.visualizationSettings.general.drawCoordinateSystem=True
293    SC.visualizationSettings.loads.show=False
294    SC.visualizationSettings.bodies.show=True
295    SC.visualizationSettings.markers.show=False
296    SC.visualizationSettings.bodies.kinematicTree.frameSize = 0.1
297    SC.visualizationSettings.bodies.kinematicTree.showJointFrames = False
298
299    SC.visualizationSettings.nodes.show=True
300    # SC.visualizationSettings.nodes.showBasis =True
301    SC.visualizationSettings.nodes.drawNodesAsPoint = False
302    SC.visualizationSettings.nodes.defaultSize = 0 #must not be -1, otherwise uses autocomputed size
303
304    SC.visualizationSettings.openGL.multiSampling = 4
305    # SC.visualizationSettings.openGL.shadow = 0.25
306    #SC.visualizationSettings.openGL.light0position = [-3,3,10,0]
307    # SC.visualizationSettings.contact.showBoundingBoxes = True
308    SC.visualizationSettings.contact.showTriangles = True
309    SC.visualizationSettings.contact.showSpheres = True
310
311    return ddr
312
313#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
314#for testing with a simple trajectory:
315if False:
316    SC = exu.SystemContainer()
317    mbs = SC.AddSystem()
318
319    useGeneralContact = False
320    usePenalty = True
321    wheelRadius = 0.04
322    wheelDistance = 0.4
323    ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
324                                 usePenalty=usePenalty, planarPlatform=True,
325                                 wheelRadius=wheelRadius, wheelDistance=wheelDistance)
326    mbs.Assemble()
327
328    #create some nice trajectory
329    def PreStepUserFunction(mbs, t):
330        vSet = ddr.jointVelocityOffsetVector #nominal values
331        vSet[-2:] = [0,0]
332
333        if t < 2:
334            vSet[-2:] = ddr.WheelVelocities(0.5, 0, wheelRadius, wheelDistance)
335        elif t < 3: pass
336        elif t < 4:
337            vSet[-2:] = ddr.WheelVelocities(0, 0.5*pi, wheelRadius, wheelDistance)
338        elif t < 5: pass
339        elif t < 7:
340            vSet[-2:] = ddr.WheelVelocities(-1, 0, wheelRadius, wheelDistance)
341        elif t < 8: pass
342        elif t < 9:
343            vSet[-2:] = ddr.WheelVelocities(0.5, 0.5*pi, wheelRadius, wheelDistance)
344
345        mbs.SetObjectParameter(ddr.oKT, "jointVelocityOffsetVector", vSet)
346
347        return True
348
349    mbs.SetPreStepUserFunction(PreStepUserFunction)
350
351    tEnd = 12 #tEnd = 0.8 for test suite
352    stepSize = 0.002 #h= 0.0002 for test suite
353    if useGeneralContact or usePenalty:
354        stepSize = 2e-4
355    # h*=0.1
356    # tEnd*=3
357    simulationSettings = exu.SimulationSettings()
358    simulationSettings.solutionSettings.solutionWritePeriod = 0.01
359    simulationSettings.solutionSettings.writeSolutionToFile = False
360    simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
361
362    simulationSettings.solutionSettings.sensorsWritePeriod = stepSize*10
363    # simulationSettings.displayComputationTime = True
364    # simulationSettings.displayStatistics = True
365    # simulationSettings.timeIntegration.verboseMode = 1
366    #simulationSettings.timeIntegration.simulateInRealtime = True
367    simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
368    #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
369
370
371    exu.StartRenderer()
372    if 'renderState' in exu.sys:
373        SC.SetRenderState(exu.sys['renderState'])
374    mbs.WaitForUserToContinue()
375
376    simulationSettings.timeIntegration.numberOfSteps = int(tEnd/stepSize)
377    simulationSettings.timeIntegration.endTime = tEnd
378    simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
379
380    SC.visualizationSettings.window.renderWindowSize=[1600,1024]
381    SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
382
383    if useGeneralContact or usePenalty:
384        mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitEuler)
385        # mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitMidpoint)
386    else:
387        mbs.SolveDynamic(simulationSettings)
388
389    SC.WaitForRenderEngineStopFlag()
390    exu.StopRenderer() #safely close rendering window!
391
392    if True:
393        mbs.PlotSensor(ddr.sPlatformVel, components=[0,1],closeAll=True)
394        mbs.PlotSensor(ddr.sPlatformAngVel, components=[0,1,2])
395
396def Rot2D(phi):
397    return np.array([[np.cos(phi),-np.sin(phi)],
398                     [np.sin(phi), np.cos(phi)]])
399
400
401#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
402class RobotEnv(OpenAIGymInterfaceEnv):
403
404    #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
405    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
406    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
407
408        #%%++++++++++++++++++++++++++++++++++++++++++++++
409        self.mbs = mbs
410        self.SC = SC
411
412        self.dimGroundX = 4 #dimension of ground
413        self.dimGroundY = 4
414        self.maxRotations = 0.6 #maximum number before learning stops
415
416        self.maxWheelSpeed = 2*pi #2*pi = 1 revolution/second
417        self.wheelRadius = 0.04
418        self.wheelDistance = 0.4
419        self.maxVelocity = self.wheelRadius * self.maxWheelSpeed
420        self.maxPlatformAngVel = self.wheelRadius/(self.wheelDistance*0.5)*self.maxWheelSpeed
421
422        useGeneralContact = False
423        usePenalty = True
424        ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
425                                     dimGroundX=self.dimGroundY, dimGroundY=self.dimGroundY,
426                                     usePenalty=usePenalty,
427                                     planarPlatform=True,
428                                     stiffnessGround=1e4,
429                                     wheelRadius=self.wheelRadius,
430                                     wheelDistance=self.wheelDistance)
431
432        self.ddr = ddr
433        self.oKT = ddr.oKT
434        self.nKT = ddr.nKT
435
436        #add graphics for desination
437        gDestination = graphics.Sphere(point=[0,0,0.05],radius = 0.02, color=graphics.color.red, nTiles=16)
438        self.oDestination = mbs.CreateGround(graphicsDataList=[gDestination])
439
440        mbs.Assemble()
441        self.stepSize = 1e-3
442        self.stepUpdateTime = 0.05
443
444        simulationSettings.solutionSettings.solutionWritePeriod = 0.1
445
446        writeSolutionToFile = False
447        if 'writeSolutionToFile' in kwargs:
448            writeSolutionToFile = kwargs['writeSolutionToFile']
449
450        useGraphics = False
451        if 'useGraphics' in kwargs:
452            useGraphics = kwargs['useGraphics']
453
454        simulationSettings.solutionSettings.writeSolutionToFile = writeSolutionToFile
455        simulationSettings.solutionSettings.writeSolutionToFile = False
456
457        simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
458
459        # simulationSettings.displayComputationTime = True
460        #simulationSettings.displayStatistics = True
461        #simulationSettings.timeIntegration.verboseMode = 1
462        #simulationSettings.timeIntegration.simulateInRealtime = True
463        simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
464        #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
465
466
467        simulationSettings.timeIntegration.numberOfSteps = int(self.stepUpdateTime/self.stepSize)
468        simulationSettings.timeIntegration.endTime = self.stepUpdateTime
469        simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
470
471        SC.visualizationSettings.window.renderWindowSize=[1600,1024]
472        SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
473
474        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
475        self.randomInitializationValue = [0.4*self.dimGroundX, 0.4*self.dimGroundY, self.maxRotations*2*pi*0.99,
476                                          self.maxVelocity*0,self.maxVelocity*0,self.maxPlatformAngVel*0,
477                                          0.3*self.dimGroundX, 0.3*self.dimGroundY, #destination points
478                                          ]
479
480        #must return state size
481        self.numberOfStates = 3 #posx, posy, rot
482        self.destinationStates = 2 #define here, if destination is included in states
483        self.destination = [0.,0.] #default value for destination
484
485        return self.destinationStates + self.numberOfStates * 2 #the number of states (position/velocity that are used by learning algorithm)
486
487    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
488    def SetupSpaces(self):
489
490        high = np.array(
491            [
492                self.dimGroundX*0.5,
493                self.dimGroundY*0.5,
494                2*pi*self.maxRotations #10 full revolutions; no more should be needed for any task
495            ] +
496            [
497                np.finfo(np.float32).max,
498            ] * self.numberOfStates +
499            [self.dimGroundX*0.5,
500             self.dimGroundY*0.5]*(self.destinationStates>0)
501            ,
502            dtype=np.float32,
503        )
504
505
506        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
507        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
508        #for Env:
509
510        self.action_space = spaces.Box(low=np.array([-self.maxWheelSpeed,-self.maxWheelSpeed], dtype=np.float32),
511                                       high=np.array([self.maxWheelSpeed,self.maxWheelSpeed], dtype=np.float32), dtype=np.float32)
512
513        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
514        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
515
516
517    #**classFunction: this function is overwritten to map the action given by learning algorithm to the multibody system (environment)
518    def MapAction2MBS(self, action):
519        # force = action[0] * self.force_mag
520        # self.mbs.SetLoadParameter(self.lControl, 'load', force)
521        vSet = self.ddr.jointVelocityOffsetVector #nominal values
522        vSet[-2:] = action
523        # vSet[-1] = vSet[-2]
524        # vSet[-2:] = [2,2.5]
525        # print('action:', action)
526
527        self.mbs.SetObjectParameter(self.oKT, "jointVelocityOffsetVector", vSet)
528
529
530    #**classFunction: this function is overwrritten to collect output of simulation and map to self.state tuple
531    #**output: return bool done which contains information if system state is outside valid range
532    def Output2StateAndDone(self):
533
534        #+++++++++++++++++++++++++
535        #implemented for planar model only!
536        statesVector =  self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)[0:self.numberOfStates]
537        statesVectorGlob_t =  self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)[0:self.numberOfStates]
538
539        # vLoc = Rot2D(statesVector[2]).T @ statesVectorGlob_t[0:2]
540        # print('vLoc=',vLoc)
541        # statesVector_t = np.array([vLoc[1], statesVectorGlob_t[2]])
542        statesVector_t = statesVectorGlob_t #change to local in future!
543
544        self.state = list(statesVector) + list(statesVector_t)
545        if self.destinationStates:
546            self.state += list(self.destination)
547        self.state = tuple(self.state)
548
549        done = bool(
550            statesVector[0] < -self.dimGroundX
551            or statesVector[0] > self.dimGroundX
552            or statesVector[1] < -self.dimGroundY
553            or statesVector[1] > self.dimGroundY
554            or statesVector[2] < -self.maxRotations*2*pi
555            or statesVector[2] > self.maxRotations*2*pi
556            )
557
558        return done
559
560
561    #**classFunction: OVERRIDE this function to map the current state to mbs initial values
562    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
563    def State2InitialValues(self):
564        #+++++++++++++++++++++++++++++++++++++++++++++
565        #states: x, y, phi, x_t, y_t, phi_t
566        initialValues = list(self.state[0:self.numberOfStates])+[0,0] #wheels do not initialize
567        initialValues_t = list(self.state[self.numberOfStates:2*self.numberOfStates])+[0,0]
568
569        if self.destinationStates:
570            if self.destination[0] != self.state[-2] or self.destination[1] != self.state[-1]:
571                # print('set new destination:', self.destination)
572                self.destination = self.state[-2:] #last two values are destination
573                self.mbs.SetObjectParameter(self.oDestination, 'referencePosition',
574                                             list(self.destination)+[0])
575
576        return [initialValues,initialValues_t]
577
578    def getReward(self):
579        X = self.dimGroundX
580        Y = self.dimGroundY
581        v = np.array([self.destination[0] - self.state[0], self.destination[1] - self.state[1]])
582        dist = NormL2(v)
583
584        phi = self.state[2]
585        localSpeed = Rot2D(phi).T @ [self.state[3],self.state[4]]
586        forwardSpeed = localSpeed[1]
587
588        reward = 1
589        #take power of 0.5 of dist to penalize small distances
590        #reward -= (dist/(0.5*NormL2([X,Y])))**0.5
591
592        #add penalty on rotations at a certain time (at beginning rotation may be needed...)
593        #reward -= 0.2*abs(self.state[5])/self.maxPlatformAngVel
594        # t = self.mbs.systemData.GetTime()
595        # if t > 4:
596        #     fact = 1
597        #     if t < 5: fact  = 5-t
598        #     reward -= fact*0.1*abs(self.state[5])/self.maxPlatformAngVel
599
600        #add penalty on reverse velocity: this supports solutions in forward direction!
601        # backwardMaxSpeed = 0.1
602        # if forwardSpeed < -backwardMaxSpeed*self.maxVelocity:
603        #     reward -= abs(forwardSpeed)/self.maxVelocity+backwardMaxSpeed
604
605        reward -= 0.5*abs(forwardSpeed)
606
607        if dist > 0:
608            v0 = v*(1/dist)
609            vDir = Rot2D(phi) @ [0,1]
610            # print('v0=',v0,', dir=',vDir)
611            reward -= NormL2(vDir-v0)*0.5
612
613        # print('rew=', round(reward,3), ', vF=', round(0.5*abs(forwardSpeed),3),
614        #       ', dir=', round(NormL2(vDir-v0)*0.5,4),
615        #       'v0=', v0, 'vDir=',vDir)
616
617        #reward -= max(0,abs(self.state[2])-pi)/(4*pi)
618        if reward < 0: reward = 0
619
620        # print('forwardSpeed',round(forwardSpeed/self.maxVelocity,3),
621        #       ', reward',round(reward,3))
622
623        # print('reward=',reward, ', t=', self.mbs.systemData.GetTime())
624        return reward
625
626    #**classFunction: openAI gym interface function which is called to compute one step
627    def step(self, action):
628        err_msg = f"{action!r} ({type(action)}) invalid"
629        assert self.action_space.contains(action), err_msg
630        assert self.state is not None, "Call reset before using step method."
631
632        #++++++++++++++++++++++++++++++++++++++++++++++++++
633        #main steps:
634        [initialValues,initialValues_t] = self.State2InitialValues()
635        # print('initialValues_t:',initialValues_t)
636        # print(self.mbs)
637        qOriginal = self.mbs.systemData.GetODE2Coordinates(exu.ConfigurationType.Initial)
638        q_tOriginal = self.mbs.systemData.GetODE2Coordinates_t(exu.ConfigurationType.Initial)
639        initialValues[self.numberOfStates:] = qOriginal[self.numberOfStates:]
640        initialValues_t[self.numberOfStates:] = q_tOriginal[self.numberOfStates:]
641
642        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
643        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
644
645        self.MapAction2MBS(action)
646
647        #this may be time consuming for larger models!
648        self.IntegrateStep()
649
650        done = self.Output2StateAndDone()
651        if self.mbs.systemData.GetTime() > 16: #if it is too long, stop for now!
652            done = True
653
654        # print('state:', self.state, 'done: ', done)
655        #++++++++++++++++++++++++++++++++++++++++++++++++++
656        if not done:
657            reward = self.getReward()
658        elif self.steps_beyond_done is None:
659            self.steps_beyond_done = 0
660            reward = self.getReward()
661        else:
662
663            if self.steps_beyond_done == 0:
664                logger.warn(
665                    "You are calling 'step()' even though this "
666                    "environment has already returned done = True. You "
667                    "should always call 'reset()' once you receive 'done = "
668                    "True' -- any further steps are undefined behavior."
669                )
670            self.steps_beyond_done += 1
671            reward = 0.0
672
673        info = {}
674        terminated, truncated = done, False # since stable-baselines3 > 1.8.0 implementations terminated and truncated
675        if useOldGym:
676            return np.array(self.state, dtype=np.float32), reward, terminated, info
677        else:
678            return np.array(self.state, dtype=np.float32), reward, terminated, truncated, info
679
680
681
682
683# sys.exit()
684
685#%%+++++++++++++++++++++++++++++++++++++++++++++
686if __name__ == '__main__': #this is only executed when file is direct called in Python
687    import time
688
689    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
690    #use some learning algorithm:
691    #pip install stable_baselines3
692    from stable_baselines3 import A2C, SAC
693
694
695    # here the model is loaded (either for vectorized or scalar environment´using SAC or A2C).
696    def GetModel(myEnv, modelType='SAC'):
697        if modelType=='SAC':
698            model = SAC('MlpPolicy',
699                   env=myEnv,
700                   #learning_rate=8e-4,
701                   device='cpu', #usually cpu is faster for this size of networks
702                   #batch_size=128,
703                   verbose=1)
704        elif modelType == 'A2C':
705            model = A2C('MlpPolicy',
706                    myEnv,
707                    device='cpu',
708                    #n_steps=5,
709                    # policy_kwargs = dict(activation_fn=torch.nn.ReLU,
710                    #  net_arch=dict(pi=[8]*2, vf=[8]*2)),
711                    verbose=1)
712        else:
713            print('Please specify the modelType.')
714            raise ValueError
715
716        return model
717
718    # sys.exit()
719    #create model and do reinforcement learning
720    modelType='A2C'
721    modelName = 'openAIgymDDrobot_'+modelType
722    if True: #'scalar' environment:
723        env = RobotEnv()
724        #check if model runs:
725        #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
726        #env.SetSolver(exu.DynamicSolverType.RK44) #very acurate
727        # env.TestModel(numberOfSteps=2000, seed=42, sleepTime=0.02*0, useRenderer=True)
728        model = GetModel(env, modelType=modelType)
729        env.useRenderer = True
730        # env.render()
731        # exu.StartRenderer()
732
733        ts = -time.time()
734        model.learn(total_timesteps=200000)
735
736        print('*** learning time total =',ts+time.time(),'***')
737
738        #save learned model
739
740        model.save("solution/" + modelName)
741    else:
742        import torch #stable-baselines3 is based on pytorch
743        n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
744        #n_cores = 8 #vecEnv can handle number of threads, while torch should rather use real cores
745        #torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
746        torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
747
748        print('using',n_cores,'cores')
749
750        from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
751        vecEnv = SubprocVecEnv([RobotEnv for i in range(n_cores)])
752
753
754        #main learning task;  training of double pendulum: with 20 cores 800 000 steps take in the continous case approximatly 18 minutes (SAC), discrete (A2C) takes 2 minutes.
755        model = GetModel(vecEnv, modelType=modelType)
756
757        ts = -time.time()
758        print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
759        # model.learn(total_timesteps=50000)
760        model.learn(total_timesteps=int(500_000),log_interval=500)
761        print('*** learning time total =',ts+time.time(),'***')
762
763        #save learned model
764        model.save("solution/" + modelName)
765
766    if False: #set True to visualize results
767        #%%++++++++++++++++++++++++++++++++++++++++++++++++++
768        #only load and test
769        if False:
770            modelName = 'openAIgymDDrobot_A2C_16M'
771            modelType='A2C'
772            if modelType == 'SAC':
773                model = SAC.load("solution/" + modelName)
774            else:
775                model = A2C.load("solution/" + modelName)
776
777        env = RobotEnv() #larger threshold for testing
778        solutionFile='solution/learningCoordinates.txt'
779        env.TestModel(numberOfSteps=800, seed=3, model=model, solutionFileName=solutionFile,
780                      stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
781
782        #++++++++++++++++++++++++++++++++++++++++++++++
783        #visualize (and make animations) in exudyn:
784        from exudyn.interactive import SolutionViewer
785        env.SC.visualizationSettings.general.autoFitScene = False
786        solution = LoadSolutionFile(solutionFile)
787        SolutionViewer(env.mbs, solution, timeout = 0.01, rowIncrement=2) #loads solution file via name stored in mbs