testGymCartpole.py
You can view and download this file on Github: testGymCartpole.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a cart-pole example
#           Needs input file testGymCartpoleEnv.py which defines the model in the gym environment
#           Works well with Python3.8!
#
# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
# Date:     2022-05-17
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++
#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
#pip install wheel==0.38.4 setuptools==66.0.0
# => this downgrades setuptools to be able to install gym==0.21
#pip install stable-baselines3==1.7.0

import time
from math import sin, cos
from testGymCartpoleEnv import CartPoleEnv

useGraphics = True
if True: #test the model by just integrating in Exudyn and applying a force

    env = CartPoleEnv()
    env.useRenderer = False #set this True to show visualization
    observation, info = env.reset(seed=42, return_info=True)
    ts = -time.time() #start wall-clock timing; end time is added after the loop

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        # action = env.action_space.sample()
        # observation, reward, done, info = env.step(action)
        # if done:
        #     observation, info = env.reset(return_info=True)
        # env.render()
        # time.sleep(0.01)
    ts = ts+time.time()
    print('measured max. step FPS:', int(10000/ts))
    env.close()


#+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm

if True: #do some reinforcement learning with the Exudyn model
    import gym

    env = CartPoleEnv(thresholdFactor=5, forceFactor=2)

    env.useRenderer = False
    total_timesteps = 1000 #for quick test only; does not stabilize
    if useGraphics:
        total_timesteps = 100_000 #works sometimes, may need more steps

    from stable_baselines3 import A2C
    model = A2C('MlpPolicy', env,
                device='cpu', #usually cpu is faster for networks of this size
                verbose=1)
    ts = -time.time()
    model.learn(total_timesteps=total_timesteps)
    print('time spent=', ts+time.time())

    model.save('solution/cartpoleLearn')

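    #note (a sketch, not part of the original file): the policy saved above
    #could be reloaded later without retraining, using the standard
    #stable-baselines3 API:
    #model = A2C.load('solution/cartpoleLearn')
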
    #%%+++++++++++++++++++++++++++++++++++++++
    if useGraphics:
        env = CartPoleEnv(10) #test with larger threshold
        env.useRenderer = True
        obs = env.reset()
        for i in range(100):
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            env.render()
            if done:
                obs = env.reset()
            time.sleep(0.05) #to see results ...

        env.close()
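
As a possible follow-up (a sketch, not part of the file above), stable-baselines3's evaluate_policy helper could be used to score the trained controller numerically; this assumes 'model' and CartPoleEnv from the code above are available:

    #sketch: average the episode reward of the learned policy
    from stable_baselines3.common.evaluation import evaluate_policy
    evalEnv = CartPoleEnv(thresholdFactor=5, forceFactor=2) #same settings as for training
    evalEnv.useRenderer = False
    meanReward, stdReward = evaluate_policy(model, evalEnv, n_eval_episodes=10, deterministic=True)
    print('mean reward =', meanReward, '+/-', stdReward)
    evalEnv.close()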