testGymCartpole.py
You can view and download this file on GitHub: testGymCartpole.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a cart-pole example;
#           it needs the input file testGymCartpoleEnv.py, which defines the model in the gym environment.
#           Works well with Python 3.8!
#
# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
# Date:     2022-05-17
# Update:   2024-12-03 (adapted to stable-baselines3 > 2.0)
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++
#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
#pip install wheel==0.38.4 setuptools==66.0.0
# => this downgrades setuptools to be able to install gym==0.21
#pip install stable-baselines3==1.7.0

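#note (added remark, not from the original setup): the 2024 update above targets
#stable-baselines3 > 2.0, which depends on gymnasium instead of gym; for that
#setup a plain 'pip install stable-baselines3' should suffice, and the pins
#above are only needed for the legacy gym==0.21 / stable-baselines3==1.7.0 route
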
import time
from math import sin, cos
from testGymCartpoleEnv import CartPoleEnv

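#the class CartPoleEnv is defined in testGymCartpoleEnv.py (not shown here);
#the following minimal sketch of its interface is an assumption inferred from
#the calls in this file, not the actual content of that file:
#
#   class CartPoleEnv(gym.Env):
#       useRenderer = False                  #if True, show Exudyn visualization
#       def __init__(self, thresholdFactor=1., forceFactor=1.): ...
#       def reset(self, seed=None, return_info=False): ... #-> (observation, info)
#       def step(self, action): ...          #-> gym/gymnasium step tuple
#       def integrateStep(self, force): ...  #advance the Exudyn solver one step
#       def render(self): ...
#       def close(self): ...
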
useGraphics = True
if True: #test the model by just integrating in Exudyn and applying a force

    env = CartPoleEnv()
    env.useRenderer = False #set this True to show the visualization
    observation, info = env.reset(seed=42, return_info=True)
    ts = -time.time()

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        # action = env.action_space.sample()
        # observation, reward, done, info = env.step(action)
        # if done:
        #     observation, info = env.reset(return_info=True)
        # env.render()
        # time.sleep(0.01)
    ts = ts+time.time()
    print('measured max. step FPS:', int(10000/ts))
    env.close()


#+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm

if True: #do some reinforcement learning with the Exudyn model
    import gym

    env = CartPoleEnv(thresholdFactor=5, forceFactor=2)

    env.useRenderer = False
    total_timesteps = 1000 #for a quick test only; does not stabilize
    if useGraphics:
        total_timesteps = 100_000 #works sometimes, may need more steps

    from stable_baselines3 import A2C
    model = A2C('MlpPolicy', env,
                device='cpu', #usually cpu is faster for networks of this size
                verbose=1)
    ts = -time.time()
    model.learn(total_timesteps=total_timesteps)
    print('time spent=', ts+time.time())

    model.save('solution/cartpoleLearn')
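    #the trained policy saved above can be reloaded later without retraining,
    #using the standard stable-baselines3 API:
    # model = A2C.load('solution/cartpoleLearn')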

    #%%+++++++++++++++++++++++++++++++++++++++
    if useGraphics:
        env = CartPoleEnv(10) #test with a larger threshold
        env.useRenderer = True
        obs, info = env.reset()

        for i in range(200):
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)[:4] #[:4]: keep first four return values; see remark below
            env.render()
            if done:
                obs, info = env.reset()
            time.sleep(0.05) #to see results ...

        env.close()
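
Remark: env.step(action)[:4] above keeps only the first four values returned by step(). With a classic gym 4-tuple this is (obs, reward, done, info); with a Gymnasium-style 5-tuple it binds terminated to done (and truncated to the unused info variable). The helper below is a minimal sketch, assuming the environment follows one of these two conventions; the actual signature is defined in testGymCartpoleEnv.py:

    def stepCompat(env, action):
        #normalize step() results: classic gym returns (obs, reward, done, info),
        #while gymnasium returns (obs, reward, terminated, truncated, info)
        result = env.step(action)
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            return obs, reward, terminated or truncated, info
        return result #already (obs, reward, done, info)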