testGymDoublePendulum.py
You can view and download this file on GitHub: testGymDoublePendulum.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details: This file shows integration with OpenAI gym by testing a double pendulum example
#          Needs input file testGymDoublePendulumEnv.py which defines the model in the gym environment
#
# Author: Johannes Gerstmayr
# Date: 2022-05-18
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


import time
from math import sin, cos
from testGymDoublePendulumEnv import DoublePendulumEnv

useGraphics = True
#%%+++++++++++++++++++++++++++++++++++++++++++++
if False: #test the model by just integrating in Exudyn and applying a force

    env = DoublePendulumEnv()
    env.useRenderer = True #set this true to show visualization
    observation, info = env.reset(seed=42, return_info=True)

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        env.render()
        time.sleep(0.01)

    env.close()

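#a minimal sketch (not part of the original example), assuming DoublePendulumEnv defines the
#standard gym action_space/observation_space attributes (required by stable_baselines3 below):
if False: #inspect the spaces of the environment
    env = DoublePendulumEnv()
    print('action space:     ', env.action_space)
    print('observation space:', env.observation_space)
    env.close()
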
#%%+++++++++++++++++++++++++++++++++++++++++++++
if False: #testing the model with some random input
    import gym
    env = DoublePendulumEnv(5)
    env.useRenderer = True #set this true to show visualization
    observation, info = env.reset(seed=42, return_info=True)

    ts = -time.time()
    for _ in range(1000):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)[:4] #accommodate steps which return > 4 args

        env.render()
        if done:
            observation, info = env.reset(return_info=True)
    env.close()

    print('time spent=',ts+time.time())

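#a minimal sketch (not part of the original example): before training, the custom environment
#could be validated with the stable_baselines3 environment checker, which warns if the gym.Env
#interface (spaces, reset/step return values) deviates from what the algorithms expect:
if False: #validate the custom environment
    from stable_baselines3.common.env_checker import check_env
    env = DoublePendulumEnv(1)
    env.useRenderer = False
    check_env(env, warn=True)
    env.close()
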
#%%+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm
#pip install gym[spaces]
#pip install pyglet

if True: #do some reinforcement learning with exudyn model
    import gym

    from stable_baselines3 import A2C

    total_timesteps = 1000 #for quick test only; does not stabilize
    if useGraphics:
        total_timesteps = 1_000_000 #sometimes already works well

    doLearning = True
    if doLearning:
        env = DoublePendulumEnv(1)
        env.useRenderer = False
        #env = gym.make('CartPole-v1')

        ts = -time.time()
        model = A2C('MlpPolicy', env, verbose=1)
        model.learn(total_timesteps=total_timesteps)
        print('time spent=',ts+time.time())
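        #a minimal sketch (not part of the original example): the trained policy could be saved
        #and reloaded with the standard stable_baselines3 interface; the file name is an assumption
        model.save('testGymDoublePendulumA2C')
        #model = A2C.load('testGymDoublePendulumA2C', env=env)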

    #%%++++++++++++++++++++++++
    if useGraphics:
        env = DoublePendulumEnv(10) #allow larger threshold for testing
        env.useRenderer = True
        obs = env.reset()
        for i in range(5000):
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)[:4]
            env.render()
            time.sleep(0.01)
            if done:
                obs = env.reset()
            if env.mbs.GetRenderEngineStopFlag(): #stop if user presses Q
                break

        env.close()
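
#a minimal sketch (not part of the original example): the average episode reward of the trained
#policy could also be estimated with the stable_baselines3 helper evaluate_policy:
if False: #evaluate the trained policy without rendering
    from stable_baselines3.common.evaluation import evaluate_policy
    env = DoublePendulumEnv(1)
    env.useRenderer = False
    meanReward, stdReward = evaluate_policy(model, env, n_eval_episodes=10)
    print('mean reward=', meanReward, ', std=', stdReward)
    env.close()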