testGymDoublePendulum.py

You can view and download this file on Github: testGymDoublePendulum.py

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows the integration with OpenAI gym by testing a double pendulum example
#           Needs the input file testGymDoublePendulumEnv.py, which defines the model in the gym environment
#
# Author:   Johannes Gerstmayr
# Date:     2022-05-18
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


import time
from math import sin, cos
from testGymDoublePendulumEnv import DoublePendulumEnv

useGraphics = True
#%%+++++++++++++++++++++++++++++++++++++++++++++
if False: #test the model by just integrating in Exudyn and applying a force

    env = DoublePendulumEnv()
    env.useRenderer = True #set this True to show the visualization
    observation, info = env.reset(seed=42, return_info=True)

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        env.render()
        time.sleep(0.01)

    env.close()


#%%+++++++++++++++++++++++++++++++++++++++++++++
if False: #test the model with some random inputs
    import gym
    env = DoublePendulumEnv(5)
    env.useRenderer = True #set this True to show the visualization
    observation, info = env.reset(seed=42, return_info=True)

    ts = -time.time()
    for _ in range(1000):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)[:4] #accommodate gym versions in which step returns more than 4 values (terminated and truncated)

        env.render()
        if done:
            observation, info = env.reset(return_info=True)
    env.close()

    print('time spent=',ts+time.time())


#%%+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm
#pip install gym[spaces]
#pip install pyglet
#pip install stable-baselines3
if True: #do some reinforcement learning with the Exudyn model
    import gym

    from stable_baselines3 import A2C

    total_timesteps = 1000 #for a quick test only; does not stabilize
    if useGraphics:
        total_timesteps = 1_000_000 #sometimes already works well

    doLearning = True
    if doLearning:
        env = DoublePendulumEnv(1)
        env.useRenderer = False
        #env = gym.make('CartPole-v1')

        ts = -time.time()
        model = A2C('MlpPolicy', env, verbose=1)
        model.learn(total_timesteps=total_timesteps)
        print('time spent=',ts+time.time())

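    #%%++++++++++++++++++++++++
    #a minimal sketch, assuming the standard stable_baselines3 save interface and a
    #hypothetical file name, of how the trained agent could be stored for later reuse:
    if doLearning:
        model.save('A2CdoublePendulum') #writes A2CdoublePendulum.zip into the working directory
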
    #%%++++++++++++++++++++++++
    if useGraphics:
        env = DoublePendulumEnv(10) #allow a larger threshold for testing
        env.useRenderer = True
        obs = env.reset()
        for i in range(5000):
            action, _state = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)[:4]
            env.render()
            time.sleep(0.01)
            if done:
                obs = env.reset()
            if env.mbs.GetRenderEngineStopFlag(): #stop if the user presses 'Q' in the render window
                break

        env.close()
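
#%%+++++++++++++++++++++++++++++++++++++++++++++++++
#a minimal sketch, assuming the trained agent was stored above under the hypothetical
#name 'A2CdoublePendulum': reload the policy in a later session without re-training
if False:
    from stable_baselines3 import A2C
    model = A2C.load('A2CdoublePendulum') #restores the policy from A2CdoublePendulum.zip
    #the reloaded model can then be evaluated exactly as in the useGraphics block above,
    #e.g. with model.predict(obs, deterministic=True)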