testGymCartpole.py

You can view and download this file on Github: testGymCartpole.py

 1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2# This is an EXUDYN example
 3#
 4# Details:  This file shows integration with OpenAI gym by testing a cart-pole example
 5#           Needs input file testGymCartpoleEnv.py which defines the model in the gym environment
 6#           Works well with Python3.8!
 7#
 8# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
 9# Date:     2022-05-17
10# Update:   2024-12-03 (adapted to stable-baselines3 > 2.0)
11#
12# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
13#
14#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
15
16#+++++++++++++++++++++++++++++++++++++++++++++++++
17#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
 18#pip install wheel==0.38.4 setuptools==66.0.0
19#      => this downgrades setuptools to be able to install gym==0.21
20#pip install stable-baselines3==1.7.0
21
22import time
23from math import sin, cos
24from testGymCartpoleEnv import CartPoleEnv
25
useGraphics = True
if True: #test the model by just integrating in Exudyn and apply force
    #sanity check: drive the cart with a prescribed harmonic force (no learning)
    #and measure the raw stepping rate of the environment

    env = CartPoleEnv()
    env.useRenderer = False #set this true to show visualization
    #NOTE(review): reset(seed=..., return_info=True) follows the project env's
    #signature (pre-gymnasium style) — confirm against testGymCartpoleEnv.py
    observation, info = env.reset(seed=42, return_info=True)

    nSteps = 10000
    tStart = time.time()
    for step in range(nSteps):
        appliedForce = 0.1*cos(step/50) #slow harmonic excitation
        env.integrateStep(appliedForce)
    tElapsed = time.time() - tStart

    print('measured max. step FPS:', int(nSteps/tElapsed))
    env.close()
46
47
48#+++++++++++++++++++++++++++++++++++++++++++++++++
 49#reinforcement learning algorithm
50
if True: #do some reinforcement learning with exudyn model
    #NOTE(review): removed the unused legacy 'import gym' here — the header says
    #this file was adapted to stable-baselines3 > 2.0, which builds on gymnasium,
    #so importing gym was dead code and fails on setups without the old package

    env = CartPoleEnv(thresholdFactor=5,forceFactor=2)

    env.useRenderer = False
    total_timesteps = 1000 #for quick test only; does not stabilize
    if useGraphics:
        total_timesteps = 100_000 #works sometimes, may need more steps

    from stable_baselines3 import A2C
    model = A2C('MlpPolicy', env,
                device='cpu',  #usually cpu is faster for this size of networks
                verbose=1)
    ts = -time.time()
    model.learn(total_timesteps=total_timesteps)
    print('time spent=',ts+time.time())

    model.save('solution/cartpoleLearn')

    #%%+++++++++++++++++++++++++++++++++++++++
    #visual evaluation of the trained policy (larger threshold to see failures)
    if useGraphics:
        env = CartPoleEnv(10)#test with larger threshold
        env.useRenderer = True
        obs, info = env.reset()

        for i in range(200):
            action, _state = model.predict(obs, deterministic=True)
            #BUGFIX: the original 'obs, reward, done, info = env.step(action)[:4]'
            #bound info to the truncated flag when step returns the gymnasium
            #5-tuple (obs, reward, terminated, truncated, info) and never reset
            #on truncation; handle both 4-tuple and 5-tuple step APIs:
            stepResult = env.step(action)
            obs, reward = stepResult[0], stepResult[1]
            info = stepResult[-1]
            done = any(stepResult[2:-1]) #terminated (or truncated, if present)
            env.render()
            if done:
                obs, info = env.reset()
            time.sleep(0.05) #to see results ...

        env.close()