-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathkeyboard_agent.py
68 lines (56 loc) · 1.86 KB
/
keyboard_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
from __future__ import print_function
import sys, gym
#
# Test yourself as a learning agent! Pass environment name as a command-line argument.
#
# The environment id is taken from the first command-line argument; without
# one the script falls back to LunarLander-v2.
env = gym.make(sys.argv[1] if len(sys.argv) >= 2 else 'LunarLander-v2')

# Size of the action space; the key handlers use it to bound the digit keys.
# NOTE(review): `.n` presumably only exists for discrete action spaces --
# confirm before pointing this script at other kinds of environments.
ACTIONS = env.action_space.n

# Upper bound on the number of steps taken in a single rollout.
ROLLOUT_TIME = 10000

# Use the previous control decision SKIP_CONTROL more times before reading the
# keyboard again; raise it to test how much action skipping is still usable.
SKIP_CONTROL = 0

# Keyboard state shared between the window key handlers and rollout().
human_agent_action = 0
human_wants_restart = human_sets_pause = False
def key_press(key, mod):
    """Key-down handler: update the shared keyboard-control state.

    Key code 0xff0d sets the restart flag and key code 32 (ASCII space)
    toggles the pause flag. Any key whose code lies in
    ``ord('1') .. ord('0') + ACTIONS`` selects the zero-based action
    ``code - ord('1')``; every other key leaves the action unchanged.

    NOTE(review): 0xff0d is presumably the Return/Enter key symbol of the
    windowing toolkit -- confirm against its key table.

    ``mod`` (modifier state) is accepted for the callback signature but is
    not used.
    """
    global human_agent_action, human_wants_restart, human_sets_pause

    if key == 0xff0d:
        human_wants_restart = True
    if key == 32:
        human_sets_pause = not human_sets_pause

    # Distance of the key code from '0': 1 for '1', 2 for '2', and so on.
    digit = int(key - ord('0'))
    if 0 < digit <= ACTIONS:
        human_agent_action = digit - 1
def key_release(key, mod):
    """Key-up handler: fall back to action 0 when the active key is released.

    The released key is decoded the same way as in ``key_press`` (digit key
    k maps to action k-1). If that action is the one currently selected, the
    selection is reset to 0, so that having no key held down means
    "take action 0". Releasing any other key -- including a digit key that
    was superseded by a later key press -- leaves the selection alone.

    ``mod`` (modifier state) is accepted for the callback signature but is
    not used.
    """
    global human_agent_action
    a = int(key - ord('0'))
    if a <= 0 or a > ACTIONS:
        return
    if human_agent_action == a - 1:
        # Previously this re-assigned a-1 (a no-op), so a released key kept
        # driving the agent forever.
        human_agent_action = 0
# Render once before touching env.viewer: the window the handlers attach to is
# presumably created by the first render() call -- TODO confirm for the
# installed gym version.
env.render()

# Route the window's keyboard events to the handlers defined in this file.
# NOTE(review): if gym.make() returns a wrapper that does not forward
# attributes, `env.unwrapped.viewer` may be required here -- confirm.
_window = env.viewer.window
_window.on_key_press = key_press
_window.on_key_release = key_release
def rollout(env):
    """Run one episode of ``env`` under keyboard control.

    Clears the restart flag, resets the environment, then steps it for at
    most ROLLOUT_TIME iterations. A new action is read from
    ``human_agent_action`` (and printed) whenever the skip counter is zero;
    otherwise the previous action is repeated and the counter decremented,
    so each decision is applied ``SKIP_CONTROL + 1`` times. The environment
    is rendered after every step.

    The episode ends early when the environment reports ``done`` or when the
    restart flag has been set by the key handler. While the pause flag is
    set, the function keeps rendering and sleeping in 0.1 s slices without
    stepping the environment.

    Args:
        env: object providing ``reset()``, ``step(action)`` returning a
            4-tuple whose third item is the done flag, and ``render()``.

    Returns:
        None.
    """
    global human_agent_action, human_wants_restart, human_sets_pause
    # Function-scope import: only the pause loop needs `time`, and importing
    # it once here keeps the import statement out of the per-frame loop.
    import time

    human_wants_restart = False
    env.reset()
    skip = 0
    for _ in range(ROLLOUT_TIME):
        if not skip:
            print("taking action {}".format(human_agent_action))
            a = human_agent_action
            skip = SKIP_CONTROL
        else:
            skip -= 1

        # Only the done flag is used; observation, reward and info are ignored.
        _obs, _reward, done, _info = env.step(a)
        env.render()
        if done or human_wants_restart:
            break

        # Keep the window responsive while paused, without advancing the env.
        while human_sets_pause:
            env.render()
            time.sleep(0.1)
print("ACTIONS={}".format(ACTIONS))
# Digit key k selects action index k-1 (see key_press), so the banner lists
# zero-based action indices rather than repeating the key numbers.
print("Press keys 1 2 3 ... to take actions 0 1 2 ...")
print("No keys pressed is taking action 0")

# Play episodes back to back; the loop has no exit condition of its own.
while True:
    rollout(env)
    print("Next Episode")