Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions dqn/agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import print_function
+from functools import reduce
import os
import time
import random
Expand Down Expand Up @@ -224,7 +225,7 @@ def build_dqn(self):

q_summary = []
avg_q = tf.reduce_mean(self.q, 0)
-for idx in xrange(self.env.action_size):
+for idx in range(self.env.action_size):
q_summary.append(tf.summary.histogram('q/%s' % idx, avg_q[idx]))
self.q_summary = tf.summary.merge(q_summary, 'q_summary')

Expand Down Expand Up @@ -325,7 +326,7 @@ def build_dqn(self):

tf.initialize_all_variables().run()

-self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep=30)
+self._saver = tf.train.Saver(list(self.w.values()) + [self.step_op], max_to_keep=30)

self.load_model()
self.update_target_q_network()
Expand Down Expand Up @@ -373,7 +374,7 @@ def play(self, n_step=10000, n_episode=100, test_ep=None, render=False):
self.env.env.monitor.start(gym_dir)

best_reward, best_idx = 0, 0
-for idx in xrange(n_episode):
+for idx in range(n_episode):
screen, reward, action, terminal = self.env.new_random_game()
current_reward = 0

Expand Down
6 changes: 3 additions & 3 deletions dqn/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, config):
config.screen_width, config.screen_height, config.action_repeat, config.random_start

self.display = config.display
-self.dims = (screen_width, screen_height)
+self.dims = (screen_height, screen_width)

self._screen = None
self.reward = 0
Expand All @@ -26,7 +26,7 @@ def new_game(self, from_random_game=False):

def new_random_game(self):
self.new_game(True)
-for _ in xrange(random.randint(0, self.random_start - 1)):
+for _ in range(random.randint(0, self.random_start - 1)):
self._step(0)
self.render()
return self.screen, 0, 0, self.terminal
Expand Down Expand Up @@ -70,7 +70,7 @@ def act(self, action, is_training=True):
cumulated_reward = 0
start_lives = self.lives

-for _ in xrange(self.action_repeat):
+for _ in range(self.action_repeat):
self._step(action)
cumulated_reward = cumulated_reward + self.reward

Expand Down