From ba292f0589aa354707332384e4f5483929577293 Mon Sep 17 00:00:00 2001
From: Marc Tuscher
Date: Sun, 8 Jul 2018 17:43:47 +0200
Subject: [PATCH] fixed screen dims bug in environment wrapper; python3
 compatibility

---
 dqn/agent.py       | 7 ++++---
 dqn/environment.py | 6 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/dqn/agent.py b/dqn/agent.py
index d9e437d..dd20f3c 100644
--- a/dqn/agent.py
+++ b/dqn/agent.py
@@ -1,4 +1,5 @@
 from __future__ import print_function
+from functools import reduce
 import os
 import time
 import random
@@ -224,7 +225,7 @@ def build_dqn(self):
 
       q_summary = []
       avg_q = tf.reduce_mean(self.q, 0)
-      for idx in xrange(self.env.action_size):
+      for idx in range(self.env.action_size):
         q_summary.append(tf.summary.histogram('q/%s' % idx, avg_q[idx]))
       self.q_summary = tf.summary.merge(q_summary, 'q_summary')
 
@@ -325,7 +326,7 @@ def build_dqn(self):
 
     tf.initialize_all_variables().run()
 
-    self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep=30)
+    self._saver = tf.train.Saver(list(self.w.values()) + [self.step_op], max_to_keep=30)
 
     self.load_model()
     self.update_target_q_network()
@@ -373,7 +374,7 @@ def play(self, n_step=10000, n_episode=100, test_ep=None, render=False):
       self.env.env.monitor.start(gym_dir)
 
     best_reward, best_idx = 0, 0
-    for idx in xrange(n_episode):
+    for idx in range(n_episode):
       screen, reward, action, terminal = self.env.new_random_game()
       current_reward = 0
 
diff --git a/dqn/environment.py b/dqn/environment.py
index 9f4e9e1..3631ef4 100644
--- a/dqn/environment.py
+++ b/dqn/environment.py
@@ -11,7 +11,7 @@ def __init__(self, config):
       config.screen_width, config.screen_height, config.action_repeat, config.random_start
 
     self.display = config.display
-    self.dims = (screen_width, screen_height)
+    self.dims = (screen_height, screen_width)
 
     self._screen = None
     self.reward = 0
@@ -26,7 +26,7 @@ def new_game(self, from_random_game=False):
 
   def new_random_game(self):
     self.new_game(True)
-    for _ in xrange(random.randint(0, self.random_start - 1)):
+    for _ in range(random.randint(0, self.random_start - 1)):
      self._step(0)
     self.render()
     return self.screen, 0, 0, self.terminal
@@ -70,7 +70,7 @@ def act(self, action, is_training=True):
     cumulated_reward = 0
     start_lives = self.lives
 
-    for _ in xrange(self.action_repeat):
+    for _ in range(self.action_repeat):
       self._step(action)
       cumulated_reward = cumulated_reward + self.reward
 
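
Background on the two fixes, with a minimal standalone sketch (plain
Python; the names below are illustrative stand-ins, not code from this
repo). Python 3 removes xrange and moves reduce into functools (hence
the new import), and dict.values() now returns a view object that
cannot be concatenated with a list. The dims fix follows from NumPy's
array layout: screen arrays are shaped (height, width, channels), so a
dims tuple describing the screen must put height first.

    import numpy as np

    # Python 3: dict.values() is a view, so `w.values() + [x]` raises
    # TypeError; wrapping it in list() works on Python 2 and 3 alike.
    w = {'l1_w': 1.0, 'l2_w': 2.0}  # stand-in for the agent's weight dict
    step_op = 0                     # stand-in for self.step_op
    to_save = list(w.values()) + [step_op]

    # NumPy image arrays are (height, width, channels), so a tuple that
    # matches the screen's shape is (height, width), the order the
    # patched line now uses.
    screen = np.zeros((210, 160, 3), dtype=np.uint8)  # raw Atari frame: 210 high, 160 wide
    dims = screen.shape[:2]                           # (210, 160), i.e. (height, width)
    assert dims == (210, 160)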