Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
node_modules
activate
node_modules
activate
ENV
*.pyc
save.json
__pycache__
splendor_venv
60 changes: 30 additions & 30 deletions client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
uuid = None
pid = None
start_key = None
server = 'http://localhost:5000'
server = 'http://localhost:8000'
curr_state = None
card_hash = {}
reverse_hash = {}
Expand All @@ -19,7 +19,7 @@ def create():
gid = resp.json()['game']
start_key = resp.json()['start']
handle_resp(resp, False)
print 'created game {0} with start_key {1}'.format(gid, start_key)
print('created game {0} with start_key {1}'.format(gid, start_key))

def join(game=None):
global gid, uuid, pid, start_key
Expand All @@ -31,20 +31,20 @@ def join(game=None):
j = resp.json()
uuid = j['uuid']
pid = j['id']
print 'joined as player {0}'.format(pid)
print('joined as player {0}'.format(pid))
if not start_key:
poll()

def start():
resp = requests.post(server + '/start/{0}/{1}'.format(gid, start_key))
if not resp.json():
print 'started'
print('started')
poll()
else:
print resp.json()
print(resp.json())

def print_card(card):
print '{0}> ({1})--[{2}] | b:{3} u:{4} w:{5} g:{6} r:{7}'.format(
print('{0}> ({1})--[{2}] | b:{3} u:{4} w:{5} g:{6} r:{7}'.format(
reverse_hash[card['uuid']],
card['color'],
card['points'],
Expand All @@ -53,41 +53,41 @@ def print_card(card):
card['cost']['w'],
card['cost']['g'],
card['cost']['r']
)
))

def print_nobles(nobles):
print ' << Nobles >>'
print (' << Nobles >>')
for noble in nobles:
print ' [{0}] b:{1} u:{2} w:{3} g:{4} r:{5}'.format(
print (' [{0}] b:{1} u:{2} w:{3} g:{4} r:{5}'.format(
noble['points'],
noble['requirement']['b'],
noble['requirement']['u'],
noble['requirement']['w'],
noble['requirement']['g'],
noble['requirement']['r'],
)
))

def print_gems(target):
print 'Gems <> b:{0} u:{1} w:{2} g:{3} r:{4} *:{5}'.format(
print ('Gems <> b:{0} u:{1} w:{2} g:{3} r:{4} *:{5}'.format(
target['gems'].get('b', '-'),
target['gems'].get('u', '-'),
target['gems'].get('w', '-'),
target['gems'].get('g', '-'),
target['gems'].get('r', '-'),
target['gems'].get('*', '-'),
)
))

def print_player(player):
print 'Player {0} :: [{1}]'.format(player['id'], player['score'])
print '================'
print ('Player {0} :: [{1}]'.format(player['id'], player['score']))
print ('================')
print_gems(player)
print 'Cards <> b:{0} u:{1} w:{2} g:{3} r:{4}'.format(
print ('Cards <> b:{0} u:{1} w:{2} g:{3} r:{4}'.format(
len(player['cards']['b']),
len(player['cards']['u']),
len(player['cards']['w']),
len(player['cards']['g']),
len(player['cards']['r']),
)
))
print_nobles(player['nobles'])

def print_state():
Expand All @@ -99,21 +99,21 @@ def print_state():
reverse_hash = {}
count = 1

print 'Gems'
print '----'
print ('Gems')
print ('----')
print_gems(curr_state)
print ''
print ('')
print_nobles(curr_state['nobles'])
print ''
print ('')
for k, v in curr_state['cards'].iteritems():
print '{0} -> {1} remaining'.format(k, curr_state['decks'][k])
print '-----------------------'
print ('{0} -> {1} remaining'.format(k, curr_state['decks'][k]))
print ('-----------------------')
for card in v:
card_hash[count] = card['uuid']
reverse_hash[card['uuid']] = count
count += 1
print_card(card)
print ''
print ('')
for player in curr_state['players']:
print_player(player)

Expand All @@ -131,10 +131,10 @@ def handle_resp(resp, do_poll=True):

if 'result' in resp.json():
if resp.json()['result'].get('error'):
print resp.json()['result']['error']
print(resp.json()['result']['error'])
return
else:
print resp.json()['result']
print(resp.json()['result'])

curr_state = resp.json()['state']
print_state()
Expand All @@ -151,14 +151,14 @@ def next():

def list_games():
resp = requests.get(server + '/list')
print 'Games:'
print '------'
print ('Games:')
print ('------')
for game in resp.json()['games']:
active = 'waiting'
if game['in_progress']:
active = 'in progress'
print '{0} -> {1} player(s) | {2}'.format(game['uuid'], game['players'], active)
print ''
print ('{0} -> {1} player(s) | {2}'.format(game['uuid'], game['players'], active))
print ('')

class Client(cmd.Cmd):
prompt = '> '
Expand Down Expand Up @@ -203,7 +203,7 @@ def do_print(self, line):
print_state()

def do_EOF(self, line):
print ''
print ('')
return True

def do_list(self, line):
Expand Down
Empty file added client/test_agent.py
Empty file.
12 changes: 0 additions & 12 deletions docker-compose.yml

This file was deleted.

4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
flask
requests

torch
torchvision
gym
1 change: 1 addition & 0 deletions server/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__all__ = ['player_and_game']
188 changes: 188 additions & 0 deletions server/dqn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import gym
import collections
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from flask import Flask
from splendor_env import *
import numpy as np

#Hyperparameters
learning_rate = 0.0005
gamma = 0.98
buffer_limit = 50000
batch_size = 32

class ReplayBuffer():
    """Fixed-capacity FIFO store of (s, a, r, s_prime, done_mask) transitions."""

    def __init__(self):
        # deque silently evicts the oldest transition once buffer_limit is hit
        self.buffer = collections.deque(maxlen=buffer_limit)

    def put(self, transition):
        """Append one (s, a, r, s_prime, done_mask) tuple."""
        self.buffer.append(transition)

    def sample(self, n):
        """Draw n random transitions and return them as batched tensors.

        Returns (states, actions, rewards, next_states, done_masks) where
        states/next_states are float tensors and the remaining tensors keep
        torch's default dtype inference; actions/rewards/masks get a trailing
        singleton dimension so they align with gather()/broadcast math.
        """
        batch = random.sample(self.buffer, n)
        states, actions, rewards, next_states, masks = zip(*batch)
        return (torch.tensor(states, dtype=torch.float),
                torch.tensor([[a] for a in actions]),
                torch.tensor([[r] for r in rewards]),
                torch.tensor(next_states, dtype=torch.float),
                torch.tensor([[m] for m in masks]))

    def size(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

class DQN(nn.Module):
    """Q-network mapping a 121-dim observation to 27 action values.

    Five fully connected layers (121 -> 256 -> 512 -> 256 -> 128 -> 27);
    ReLU between layers, linear output (raw Q-values, no squashing).
    """

    def __init__(self):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(121, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 27)

    def forward(self, x):
        # ReLU on every hidden layer; the final layer stays linear so the
        # network can emit unbounded Q-value estimates.
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
        return self.fc5(x)

class Agent():
    """Epsilon-greedy DQN agent: online net, target net, replay buffer, optimizer."""

    def __init__(self):
        self.model = DQN()
        self.target_model = DQN()
        # BUG FIX: previously self.model.load_state_dict(self.model.state_dict())
        # -- a no-op that left target_model with its own independent random
        # weights. Sync the target network from the online network instead.
        self.target_model.load_state_dict(self.model.state_dict())
        self.memory = ReplayBuffer()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        # Discrete action table (27 entries). Columns 0-4 look like gem counts
        # (b, u, w, g, r) to take; column 5 a card tier (0 = not buying) and
        # column 6 a slot index within the tier -- assumption inferred from
        # main()'s filter checks, TODO confirm against splendor_env.step().
        self.action = [[1,1,1,0,0, 0,0],
                       [1,1,0,1,0, 0,0],
                       [1,1,0,0,1, 0,0],
                       [1,0,1,1,0, 0,0],
                       [1,0,1,0,1, 0,0],
                       [1,0,0,1,1, 0,0],
                       [0,1,1,1,0, 0,0],
                       [0,1,1,0,1, 0,0],
                       [0,1,0,1,1, 0,0],
                       [0,0,1,1,1, 0,0],
                       [2,0,0,0,0, 0,0],
                       [0,2,0,0,0, 0,0],
                       [0,0,2,0,0, 0,0],
                       [0,0,0,2,0, 0,0],
                       [0,0,0,0,2, 0,0],
                       [0,0,0,0,0, 1,0],
                       [0,0,0,0,0, 1,1],
                       [0,0,0,0,0, 1,2],
                       [0,0,0,0,0, 1,3],
                       [0,0,0,0,0, 2,0],
                       [0,0,0,0,0, 2,1],
                       [0,0,0,0,0, 2,2],
                       [0,0,0,0,0, 2,3],
                       [0,0,0,0,0, 3,0],
                       [0,0,0,0,0, 3,1],
                       [0,0,0,0,0, 3,2],
                       [0,0,0,0,0, 3,3]]

    def select_action(self, obs, epsilon):
        """Epsilon-greedy policy: random action index with probability epsilon,
        otherwise the argmax of the online network's Q-values.

        obs: float tensor of shape (121,) -- see DQN.fc1.
        Returns an int in [0, 26] indexing self.action.
        """
        out = self.model.forward(obs)
        if random.random() < epsilon:
            return random.randint(0, 26)  # explore uniformly over all actions
        # BUG FIX: the greedy branch previously returned random.randint(14, 26)
        # and discarded the network output (the argmax line was commented out),
        # so the learned Q-values were never used. Exploit them here.
        return out.argmax().item()

    def train(self, q, q_target, memory, optimizer):
        """Run 10 minibatch TD updates of the online net toward the target net.

        q:        online network (trained)
        q_target: target network (bootstrap source, not updated here)
        memory:   ReplayBuffer to sample from
        optimizer: optimizer over q's parameters

        BUG FIX: this is invoked as agent.train(model, target, memory, opt),
        i.e. as a bound method, so it must accept `self`; without it the call
        raised TypeError (5 arguments to a 4-parameter function).
        """
        for _ in range(10):
            s, a, r, s_prime, done_mask = memory.sample(batch_size)

            q_out = q(s)
            q_a = q_out.gather(1, a)  # Q(s, a) for the actions actually taken
            # max_a' Q_target(s', a'), kept as a column vector
            max_q_prime = q_target(s_prime).max(1)[0].unsqueeze(1)
            # done_mask is 0.0 on terminal transitions, zeroing the bootstrap term
            target = r + gamma * max_q_prime * done_mask
            loss = F.smooth_l1_loss(q_a, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


def state2np(state_dict):
    """Flatten every value of a state dict into one 1-D float64 array.

    Values are flattened and concatenated in dict insertion order. The empty
    float64 seed array preserves the original behavior of always promoting the
    result to float64, and makes an empty dict return an empty array.

    BUG FIX (performance): the original re-ran np.concatenate once per key,
    copying the accumulated array each time (accidental O(n^2)); building the
    list of parts first and concatenating once is O(n) with identical output.
    """
    parts = [np.array([])]
    parts.extend(np.asarray(v).flatten() for v in state_dict.values())
    return np.concatenate(parts)


def main():
    # Train a DQN agent on the Splendor environment for a fixed (tiny) number
    # of episodes. NOTE(review): indentation was reconstructed from the logic;
    # confirm against the original file.
    GM = GameManager("Aircraft")
    GM.join_game()
    GM.join_game()  # two players join before the game starts
    GM.start_game()
    env = GM.game  # assumes GameManager exposes a gym-like game object -- TODO confirm

    print_interval = 20
    score = 0.0

    agent = Agent()

    # range(2): only two episodes -- presumably a debugging value, not a real
    # training budget; verify before relying on this script for training.
    for n_epi in range(2):
        epsilon = max(0.01, 0.08 - 0.01*(n_epi/200)) #Linear annealing from 8% to 1%
        s = env.reset()
        s = state2np(s)  # flatten the state dict into a 1-D feature vector

        done = False
        while not done:
            # Rejection-sample actions until one is legal in the current state.
            while True:
                a = agent.select_action(torch.from_numpy(s).float(), epsilon)
                # env.filter() presumably returns the legality masks for cards
                # and available gem counts -- TODO confirm schema.
                dic = env.filter()

                b = agent.action[a]
                # b[5] > 0 means a buy action: b[5]-1 is the tier row and b[6]
                # the slot; retry when that card is unavailable (mask == 0).
                if b[5]>0 and dic['cards'][b[5]-1][b[6]]==0:
                    continue
                if b[5]>0 and dic['cards'][b[5]-1][b[6]]==1:
                    break
                # Gem-taking action: require enough gems of each requested
                # color. NOTE(review): only indices 0..3 are checked -- the
                # fifth gem color (i == 4) is never validated; likely a bug.
                flag = True
                for i in range(4):
                    if dic['gems'][i]<b[i]:
                        flag = False
                        break
                if flag:
                    break
            print(a)
            s_prime, r, done, info = env.step(agent.action[a])
            print(s_prime['player_state'][0], s_prime['player_state'][1])
            s_prime = state2np(s_prime)
            # done_mask = 0.0 on terminal transitions so the TD target drops
            # the bootstrap term; reward is scaled down by 100 for stability.
            done_mask = 0.0 if done else 1.0
            agent.memory.put((s,a,r/100.0,s_prime, done_mask))
            s = s_prime

            score += r
            # Start learning only once the buffer has a reasonable history.
            if agent.memory.size()>2000:
                # NOTE(review): called as a bound method with four explicit
                # arguments -- confirm Agent.train accepts them (a module-level
                # train(q, q_target, memory, optimizer) would not resolve here).
                agent.train(agent.model, agent.target_model, agent.memory, agent.optimizer)

            # NOTE(review): target-network sync and logging run inside the
            # step loop (every step of qualifying episodes), not once per
            # episode -- confirm this placement is intentional.
            if n_epi%print_interval==0 and n_epi!=0:
                agent.target_model.load_state_dict(agent.model.state_dict())
                print("n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%".format(
                    n_epi, score/print_interval, agent.memory.size(), epsilon*100))
            if done:
                break
        #score = 0.0
    print("Done!")
    env.close()

if __name__ == '__main__':
    main()
Loading