Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,6 @@ Desktop.ini
*claude*
*Claude*
*CLAUDE*
**/.ipynb_checkpoints/
**/.DS_Store
**/__pycache__/
7 changes: 7 additions & 0 deletions src/envs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,13 @@ Executes Python code in a sandboxed environment. Demonstrates:

See: [`coding_env/README.md`](coding_env/README.md)

### Connect4 Environment
Location: `src/envs/connect_four/`

Wraps OpenSpiel's `connect_four` game to provide a turnkey board-game benchmark that follows the OpenEnv API, including typed models, HTTP client, and Docker image.

See: [`connect_four/README.md`](connect_four/README.md)

## Best Practices

### 1. Type Safety
Expand Down
21 changes: 21 additions & 0 deletions src/envs/connect_four/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Connect Four (OpenSpiel) — OpenEnv Wrapper

This environment wraps **OpenSpiel**’s `connect_four` and exposes an OpenEnv-style API.

## Observation
- **Board**: `6 x 7` int grid in the _agent’s_ view
- `0` empty, `+1` agent discs (player 0), `-1` opponent discs (player 1).
- **Legal actions**: playable columns `[0..6]`.
- **current_player**: `+1` if agent to move, `-1` otherwise.
- **reward**: scalar, agent centric (`+1` win, `-1` loss, `0` otherwise).

## Endpoints
- `POST /reset` → `{ observation, state }`
- `POST /step` w/ `{"column": int}` → `{ observation, state }`
- `GET /state` → current metadata
- `POST /close` → cleanup

## Local run
```bash
pip install "open_spiel>=1.6" fastapi "uvicorn[standard]" numpy
uvicorn src.envs.connect_four.server.app:app --host 0.0.0.0 --port 8020
```
9 changes: 9 additions & 0 deletions src/envs/connect_four/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .models import ConnectFourAction, ConnectFourObservation, ConnectFourState
from .client import ConnectFourEnvClient

__all__ = [
"ConnectFourAction",
"ConnectFourObservation",
"ConnectFourState",
"ConnectFourEnvClient",
]
40 changes: 40 additions & 0 deletions src/envs/connect_four/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations
import requests
from typing import Tuple
from .models import ConnectFourAction, ConnectFourObservation, ConnectFourState


class ConnectFourEnvClient:
    """
    Tiny HTTP client for the Connect Four server.

    Every method performs one blocking HTTP request against ``base_url``
    using ``requests``. ``reset``, ``step`` and ``state`` raise
    ``requests.HTTPError`` on a non-2xx response (via ``raise_for_status``);
    ``close`` is best-effort and ignores transport failures.

    Example:
        env = ConnectFourEnvClient("http://localhost:8020")
        obs, st = env.reset()
        obs, st = env.step(ConnectFourAction(column=3))
    """

    def __init__(self, base_url: str):
        # Drop trailing slashes so f"{self.base}/reset" never contains "//".
        self.base = base_url.rstrip("/")

    @staticmethod
    def _parse(payload: dict) -> Tuple[ConnectFourObservation, ConnectFourState]:
        """Build the typed (observation, state) pair from a /reset or /step body."""
        return (
            ConnectFourObservation(**payload["observation"]),
            ConnectFourState(**payload["state"]),
        )

    def reset(self) -> Tuple[ConnectFourObservation, ConnectFourState]:
        """POST ``/reset`` and return the resulting ``(observation, state)``."""
        r = requests.post(f"{self.base}/reset", timeout=30)
        r.raise_for_status()
        return self._parse(r.json())

    def step(self, action: ConnectFourAction) -> Tuple[ConnectFourObservation, ConnectFourState]:
        """POST ``action`` to ``/step`` and return the resulting ``(observation, state)``."""
        # Mirror the server's serialisation helper: prefer pydantic v2's
        # model_dump(), fall back to v1's dict() when it is unavailable.
        body = action.model_dump() if hasattr(action, "model_dump") else action.dict()
        r = requests.post(f"{self.base}/step", json=body, timeout=30)
        r.raise_for_status()
        return self._parse(r.json())

    def state(self) -> ConnectFourState:
        """GET ``/state`` and return it as a ``ConnectFourState``."""
        r = requests.get(f"{self.base}/state", timeout=15)
        r.raise_for_status()
        return ConnectFourState(**r.json())

    def close(self) -> None:
        """Ask the server to release its environment; never raises on transport errors."""
        try:
            requests.post(f"{self.base}/close", timeout=10)
        except requests.RequestException:
            # Best-effort cleanup: an unreachable or already-stopped server is
            # not an error for the caller. Only transport-level failures are
            # swallowed so that genuine programming errors still surface.
            pass
31 changes: 31 additions & 0 deletions src/envs/connect_four/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field


class ConnectFourAction(BaseModel):
    """Request body for a move: the index of the column to play."""

    # Required field; validation rejects values outside the inclusive 0..6 range.
    column: int = Field(
        ...,
        ge=0,
        le=6,
        description="Playable column 0..6",
    )


class ConnectFourObservation(BaseModel):
    """What the agent sees after a reset or a step.

    All values are expressed from the agent's point of view (the agent is
    player 0). ``board``, ``legal_actions``, ``current_player``, ``done`` and
    ``reward`` are required; ``last_move`` and ``info`` are optional.
    """

    # 6x7 int grid: 0 empty, +1 agent discs, -1 opponent discs
    board: List[List[int]]
    # list of playable columns (0..6), empty when done=True
    legal_actions: List[int]
    # +1 if agent (player 0) to move, -1 otherwise
    current_player: int
    # last column played, or None at the start
    last_move: Optional[int] = None
    # terminal flag
    done: bool
    # scalar reward in agent’s perspective: +1 win, -1 loss, 0 else
    reward: float
    # passthrough metadata; default_factory gives every instance its own
    # fresh dict instead of relying on a shared mutable class-level literal
    info: Dict[str, Any] = Field(default_factory=dict)


class ConnectFourState(BaseModel):
    """Episode metadata that the server returns next to each observation.

    Every field has a default, so ``ConnectFourState()`` is a valid
    placeholder before any state has been received.
    """

    # Board dimensions (rows x columns).
    rows: int = 6
    cols: int = 7
    # presumably the number of moves played so far — filled in by the server
    # environment, which is not visible here; TODO confirm
    move_count: int = 0
    # Episode identifier; stays an empty string unless a value is supplied.
    episode_id: str = ""
24 changes: 24 additions & 0 deletions src/envs/connect_four/server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Container image for the Connect Four (OpenSpiel) environment server.
FROM python:3.11-slim

# System basics (git not strictly required for OpenSpiel but handy for debugging)
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential git \
&& rm -rf /var/lib/apt/lists/*

# Python deps
# - open_spiel from PyPI (>=1.6 ships Linux wheels)
# - pin numpy<2.0 for broad compatibility with older stacks
RUN pip install --no-cache-dir "fastapi>=0.112" "uvicorn[standard]>=0.30" "numpy>=1.24,<2.0" "open_spiel>=1.6"

# Copy project
# NOTE(review): the CMD below imports `src.envs.connect_four.server.app`, so the
# build context is presumably the repository root — confirm when building.
WORKDIR /app
COPY . /app/

# Defaults (override at runtime)
# These are the variables server/app.py reads through os.getenv at import time.
ENV PORT=8020
ENV OPENSPIEL_GAME=connect_four
ENV CONNECT4_AUTOPLAY_OPPONENT=false
ENV CONNECT4_OPP_POLICY=random

# EXPOSE is static metadata: overriding PORT at runtime does not change it.
EXPOSE 8020
# Run through `sh -c` so that ${PORT} is expanded when the container starts.
CMD ["sh", "-c", "uvicorn src.envs.connect_four.server.app:app --host 0.0.0.0 --port ${PORT}"]
11 changes: 11 additions & 0 deletions src/envs/connect_four/server/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Connect Four environment server components."""

from .connect_four_environment import ConnectFourEnvironment

__all__ = ["ConnectFourEnvironment"]
70 changes: 70 additions & 0 deletions src/envs/connect_four/server/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations
import os
from typing import Optional

from fastapi import FastAPI
from pydantic import BaseModel

from ..models import ConnectFourAction, ConnectFourObservation, ConnectFourState
from .connect_four_environment import (
ConnectFourEnvironment,
ConnectFourConfig,
)

# ------------ env config from environment variables ------------
# All four values are read once, when this module is imported.
# PORT must parse as an int (int() raises ValueError otherwise); it is not
# referenced again in the visible part of this module.
PORT = int(os.getenv("PORT", "8020"))
# Passed to ConnectFourConfig as game_string.
GAME_STRING = os.getenv("OPENSPIEL_GAME", "connect_four")
# True only for "1", "true" or "yes" (case-insensitive); any other value is False.
AUTO_OPP = os.getenv("CONNECT4_AUTOPLAY_OPPONENT", "false").lower() in {"1", "true", "yes"}
OPP_POLICY = os.getenv("CONNECT4_OPP_POLICY", "random") # random | lowest | highest

# ------------------------- FastAPI app -------------------------
app = FastAPI(title="OpenEnv • Connect Four (OpenSpiel)", version="1.0.0")

# Single process-wide environment: created lazily by _ensure_env() and set
# back to None by the /close endpoint.
_env: Optional[ConnectFourEnvironment] = None
# Last state produced by /reset or /step; a default ConnectFourState until then.
_state = ConnectFourState()

def _dump(model: BaseModel) -> dict:
    """Serialise a pydantic model to a plain dict.

    Uses ``model_dump()`` when the object provides it and falls back to
    ``dict()`` otherwise, so both pydantic API generations are handled.
    """
    if hasattr(model, "model_dump"):
        return model.model_dump()
    return model.dict()

def _ensure_env() -> ConnectFourEnvironment:
    """Return the shared environment, constructing it on first use.

    The instance is built from the module-level GAME_STRING, AUTO_OPP and
    OPP_POLICY settings and cached in the ``_env`` global, so later calls
    return the same object until ``_env`` is cleared again.
    """
    global _env
    if _env is not None:
        return _env
    _env = ConnectFourEnvironment(
        ConnectFourConfig(
            game_string=GAME_STRING,
            autoplay_opponent=AUTO_OPP,
            opponent_policy=OPP_POLICY,
        )
    )
    return _env

# --------------------------- endpoints --------------------------

@app.post("/reset")
def reset():
    """Call ``reset()`` on the shared environment and return its result.

    The returned state is cached in the module-level ``_state`` before the
    response body ``{"observation": ..., "state": ...}`` is assembled.
    """
    global _state
    observation_fields, state_fields = _ensure_env().reset()
    _state = ConnectFourState(**state_fields)
    observation = ConnectFourObservation(**observation_fields)
    return {"observation": _dump(observation), "state": _dump(_state)}

@app.post("/step")
def step(action: ConnectFourAction):
    """Forward ``action.column`` to the shared environment's ``step()``.

    The returned state is cached in the module-level ``_state`` before the
    response body ``{"observation": ..., "state": ...}`` is assembled.
    """
    global _state
    observation_fields, state_fields = _ensure_env().step(action.column)
    _state = ConnectFourState(**state_fields)
    observation = ConnectFourObservation(**observation_fields)
    return {"observation": _dump(observation), "state": _dump(_state)}

@app.get("/state")
def state():
    """Return the cached module-level ``_state`` as a plain dict.

    This does not touch the environment; it only serialises whatever the
    last /reset or /step stored (or the default state if neither ran yet).
    """
    snapshot = _dump(_state)
    return snapshot

@app.post("/close")
def close():
    """Close the shared environment, if one exists, and forget it.

    ``_env`` is reset to None even when the environment's ``close()``
    raises, so a later request builds a fresh instance. Returns
    ``{"ok": True}`` when no exception propagated.
    """
    global _env
    active = _env
    try:
        if active is not None:
            active.close()
    finally:
        # Always drop the reference, whether or not close() succeeded.
        _env = None
    return {"ok": True}
Loading