diff --git a/.github/workflows/deploy-hf-env.yml b/.github/workflows/deploy-hf-env.yml index d84833df..12849a3f 100644 --- a/.github/workflows/deploy-hf-env.yml +++ b/.github/workflows/deploy-hf-env.yml @@ -15,6 +15,7 @@ on: - 'chat_env' - 'atari_env' - 'openspiel_env' + - 'maze_env' custom_environment: description: 'Custom environment to deploy (leave empty for none)' required: false @@ -63,7 +64,7 @@ jobs: if [ "${{ github.event.inputs.environment }}" = "all" ]; then echo "deploy_all=true" >> $GITHUB_OUTPUT echo "use_matrix=true" >> $GITHUB_OUTPUT - echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT + echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,maze_env" >> $GITHUB_OUTPUT echo "Manual trigger - deploying all environments with matrix" else echo "deploy_all=false" >> $GITHUB_OUTPUT @@ -78,14 +79,14 @@ jobs: if git diff --name-only HEAD~1 HEAD | grep -E '^src/core/' > /dev/null; then echo "deploy_all=true" >> $GITHUB_OUTPUT echo "use_matrix=true" >> $GITHUB_OUTPUT - echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT + echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,maze_env" >> $GITHUB_OUTPUT echo "Core files changed - deploying all environments with matrix" exit 0 fi # Check which specific environments changed changed_envs=() - for env in echo_env coding_env chat_env atari_env openspiel_env; do + for env in echo_env coding_env chat_env atari_env openspiel_env maze_env; do if git diff --name-only HEAD~1 HEAD | grep -E "^src/envs/$env/" > /dev/null; then changed_envs+=("$env") fi @@ -110,7 +111,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env] + environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env, maze_env] permissions: contents: read diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 32452a1a..92062cd9 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -79,6 +79,8 @@ jobs: dockerfile: src/envs/atari_env/server/Dockerfile - name: git-env dockerfile: src/envs/git_env/server/Dockerfile + - name: maze-env + dockerfile: src/envs/maze_env/server/Dockerfile - name: my-env # Add your environment here dockerfile: src/envs/connect4_env/server/Dockerfile - name: textarena-env diff --git a/examples/maze_human.py b/examples/maze_human.py new file mode 100644 index 00000000..1efe3912 --- /dev/null +++ b/examples/maze_human.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Simple example of using Maze environment with OpenEnv. + +This demonstrates: +1. Connecting to the Maze environment server +2. Resetting the environment +3. Taking actions +4. Observing rewards +5. 
Inspecting environment state
+
+Usage:
+    python examples/maze_human.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    print("🧩 Interactive Maze Environment Example")
+    print("=" * 60)
+
+    # Connect to environment server
+    # Ensure server is running: python -m envs.maze_env.server.app
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+        [0, 1, 0, 0, 0, 0, 0, 0],
+        [0, 1, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 1, 0, 1, 0],
+        [0, 1, 0, 1, 0, 0, 0, 0],
+        [1, 0, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 0, 1, 1, 1],
+        [0, 1, 1, 0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0, 1, 0, 0]
+    ])
+    try:
+        # Reset environment
+        print("\nšŸ“ Resetting environment...")
+        result = env.reset()
+
+        print(f"   Initial position: {result.observation.position}")
+        print(f"   Legal actions: {result.observation.legal_actions}")
+        # Note: the initial total reward is 0, but it does not reset if you run this example again within the same server session
+        print(f"   Initial total reward: {result.observation.total_reward}")
+        # Run one episode
+        print("\n🚶 Navigating through maze...")
+        step = 0
+
+        while not result.done and step < 25:
+            # Show the current state and ask the user for one of the legal moves
+            print(f"   Current position: {result.observation.position}")
+            print(f"   Legal actions: {result.observation.legal_actions}")
+            env.render_ascii_maze(maze, result.observation.position, [0, 0], [maze.shape[0] - 1, maze.shape[1] - 1])
+            action_id = int(input("Enter one of the legal actions: "))
+            # Take action
+            result = env.step(MazeAction(action=action_id))
+            reward = result.observation.total_reward or 0
+
+            print(f"   Step {step + 1}: action={action_id}, pos={result.observation.position}, total_reward={reward:.2f}, done={result.done}")
+            step += 1
+            print("-----------------------------------------------------")
+
+        print("\nāœ… Episode finished!")
+        print(f"   Total steps: {step}")
+        print(f"   Total reward: {reward}")
+
+        # Get environment state
+        state = env.state()
+        print("\nšŸ“Š Environment State:")
+        print(f"   Episode ID: {state.episode_id}")
+        print(f"   Step count: {state.step_count}")
+        print(f"   Done: {state.done}")
+
+    except Exception as e:
+        print(f"\nāŒ Error: {e}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\nšŸ‘‹ Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/maze_simple.py b/examples/maze_simple.py
new file mode 100644
index 00000000..2b5f5e5f
--- /dev/null
+++ b/examples/maze_simple.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Simple example of using Maze environment with OpenEnv.
+
+This demonstrates:
+1. Connecting to the Maze environment server
+2. Resetting the environment
+3. Taking actions
+4. Observing rewards
+5. Inspecting environment state
+
+Usage:
+    python examples/maze_simple.py
+"""
+
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+import numpy as np
+from envs.maze_env import MazeEnv, MazeAction
+
+
+def main():
+    print("🧩 Simple Maze Environment Example")
+    print("=" * 60)
+
+    # Connect to environment server
+    # Ensure server is running: python -m envs.maze_env.server.app
+    env = MazeEnv(base_url="http://localhost:8000")
+    maze = np.array([
+        [0, 1, 0, 0, 0, 0, 0, 0],
+        [0, 1, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 1, 0, 1, 0],
+        [0, 1, 0, 1, 0, 0, 0, 0],
+        [1, 0, 0, 1, 0, 1, 0, 0],
+        [0, 0, 0, 1, 0, 1, 1, 1],
+        [0, 1, 1, 0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0, 1, 0, 0]
+    ])
+    try:
+        # Reset environment
+        print("\nšŸ“ Resetting environment...")
+        result = env.reset()
+
+        print(f"   Initial position: {result.observation.position}")
+        print(f"   Legal actions: {result.observation.legal_actions}")
+        # Note: the initial total reward is 0, but it does not reset if you run this example again within the same server session
+        print(f"   Initial total reward: {result.observation.total_reward}")
+
+        # Run one episode
+        print("\n🚶 Navigating through maze...")
+        step = 0
+        total_reward = 0
+
+        while not result.done and step < 20:
+            # Cycle deterministically through the legal actions for this position
+            print(f"   Current position: {result.observation.position}")
+            print(f"   Legal actions: {result.observation.legal_actions}")
+            env.render_ascii_maze(maze, result.observation.position, [0, 0], [maze.shape[0] - 1, maze.shape[1] - 1])
+            action_id = result.observation.legal_actions[step % len(result.observation.legal_actions)]
+            # Take action
+            result = env.step(MazeAction(action=action_id))
+
+            reward = result.reward or 0
+            total_reward += reward
+
+            print(f"   Step {step + 1}: action={action_id}, pos={result.observation.position}, reward={reward:.2f}, done={result.done}")
+            step += 1
+            print("-----------------------------------------------------")
+
+        print("\nāœ… Episode finished!")
+        print(f"   Total steps: {step}")
+        print(f"   Total reward: {total_reward}")
+
+        # Get environment state
+        state = env.state()
+        print("\nšŸ“Š Environment State:")
+        print(f"   Episode ID: {state.episode_id}")
+        print(f"   Step count: {state.step_count}")
+        print(f"   Done: {state.done}")
+
+    except Exception as e:
+        print(f"\nāŒ Error: {e}")
+        print("\nMake sure the server is running:")
+        print("  python -m envs.maze_env.server.app")
+        print("\nOr start with Docker:")
+        print("  docker run -p 8000:8000 maze-env:latest")
+
+    finally:
+        env.close()
+        print("\nšŸ‘‹ Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/deploy_to_hf.sh b/scripts/deploy_to_hf.sh
index 20256c9a..ef212ffb 100755
--- a/scripts/deploy_to_hf.sh
+++ b/scripts/deploy_to_hf.sh
@@ -290,6 +290,13 @@ DOCKERFILE_EOF
         echo "OpenSpiel builds can take 10-15 minutes due to C++ compilation"
         return  # Skip the common parts since OpenSpiel has its own complete Dockerfile
         ;;
+    "maze_env")
+        cat >> "$CURRENT_STAGING_DIR/Dockerfile" << 'DOCKERFILE_EOF'
+# Install additional dependencies for MazeEnvironment
+RUN pip install --no-cache-dir numpy
+DOCKERFILE_EOF
+    # Maze env requires only numpy on top of the base image
+    ;;
 esac
 
 # Add common parts
diff --git a/scripts/prepare_hf_deployment.sh b/scripts/prepare_hf_deployment.sh
index 23fd4779..381edffd 100755
--- a/scripts/prepare_hf_deployment.sh
+++ b/scripts/prepare_hf_deployment.sh
@@ -157,6 +157,7 @@ README_EOF
        "chat_env") ENV_CLASS="ChatEnv" ;;
        "atari_env") ENV_CLASS="AtariEnv" ;;
        "openspiel_env") ENV_CLASS="OpenSpielEnv" ;;
+        "maze_env") ENV_CLASS="MazeEnv" ;;
"maze_env") ENV_CLASS="MazeEnv" ;; *) ENV_CLASS="Env" ;; esac diff --git a/src/envs/maze_env/README.md b/src/envs/maze_env/README.md new file mode 100644 index 00000000..c2b4e5cd --- /dev/null +++ b/src/envs/maze_env/README.md @@ -0,0 +1,123 @@ +# Maze Environment + +Integration of Maze game with the OpenEnv framework. + +## Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ RL Training Code (Client) │ +│ MazeEnv.step(action) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ HTTP +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā–¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ FastAPI Server (Docker) │ +│ MazeEnvironment │ +│ ā”œā”€ Wraps Maze environment │ +│ └─ Agent controls player │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +## Installation & Usage + +### Option 1: Local Development (without Docker) + +**Requirements:** +- Python 3.11+ +- Numpy + +```python +from envs.maze_env import MazeEnv, MazeAction + +# Start local server manually +# python -m envs.maze_env.server.app + +# Connect to local server +env = MazeEnv(base_url="http://localhost:8000") + +# Reset environment +result = env.reset() +print(f"Initial state: {result.observation.info_state}") +print(f"Legal actions: {result.observation.legal_actions}") + +# Take actions +for _ in range(10): + action_id = result.observation.legal_actions[0] # Choose first legal action + result = env.step(MazeAction(action_id=action_id)) + print(f"Reward: {result.reward}, Done: {result.done}") + if result.done: + break + +# Cleanup +env.close() +``` + +### Option 2: Docker (Recommended) + +**Build Docker image:** + +```bash +cd OpenEnv +docker build -f src/envs/maze_env/server/Dockerfile -t maze-env:latest . +``` + +**Use with from_docker_image():** + +```python +from envs.maze_env import MazeEnv, MazeAction + +# Automatically starts container +env = MazeEnv.from_docker_image("maze-env:latest") + +result = env.reset() +result = env.step(MazeAction(action_id=0)) + +env.close() # Stops container +``` + +## Configuration + +### Variables + +- `maze` : Maze as a numpy array saved in mazearray.py + +### Example + +```bash +docker run -p 8000:8000 maze-env:latest +``` + +## API Reference + +### MazeAction + +```python +@dataclass +class MazeAction(Action): + action: int # Action to be taken +``` + +### MazeObservation + +```python +@dataclass +class MazeObservation(Observation): + position: List[int] # [row, col] + total_reward: float # Total reward + legal_actions: List[int] = field(default_factory=list) # Legal action based on the current position +``` + +### MazeState + +```python +@dataclass +class MazeState(State): + episode_id: str # Episode + step_count: int # Number of steps + done: bool = False # Solve status + +``` + +## References + +- [Maze Environment](https://github.com/erikdelange/Reinforcement-Learning-Maze) diff --git a/src/envs/maze_env/__init__.py b/src/envs/maze_env/__init__.py new file mode 100644 index 00000000..0c2c79f7 --- /dev/null +++ b/src/envs/maze_env/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Maze Environment Integration. 
+ +This module provides integration between Maze game and the OpenEnv framework. +""" + +from .client import MazeEnv +from .models import MazeAction, MazeObservation, MazeState + +__all__ = ["MazeEnv", "MazeAction", "MazeObservation", "MazeState"] diff --git a/src/envs/maze_env/client.py b/src/envs/maze_env/client.py new file mode 100644 index 00000000..dfbc1013 --- /dev/null +++ b/src/envs/maze_env/client.py @@ -0,0 +1,92 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +MazeEnv HTTP Client. + +This module provides the client for connecting to a Maze Environment server +over HTTP. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, TYPE_CHECKING + +from core.client_types import StepResult +from core.http_env_client import HTTPEnvClient + +from .models import MazeAction, MazeObservation, MazeState + +if TYPE_CHECKING: + pass + + +class MazeEnv(HTTPEnvClient[MazeAction, MazeObservation]): + """HTTP client for Maze Environment.""" + + def render_ascii_maze( + self, + maze: List[List[int]], + position: List[int], + start: List[int], + goal: List[int], + ) -> None: + """ + Render the maze grid as ASCII art in the terminal. + - 0 = free cell + - 1 = wall + - S = start + - G = goal + - P = player + - E = exit + """ + print("\nCurrent Maze State:") + rows, cols = len(maze), len(maze[0]) + for r in range(rows): + line = "" + for c in range(cols): + if [r, c] == position: + line += "P " + elif [r, c] == start: + line += "S " + elif [r, c] == goal: + line += "G " + elif maze[r][c] == 1: + line += "ā–ˆ " + elif r == rows - 1 and c == cols - 1: + line += "E " + else: + line += ". " + print(line) + print() + + def _step_payload(self, action: MazeAction) -> Dict[str, Any]: + """Prepare payload to send to the environment server.""" + return {"action": action.action} + + def _parse_result(self, payload: Dict[str, Any]) -> StepResult[MazeObservation]: + """Parse the response from the server into MazeObservation + reward/done.""" + obs_data = payload.get("observation", {}) + + observation = MazeObservation( + position=obs_data.get("position", []), + total_reward=obs_data.get("total_reward", 0.0), + legal_actions=obs_data.get("legal_actions", []), + ) + + return StepResult( + observation=observation, + reward=payload.get("reward", 0.0), + done=payload.get("done", False), + ) + + def _parse_state(self, payload: Dict[str, Any]) -> MazeState: + """Parse environment state from payload.""" + return MazeState( + episode_id=payload.get("episode_id", ""), + step_count=payload.get("step_count", 0), + done=payload.get("done", False), + ) diff --git a/src/envs/maze_env/models.py b/src/envs/maze_env/models.py new file mode 100644 index 00000000..35a00b14 --- /dev/null +++ b/src/envs/maze_env/models.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Data models for Maze Environment. + +This module defines the Action, Observation, and State types for Maze games. 
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import List
+
+from core.env_server import Action, Observation, State
+
+
+@dataclass
+class MazeAction(Action):
+    action: int
+
+
+@dataclass
+class MazeObservation(Observation):
+    position: List[int]  # [row, col]
+    total_reward: float
+    legal_actions: List[int] = field(default_factory=list)
+
+
+@dataclass
+class MazeState(State):
+    episode_id: str
+    step_count: int
+    done: bool = False
diff --git a/src/envs/maze_env/server/Dockerfile b/src/envs/maze_env/server/Dockerfile
new file mode 100644
index 00000000..2d2e3d6f
--- /dev/null
+++ b/src/envs/maze_env/server/Dockerfile
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Use the pre-built OpenEnv base image
+# Built from: docker build -t openenv-base:latest -f src/core/Dockerfile.openenv-base .
+# In CI, this can be overridden to use GHCR or other registries
+ARG OPENENV_BASE_IMAGE=openenv-base:latest
+FROM ${OPENENV_BASE_IMAGE}
+
+# Install Python dependencies needed by the Maze environment
+RUN pip install --no-cache-dir \
+    "numpy>=2.3.4" \
+    "matplotlib>=3.10.7"
+
+# Set working directory
+WORKDIR /app
+
+# Copy OpenEnv core (already in the base image; copied again to pick up local changes)
+COPY src/core/ /app/src/core/
+
+# Copy Maze environment
+COPY src/envs/maze_env/ /app/src/envs/maze_env/
+
+# Copy README for web interface documentation
+COPY src/envs/maze_env/README.md /app/README.md
+
+# Set the Python path for OpenEnv
+# (same as the base image default of PYTHONPATH=/app/src)
+ENV PYTHONPATH=/app/src
+
+
+# Health check (curl provided by openenv-base)
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+# Expose default port
+EXPOSE 8000
+
+# Run the FastAPI server (uvicorn installed by openenv-base)
+CMD ["uvicorn", "envs.maze_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/src/envs/maze_env/server/__init__.py b/src/envs/maze_env/server/__init__.py
new file mode 100644
index 00000000..1fca47db
--- /dev/null
+++ b/src/envs/maze_env/server/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Server-side implementation for Maze environments."""
+from .maze import Maze, Status
+from .maze_environment import MazeEnvironment
+
+__all__ = ["Maze", "MazeEnvironment", "Status"]
diff --git a/src/envs/maze_env/server/app.py b/src/envs/maze_env/server/app.py
new file mode 100644
index 00000000..d81ed695
--- /dev/null
+++ b/src/envs/maze_env/server/app.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+FastAPI application for the Maze Environment.
+
+This module creates an HTTP server that exposes the Maze game
+over HTTP endpoints, making it compatible with HTTPEnvClient.
+ +Usage: + # Development (with auto-reload): + uvicorn envs.maze_env.server.app:app --reload --host 0.0.0.0 --port 8000 + + # Production: + uvicorn envs.maze_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4 + + # Or run directly: + python -m envs.maze_env.server.app + +Variables: + maze: np.array - Maze as a numpy array +""" + +from core.env_server import create_app +from ..models import MazeAction, MazeObservation +from .maze_environment import MazeEnvironment +from .mazearray import maze + +# Get game configuration from environment variables + +# Create the environment instance +env = MazeEnvironment(maze_array=maze, start_cell=(0, 0), exit_cell=(7, 7)) + +# Create the FastAPI app with web interface and README integration +app = create_app(env, MazeAction, MazeObservation, env_name="maze_env") + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/src/envs/maze_env/server/maze.py b/src/envs/maze_env/server/maze.py new file mode 100644 index 00000000..1385654f --- /dev/null +++ b/src/envs/maze_env/server/maze.py @@ -0,0 +1,395 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Derived from https://github.com/erikdelange/Reinforcement-Learning-Maze/blob/master/main.py (MIT LICENSE) +# Original Author: Erik de Lange + +import logging +from enum import Enum, IntEnum + +import numpy as np + + +class Cell(IntEnum): + EMPTY = 0 # indicates empty cell where the agent can move to + OCCUPIED = 1 # indicates cell which contains a wall and cannot be entered + CURRENT = 2 # indicates current cell of the agent + + +class Action(IntEnum): + MOVE_LEFT = 2 + MOVE_RIGHT = 3 + MOVE_UP = 0 + MOVE_DOWN = 1 + + +class Render(Enum): + NOTHING = 0 + TRAINING = 1 + MOVES = 2 + + +class Status(Enum): + WIN = 0 + LOSE = 1 + PLAYING = 2 + + +class Maze: + """A maze with walls. An agent is placed at the start cell and must find the exit cell by moving through the maze. + + The layout of the maze and the rules how to move through it are called the environment. An agent is placed + at start_cell. The agent chooses actions (move left/right/up/down) in order to reach the exit_cell. Every + action results in a reward or penalty which are accumulated during the game. Every move gives a small + penalty (-0.05), returning to a cell the agent visited earlier a bigger penalty (-0.25) and running into + a wall a large penalty (-0.75). The reward (+10.0) is collected when the agent reaches the exit. The + game always reaches a terminal state; the agent either wins or looses. Obviously reaching the exit means + winning, but if the penalties the agent is collecting during play exceed a certain threshold the agent is + assumed to wander around clueless and looses. + + A note on cell coordinates: + The cells in the maze are stored as (col, row) or (x, y) tuples. (0, 0) is the upper left corner of the maze. + This way of storing coordinates is in line with what matplotlib's plot() function expects as inputs. The maze + itself is stored as a 2D numpy array so cells are accessed via [row, col]. 
To convert a (col, row) tuple + to (row, col) use (col, row)[::-1] + """ + + actions = [ + Action.MOVE_LEFT, + Action.MOVE_RIGHT, + Action.MOVE_UP, + Action.MOVE_DOWN, + ] # all possible actions + + reward_exit = 10.0 # reward for reaching the exit cell + penalty_move = ( + -0.05 + ) # penalty for a move which did not result in finding the exit cell + penalty_visited = -0.25 # penalty for returning to a cell which was visited earlier + penalty_impossible_move = ( + -0.75 + ) # penalty for trying to enter an occupied cell or moving out of the maze + + def __init__(self, maze, start_cell=(0, 0), exit_cell=None): + """Create a new maze game. + + :param numpy.array maze: 2D array containing empty cells (= 0) and cells occupied with walls (= 1) + :param tuple start_cell: starting cell for the agent in the maze (optional, else upper left) + :param tuple exit_cell: exit cell which the agent has to reach (optional, else lower right) + """ + self.maze = maze + + self.__minimum_reward = ( + -0.5 * self.maze.size + ) # stop game if accumulated reward is below this threshold + + nrows, ncols = self.maze.shape + self.cells = [(col, row) for col in range(ncols) for row in range(nrows)] + self.empty = [ + (col, row) + for col in range(ncols) + for row in range(nrows) + if self.maze[row, col] == Cell.EMPTY + ] + self.__exit_cell = (ncols - 1, nrows - 1) if exit_cell is None else exit_cell + self.empty.remove(self.__exit_cell) + + # Check for impossible maze layout + if self.__exit_cell not in self.cells: + raise Exception( + "Error: exit cell at {} is not inside maze".format(self.__exit_cell) + ) + if self.maze[self.__exit_cell[::-1]] == Cell.OCCUPIED: + raise Exception( + "Error: exit cell at {} is not free".format(self.__exit_cell) + ) + + # Variables for rendering using Matplotlib + self.__render = Render.NOTHING # what to render + self.__ax1 = None # axes for rendering the moves + self.__ax2 = None # axes for rendering the best action per cell + + self.reset(start_cell) + + def reset(self, start_cell=(0, 0)): + """Reset the maze to its initial state and place the agent at start_cell. 
+ + :param tuple start_cell: here the agent starts its journey through the maze (optional, else upper left) + :return: new state after reset + """ + if start_cell not in self.cells: + raise Exception( + "Error: start cell at {} is not inside maze".format(start_cell) + ) + if self.maze[start_cell[::-1]] == Cell.OCCUPIED: + raise Exception("Error: start cell at {} is not free".format(start_cell)) + if start_cell == self.__exit_cell: + raise Exception( + "Error: start- and exit cell cannot be the same {}".format(start_cell) + ) + + self.__previous_cell = self.__current_cell = start_cell + self.__total_reward = 0.0 # accumulated reward + self.__visited = set() # a set() only stores unique values + + if self.__render in (Render.TRAINING, Render.MOVES): + # render the maze + nrows, ncols = self.maze.shape + self.__ax1.clear() + self.__ax1.set_xticks(np.arange(0.5, nrows, step=1)) + self.__ax1.set_xticklabels([]) + self.__ax1.set_yticks(np.arange(0.5, ncols, step=1)) + self.__ax1.set_yticklabels([]) + self.__ax1.grid(True) + self.__ax1.plot( + *self.__current_cell, "rs", markersize=30 + ) # start is a big red square + self.__ax1.text( + *self.__current_cell, "Start", ha="center", va="center", color="white" + ) + self.__ax1.plot( + *self.__exit_cell, "gs", markersize=30 + ) # exit is a big green square + self.__ax1.text( + *self.__exit_cell, "Exit", ha="center", va="center", color="white" + ) + self.__ax1.imshow(self.maze, cmap="binary") + self.__ax1.get_figure().canvas.draw() + self.__ax1.get_figure().canvas.flush_events() + + return self.__observe() + + def __draw(self): + """Draw a line from the agents previous cell to its current cell.""" + self.__ax1.plot( + *zip(*[self.__previous_cell, self.__current_cell]), "bo-" + ) # previous cells are blue dots + self.__ax1.plot(*self.__current_cell, "ro") # current cell is a red dot + self.__ax1.get_figure().canvas.draw() + self.__ax1.get_figure().canvas.flush_events() + + def step(self, action): + """Move the agent according to 'action' and return the new state, reward and game status. + + :param Action action: the agent will move in this direction + :return: state, reward, status + """ + reward = self.__execute(action) + self.__total_reward += reward + status = self.__status() + state = self.__observe() + logging.debug( + "action: {:10s} | reward: {: .2f} | status: {}".format( + Action(action).name, reward, status + ) + ) + return state, reward, status + + def __execute(self, action): + """Execute action and collect the reward or penalty. 
+ + :param Action action: direction in which the agent will move + :return float: reward or penalty which results from the action + """ + possible_actions = self.__possible_actions(self.__current_cell) + + if not possible_actions: + reward = ( + self.__minimum_reward - 1 + ) # cannot move anywhere, force end of game + elif action in possible_actions: + col, row = self.__current_cell + if action == Action.MOVE_LEFT: + col -= 1 + elif action == Action.MOVE_UP: + row -= 1 + if action == Action.MOVE_RIGHT: + col += 1 + elif action == Action.MOVE_DOWN: + row += 1 + + self.__previous_cell = self.__current_cell + self.__current_cell = (col, row) + + if self.__render != Render.NOTHING: + self.__draw() + + if self.__current_cell == self.__exit_cell: + reward = Maze.reward_exit # maximum reward when reaching the exit cell + elif self.__current_cell in self.__visited: + reward = ( + Maze.penalty_visited + ) # penalty when returning to a cell which was visited earlier + else: + reward = ( + Maze.penalty_move + ) # penalty for a move which did not result in finding the exit cell + + self.__visited.add(self.__current_cell) + else: + reward = ( + Maze.penalty_impossible_move + ) # penalty for trying to enter an occupied cell or move out of the maze + + return reward + + def __possible_actions(self, cell=None): + """Create a list with all possible actions from 'cell', avoiding the maze's edges and walls. + + :param tuple cell: location of the agent (optional, else use current cell) + :return list: all possible actions + """ + if cell is None: + col, row = self.__current_cell + else: + col, row = cell + + possible_actions = Maze.actions.copy() # initially allow all + + # now restrict the initial list by removing impossible actions + nrows, ncols = self.maze.shape + if row == 0 or (row > 0 and self.maze[row - 1, col] == Cell.OCCUPIED): + possible_actions.remove(Action.MOVE_UP) + if row == nrows - 1 or ( + row < nrows - 1 and self.maze[row + 1, col] == Cell.OCCUPIED + ): + possible_actions.remove(Action.MOVE_DOWN) + + if col == 0 or (col > 0 and self.maze[row, col - 1] == Cell.OCCUPIED): + possible_actions.remove(Action.MOVE_LEFT) + if col == ncols - 1 or ( + col < ncols - 1 and self.maze[row, col + 1] == Cell.OCCUPIED + ): + possible_actions.remove(Action.MOVE_RIGHT) + + return possible_actions + + def __status(self): + """Return the game status. + + :return Status: current game status (WIN, LOSE, PLAYING) + """ + if self.__current_cell == self.__exit_cell: + return Status.WIN + + if ( + self.__total_reward < self.__minimum_reward + ): # force end of game after too much loss + return Status.LOSE + + return Status.PLAYING + + def __observe(self): + """Return the state of the maze - in this game the agents current location. + + :return numpy.array [1][2]: agents current location + """ + return np.array([[*self.__current_cell]]) + + def play(self, model, start_cell=(0, 0)): + """Play a single game, choosing the next move based a prediction from 'model'. 
+ + :param class AbstractModel model: the prediction model to use + :param tuple start_cell: agents initial cell (optional, else upper left) + :return Status: WIN, LOSE + """ + self.reset(start_cell) + + state = self.__observe() + + while True: + action = model.predict(state=state) + state, reward, status = self.step(action) + if status in (Status.WIN, Status.LOSE): + return status + + def check_win_all(self, model): + """Check if the model wins from all possible starting cells.""" + previous = self.__render + self.__render = ( + Render.NOTHING + ) # avoid rendering anything during execution of the check games + + win = 0 + lose = 0 + + for cell in self.empty: + if self.play(model, cell) == Status.WIN: + win += 1 + else: + lose += 1 + + self.__render = previous # restore previous rendering setting + + logging.info( + "won: {} | lost: {} | win rate: {:.5f}".format( + win, lose, win / (win + lose) + ) + ) + + result = True if lose == 0 else False + + return result, win / (win + lose) + + def render_q(self, model): + """Render the recommended action(s) for each cell as provided by 'model'. + + :param class AbstractModel model: the prediction model to use + """ + + def clip(n): + return max(min(1, n), 0) + + if self.__render == Render.TRAINING: + nrows, ncols = self.maze.shape + + self.__ax2.clear() + self.__ax2.set_xticks(np.arange(0.5, nrows, step=1)) + self.__ax2.set_xticklabels([]) + self.__ax2.set_yticks(np.arange(0.5, ncols, step=1)) + self.__ax2.set_yticklabels([]) + self.__ax2.grid(True) + self.__ax2.plot( + *self.__exit_cell, "gs", markersize=30 + ) # exit is a big green square + self.__ax2.text( + *self.__exit_cell, "Exit", ha="center", va="center", color="white" + ) + + for cell in self.empty: + q = model.q(cell) if model is not None else [0, 0, 0, 0] + a = np.nonzero(q == np.max(q))[0] + + for action in a: + dx = 0 + dy = 0 + if action == Action.MOVE_LEFT: + dx = -0.2 + if action == Action.MOVE_RIGHT: + dx = +0.2 + if action == Action.MOVE_UP: + dy = -0.2 + if action == Action.MOVE_DOWN: + dy = 0.2 + + # color (from red to green) represents the certainty of the preferred action(s) + maxv = 1 + minv = -1 + color = clip( + (q[action] - minv) / (maxv - minv) + ) # normalize in [-1, 1] + + self.__ax2.arrow( + *cell, + dx, + dy, + color=(1 - color, color, 0), + head_width=0.2, + head_length=0.1, + ) + + self.__ax2.imshow(self.maze, cmap="binary") + self.__ax2.get_figure().canvas.draw() diff --git a/src/envs/maze_env/server/maze_environment.py b/src/envs/maze_env/server/maze_environment.py new file mode 100644 index 00000000..b9675bcf --- /dev/null +++ b/src/envs/maze_env/server/maze_environment.py @@ -0,0 +1,198 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Maze Environment Server Implementation. + +This module wraps Maze's environment and exposes it +via the OpenEnv Environment interface. +""" + +from typing import List, Tuple, Optional +from core.env_server import Environment +from .maze import Maze +from ..models import MazeAction, MazeObservation, MazeState + +try: + import numpy as np +except ImportError as e: + raise ImportError( + "Numpy is not installed. " + "Please install it following instructions at: " + "pip install numpy" + ) from e + + +class MazeEnvironment(Environment): + """ + Maze Environment wrapper for OpenEnv. + + This environment wraps Maze game and provides a single-agent interface. 
+ + Args: + maze_array: Maze array as numpy array + start cell: Start of the maze + exit_cell: Exit for the maze + """ + + def __init__( + self, + maze_array: np.ndarray, + start_cell: Tuple[int, int] = (0, 0), + exit_cell: Optional[Tuple[int, int]] = (7, 7), + ): + # Create underlying Maze instance (matches your working code) + self.env = Maze(maze=maze_array, start_cell=start_cell, exit_cell=exit_cell) + self.total_reward = 0 + self.start_cell = start_cell + self.exit_cell = exit_cell + # env.reset() will be called in reset(); state initialized to None until then + self.state: Optional[MazeState] = None + + def reset(self) -> MazeObservation: + """Reset environment and return initial observation (MazeObservation).""" + observation = ( + self.env.reset() + ) # typically returns np.array([row, col]) or similar + # initialize episode state + self.state = MazeState(episode_id="episode_1", step_count=0, done=False) + + # build MazeObservation; convert numpy to list for JSON-serializable dataclass fields + pos_list = ( + observation.tolist() + if hasattr(observation, "tolist") + else list(observation) + ) + self.total_reward = 0 + legal_actions = self._compute_legal_actions(pos_list[0]) + + return MazeObservation( + position=pos_list, + total_reward=self.total_reward, + legal_actions=legal_actions, + ) + + def step(self, action: MazeAction) -> MazeObservation: + """ + Step function that manipulates the maze position grid + and applies rewards/penalties for movement outcomes. + """ + + # --- Get current position --- + if hasattr(self.env, "agent_position"): + row, col = self.env.agent_position + elif hasattr(self.env, "_Maze__current_cell"): + row, col = self.env._Maze__current_cell + else: + row, col = self.env._Maze__start_cell + + maze = np.array(self.env.maze) + + # --- Define movement directions --- + # 0 = UP, 1 = DOWN, 2 = LEFT, 3 = RIGHT + move_map = { + 0: (-1, 0), + 1: (1, 0), + 2: (0, -1), + 3: (0, 1), + } + + # --- Reward settings --- + reward_exit = 10.0 # reward for reaching the exit cell + reward_move = 0.05 # reward for a move that didn't find the exit but is valid + penalty_visited = -0.25 # penalty for revisiting a cell + penalty_impossible = -0.75 # penalty for invalid move (wall/outside) + + dr, dc = move_map.get(action.action, (0, 0)) + new_r, new_c = row + dr, col + dc + + # Keep track of visited cells + if not hasattr(self, "_visited"): + self._visited = set() + self._visited.add((row, col)) + + # --- Check if move is valid --- + valid_move = ( + 0 <= new_r < maze.shape[0] + and 0 <= new_c < maze.shape[1] + and maze[new_r, new_c] != 1 + ) + + reward = 0.0 + done = False + + if valid_move: + # Update position + row, col = new_r, new_c + + if self.exit_cell and (row, col) == self.exit_cell: + reward += reward_exit + done = True + self._visited = set() + elif (row, col) in self._visited: + reward += penalty_visited + else: + reward += reward_move + else: + # Invalid move + reward += penalty_impossible + + # --- Update environment position --- + if hasattr(self.env, "agent_position"): + self.env.agent_position = (row, col) + elif hasattr(self.env, "_Maze__current_cell"): + self.env._Maze__current_cell = (row, col) + + # --- Total reward update --- + self.total_reward += reward + + # --- Update state --- + if self.state is None: + self.state = MazeState(episode_id="episode_1", step_count=0, done=done) + self.state.step_count += 1 + self.state.done = done + + # --- Observation --- + pos_list = [row, col] + legal_actions = self._compute_legal_actions(pos_list) + # --- Return 
observation --- + return MazeObservation( + position=pos_list, + total_reward=self.total_reward, + legal_actions=legal_actions, + done=done, + ) + + def state(self) -> Optional[MazeState]: + """Return the current MazeState object.""" + return self.state + + def _compute_legal_actions(self, pos: List[int]) -> List[int]: + """ + Compute which actions are legal given the current normalized position [row, col]. + (0=UP, 1=DOWN, 2=LEFT, 3=RIGHT) + """ + actions: List[int] = [] + if not pos or len(pos) < 2: + return actions + + row, col = int(pos[0]), int(pos[1]) + nrows, ncols = self.env.maze.shape + + # UP + if row > 0 and self.env.maze[row - 1, col] == 0: + actions.append(0) + # DOWN + if row < nrows - 1 and self.env.maze[row + 1, col] == 0: + actions.append(1) + # LEFT + if col > 0 and self.env.maze[row, col - 1] == 0: + actions.append(2) + # RIGHT + if col < ncols - 1 and self.env.maze[row, col + 1] == 0: + actions.append(3) + + return actions diff --git a/src/envs/maze_env/server/mazearray.py b/src/envs/maze_env/server/mazearray.py new file mode 100644 index 00000000..3cd7dbd6 --- /dev/null +++ b/src/envs/maze_env/server/mazearray.py @@ -0,0 +1,15 @@ +import numpy as np + +# Maze +maze = np.array( + [ + [0, 1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 1, 0, 1, 0], + [0, 1, 0, 1, 0, 0, 0, 0], + [1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 1], + [0, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0], + ] +)
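As a quick end-to-end check of the environment added above, a random-policy rollout against the running server can look roughly like the sketch below. This is a minimal sketch, not part of the patch: it assumes the server from `src/envs/maze_env/server/app.py` is reachable at `http://localhost:8000`, and the file location under `examples/` is hypothetical.

```python
# Random-policy rollout against the maze server (sketch; assumes the server
# started via `python -m envs.maze_env.server.app` is listening on port 8000).
import random
import sys
from pathlib import Path

# Hypothetical location: examples/maze_random.py, so add src/ to the path
# the same way the other examples do.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from envs.maze_env import MazeEnv, MazeAction

env = MazeEnv(base_url="http://localhost:8000")
try:
    result = env.reset()
    steps = 0
    while not result.done and steps < 50:
        if not result.observation.legal_actions:
            break  # no legal move from the current cell
        # Pick uniformly among the legal actions reported by the server.
        action_id = random.choice(result.observation.legal_actions)
        result = env.step(MazeAction(action=action_id))
        steps += 1
    print(f"Finished after {steps} steps, total reward: {result.observation.total_reward}")
finally:
    env.close()
```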