# env.py

import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta


class MazeConquerorsEnv(ta.Env):
    """Turn-based two-player deterministic environment for Maze Conquerors.

    Two explorers start in opposite corners of a square grid and take turns
    issuing one action each. The grid uses these tile symbols:

    * ``"."`` open floor
    * ``"#"`` wall (cannot be entered)
    * ``"R"`` rune (can be claimed while standing on it)
    * ``"S"`` top-left corner, where ExplorerA starts
    * ``"G"`` bottom-right corner, where ExplorerB starts

    The game ends when the turn limit is reached or no rune is left on the
    grid. The explorer with more runes wins; ties are broken by Manhattan
    distance to the centre tile, and a full tie is a draw.
    """

    # Offsets are (row delta, column delta); "up" decreases the row index.
    _DIRECTIONS = {"up": (-1, 0), "down": (1, 0), "left": (0, -1), "right": (0, 1)}

    # Cumulative cut-offs applied to one uniform roll per tile.
    _WALL_CUTOFF = 0.15
    _RUNE_CUTOFF = 0.25

    # Both spellings of the answer wrapper are accepted because the prompt
    # text shows the doubled-brace form literally.
    _BOXED_SINGLE = re.compile(r'\\boxed\{([^}]*)\}')
    _BOXED_DOUBLE = re.compile(r'\\boxed\{\{([^}]*)\}\}')

    def __init__(self, maze_size: int = 7, turn_limit: int = 30):
        """Store the configuration and compile the action grammar.

        Args:
            maze_size: Side length of the square maze.
            turn_limit: Number of successful actions after which the game ends.
        """
        self.maze_size = maze_size
        self.turn_limit = turn_limit
        self.state: Optional[ta.TwoPlayerState] = None
        # Precompile regexes for action grammar
        self.move_pattern = re.compile(r'^\[Move:(up|down|left|right)\]$')
        self.scan_pattern = re.compile(r'^\[Scan:[1-3]\]$')
        self.claim_pattern = re.compile(r'^\[Claim\]$')
        self.wait_pattern = re.compile(r'^\[Wait\]$')

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _role_for(player_id: int) -> str:
        """Return the role name used as key in ``game_state["players"]``."""
        return "ExplorerA" if player_id == 0 else "ExplorerB"

    def _extract_answer_content(self, action: str) -> str:
        """Return the text inside the last ``\\boxed{...}`` of *action*.

        The prompt asks for the final answer at the end of the response, so
        when several boxed expressions are present the last one wins. Both
        ``\\boxed{x}`` and ``\\boxed{{x}}`` are understood. If there is no
        boxed expression the whole stripped response is returned.
        """
        last_box = None
        for last_box in self._BOXED_SINGLE.finditer(action):
            pass
        if last_box is None:
            return action.strip()
        # The single-brace pattern also hits the doubled form (capturing a
        # leading "{"), so re-try the stricter pattern at the same position.
        doubled = self._BOXED_DOUBLE.match(action, last_box.start())
        return (doubled or last_box).group(1).strip()

    def _is_valid_syntax(self, content: str) -> bool:
        """Return True if *content* matches one of the four action patterns."""
        patterns = (self.move_pattern, self.scan_pattern, self.claim_pattern, self.wait_pattern)
        return any(pattern.match(content) for pattern in patterns)

    # ------------------------------------------------------------------ #
    # Maze generation
    # ------------------------------------------------------------------ #
    def _generate_maze(self, seed: int) -> List[List[str]]:
        """Build the maze layout for *seed*.

        Every tile gets one uniform roll: below 0.15 it becomes a wall, below
        0.25 a rune, otherwise open floor. The two corner tiles are then
        overwritten with the start and goal markers.

        A private ``random.Random`` instance is used so that generating a
        maze does not reseed the process-wide generator; the roll sequence
        for a given seed is the same as with ``random.seed(seed)``.

        NOTE(review): nothing here guarantees that a rune exists or that the
        corners are connected.
        """
        rng = random.Random(seed)
        size = self.maze_size

        def tile_for(roll: float) -> str:
            if roll < self._WALL_CUTOFF:
                return "#"
            if roll < self._RUNE_CUTOFF:
                return "R"
            return "."

        # Row-major order, one roll per tile.
        grid = [[tile_for(rng.random()) for _ in range(size)] for _ in range(size)]
        # mark start and goal positions
        grid[0][0] = "S"
        grid[size - 1][size - 1] = "G"
        return grid

    def _initial_visible_tiles(self, pos: Tuple[int, int], radius: int = 1) -> List[List[int]]:
        """Return the in-bounds tiles of the square window around *pos*.

        Args:
            pos: ``(row, column)`` of the window centre.
            radius: Half-width of the window; the default of 1 yields the
                3x3 neighbourhood.

        Returns:
            ``[row, column]`` pairs in row-major order.
        """
        size = self.maze_size
        rows = range(max(0, pos[0] - radius), min(size, pos[0] + radius + 1))
        cols = range(max(0, pos[1] - radius), min(size, pos[1] + radius + 1))
        return [[x, y] for x in rows for y in cols]

    def _new_player(self, start: Tuple[int, int]) -> Dict[str, Any]:
        """Return the initial per-player record for an explorer at *start*."""
        return {
            "position": [start[0], start[1]],
            "runes_collected": 0,
            "moves_remaining": 5,
            "visible_tiles": self._initial_visible_tiles(start),
            "last_action": None,
            "is_trapped": False,
        }

    # ------------------------------------------------------------------ #
    # Reset
    # ------------------------------------------------------------------ #
    def reset(self, num_players: int, seed: Optional[int] = None):
        """Reset the environment to an initial state.

        Args:
            num_players: Must be 2.
            seed: Maze seed. When omitted, a random one is drawn and recorded
                in ``game_state["seed"]``.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If *num_players* is not 2.
        """
        if num_players != 2:
            raise ValueError("Maze Conquerors requires exactly two players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.turn_limit)
        if seed is None:
            seed = random.randint(0, 9999999)

        far_corner = (self.maze_size - 1, self.maze_size - 1)
        game_state = {
            "global_turn": 0,
            "turn_limit": self.turn_limit,
            "maze_dimensions": [self.maze_size, self.maze_size],
            "seed": seed,
            "maze_layout": self._generate_maze(seed),
            "players": {
                "ExplorerA": self._new_player((0, 0)),
                "ExplorerB": self._new_player(far_corner),
            },
            "observation_log": [],
            "game_status": "active",
            "winner": None,
        }

        roles = {0: "ExplorerA", 1: "ExplorerB"}
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=roles)
        self.state.add_observation("Maze Conquerors initialized!", ta.ObservationType.GAME_MESSAGE)
        return self.state

    # ------------------------------------------------------------------ #
    # Step
    # ------------------------------------------------------------------ #
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Perform a single environment step for the current player.

        The raw response is recorded, the boxed answer is extracted and
        validated, and the action is applied. A rejected action (bad syntax,
        leaving the maze, walking into a wall, claiming where there is no
        rune) is reported through ``set_invalid_move`` and returns at once:
        it neither advances ``global_turn`` nor runs the end-of-game check,
        so it can never decide the game on its own.

        Args:
            action: The player's full response text.

        Returns:
            ``(done, info)``.
        """
        player_id = self.state.current_player_id
        role = self._role_for(player_id)
        gs = self.state.game_state
        player = gs["players"][role]

        # Record raw action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id)
        content = self._extract_answer_content(action)
        player["last_action"] = content

        # Validate action syntax
        if not self._is_valid_syntax(content):
            self.state.set_invalid_move(reason="Invalid format: action not recognized.")
            return self.state.step()

        result, error = self._apply_action(content, player, gs["maze_layout"])
        gs["observation_log"].append(
            {"turn": gs["global_turn"], "player": role, "action": content, "result": result or "invalid"}
        )
        if error is not None:
            self.state.set_invalid_move(reason=error)
            return self.state.step()

        # Advance global turn
        gs["global_turn"] += 1

        # Terminal condition check
        if self._check_terminal_conditions():
            # NOTE(review): this bypasses self.state.step(); confirm callers
            # do not need the state's own step info on the final turn.
            return True, {}

        return self.state.step()

    def _apply_action(
        self, content: str, player: Dict[str, Any], maze: List[List[str]]
    ) -> Tuple[Optional[str], Optional[str]]:
        """Apply a syntactically valid action to *player* and *maze*.

        Returns:
            ``(result, error)``; exactly one of the two is not ``None``.
            *result* is the log text of a successful action, *error* is the
            reason the action was rejected (nothing is mutated in that case).
        """
        px, py = player["position"]

        if content.startswith("[Move:"):
            dx, dy = self._DIRECTIONS[content[6:-1]]
            newx, newy = px + dx, py + dy
            if not (0 <= newx < self.maze_size and 0 <= newy < self.maze_size):
                return None, "Invalid move: outside maze bounds."
            if maze[newx][newy] == "#":
                return None, "Invalid move: path blocked."
            player["position"] = [newx, newy]
            # Moving resets visibility to the 3x3 window around the new tile.
            player["visible_tiles"] = self._initial_visible_tiles((newx, newy))
            return "moved successfully", None

        if content.startswith("[Scan:"):
            radius = int(content[6:-1])
            known = {tuple(tile) for tile in player["visible_tiles"]}
            known.update(tuple(tile) for tile in self._initial_visible_tiles((px, py), radius))
            # Keep the same [row, column] list shape and row-major order that
            # the move and reset paths produce.
            player["visible_tiles"] = [list(tile) for tile in sorted(known)]
            return "revealed tiles", None

        if content == "[Claim]":
            if maze[px][py] != "R":
                return None, "Invalid claim: no rune present."
            player["runes_collected"] += 1
            maze[px][py] = "."
            return "claimed rune", None

        # Only "[Wait]" is left once the syntax check has passed.
        return "waited", None

    # ------------------------------------------------------------------ #
    # Terminal conditions
    # ------------------------------------------------------------------ #
    def _check_terminal_conditions(self) -> bool:
        """Decide the game if it is over; return True when it is.

        The turn limit is checked before the "no runes left" condition.
        """
        gs = self.state.game_state
        if gs["global_turn"] >= gs["turn_limit"]:
            self._determine_winner(reason="Turn limit reached.")
            return True
        # Check if all runes collected
        runes_left = any("R" in row for row in gs["maze_layout"])
        if not runes_left:
            self._determine_winner(reason="All runes collected.")
            return True
        return False

    # ------------------------------------------------------------------ #
    # Winner determination
    # ------------------------------------------------------------------ #
    def _determine_winner(self, reason: str):
        """Report the outcome to the state object.

        More runes wins. On equal runes the explorer with the smaller
        Manhattan distance to the centre tile wins; if that is equal too the
        game is a draw.
        """
        players = self.state.game_state["players"]
        a, b = players["ExplorerA"], players["ExplorerB"]

        if a["runes_collected"] != b["runes_collected"]:
            leader = 0 if a["runes_collected"] > b["runes_collected"] else 1
            self.state.set_winner(player_id=leader, reason=reason)
            return

        centre = self.maze_size // 2

        def distance_to_centre(explorer: Dict[str, Any]) -> int:
            row, col = explorer["position"]
            return abs(row - centre) + abs(col - centre)

        dist_a, dist_b = distance_to_centre(a), distance_to_centre(b)
        if dist_a == dist_b:
            self.state.set_draw(reason=reason)
        else:
            self.state.set_winner(player_id=0 if dist_a < dist_b else 1, reason=reason)

    # ------------------------------------------------------------------ #
    # Prompt
    # ------------------------------------------------------------------ #
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the prompt for *player_id* from *game_state*.

        The prompt contains the role, turn counter, rune count, the list of
        visible tile coordinates and the action grammar.
        """
        role = self._role_for(player_id)
        player = game_state["players"][role]
        status = (
            f"You are {role}, traversing an ancient shifting labyrinth to gather mystical runes.\n"
            f"Turn {game_state['global_turn']} of {game_state['turn_limit']}.\n"
            f"You have collected {player['runes_collected']} runes.\n"
        )
        surroundings = "Your visible tiles: " + str(player["visible_tiles"]) + "\n"
        # NOTE(review): this is a plain string, not an f-string, so the
        # doubled braces below are shown to the player literally.
        grammar = (
            "Allowed actions:\n"
            "[Move:up], [Move:down], [Move:left], [Move:right]\n"
            "[Scan:1–3], [Claim], [Wait]\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n"
            "Example valid response:\n"
            "I will explore the passage ahead.\n"
            "\\boxed{{[Move:right]}}\n"
        )
        return status + surroundings + grammar

    # ------------------------------------------------------------------ #
    # Boilerplate
    # ------------------------------------------------------------------ #
    def get_observation(self) -> Tuple[int, List]:
        """Return the current player id together with an empty list.

        NOTE(review): no observations are forwarded here; confirm that
        callers obtain them elsewhere.
        """
        return self.state.current_player_id, []

    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's ``rewards`` and ``game_info``."""
        return self.state.rewards, self.state.game_info