testtest8/env.py

```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class EchoMazeEnv(ta.Env):
    """
    EchoMaze: The Labyrinth Duel
    Deterministic, two-player, turn-based maze exploration game.

    Player 0 is "Sun" and player 1 is "Moon". A player wins by standing on
    the exit cell. If the turn budget runs out first, the player with the
    smaller Manhattan distance to the exit wins; equal distances are a draw.

    Every action except ``[Rest]`` costs 1 Focus and is rejected when the
    player has no Focus left; ``[Rest]`` restores 1 Focus.
    """

    # Grid offsets as (row delta, column delta); row 0 is the top ("North") row.
    # Insertion order is also the order in which scan results are reported.
    _DIRECTION_DELTAS: Dict[str, Tuple[int, int]] = {
        "North": (-1, 0),
        "South": (1, 0),
        "East": (0, 1),
        "West": (0, -1),
    }

    # Captures the text inside the first \boxed{...} of a player response.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}", re.DOTALL)

    def __init__(self, max_turns: int = 60, maze_size: int = 9):
        """
        Initialize environment config (not game-state).

        Args:
            max_turns: Number of accepted actions (counted across both
                players) after which the game is decided by distance.
            maze_size: Side length of the square maze grid, border included.
        """
        self.max_turns = max_turns
        self.maze_size = maze_size
        self.valid_actions = [
            "[Scan]",
            "[Mark]",
            "[Rest]",
            "[Move: North]",
            "[Move: South]",
            "[Move: East]",
            "[Move: West]",
        ]

    # ----------------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2
            seed: random seed for determinism

        Returns:
            The freshly initialised ``self.state`` object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("EchoMaze requires exactly 2 players (Sun and Moon).")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        random.seed(seed)

        # Generate base maze using seed for deterministic layout
        maze_layout, exit_location, sun_start, moon_start = self._generate_maze(seed)

        # Build game_state following Stage 1 schema
        game_state: Dict[str, Any] = {
            "maze_seed": seed,
            "turn_count": 0,
            "max_turns": self.max_turns,
            "maze_layout": maze_layout,
            "exit_location": exit_location,
            "players": {
                "Sun": {
                    "position": sun_start,
                    "markers": [],
                    "focus": 5,
                    "observations": [
                        f"Turn 1: Started at {tuple(sun_start)}."
                    ],
                    "last_action": None,
                },
                "Moon": {
                    "position": moon_start,
                    "markers": [],
                    "focus": 5,
                    "observations": [
                        f"Turn 1: Started at {tuple(moon_start)}."
                    ],
                    "last_action": None,
                },
            },
            "public_transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_move_reason": None,
        }

        # Reset game state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt,
                         role_mapping={0: "Sun", 1: "Moon"})

        # Announce
        self.state.add_observation("Welcome to EchoMaze: The Labyrinth Duel!", ta.ObservationType.GAME_MESSAGE)
        # NOTE(review): this message contains the exit coordinates. If
        # GAME_MESSAGE observations are delivered to the players, the "hidden"
        # exit is revealed to both of them - confirm this is intended.
        self.state.add_observation(f"Exit Glyph hidden at {tuple(exit_location)} (secretly known to system).",
                                   ta.ObservationType.GAME_MESSAGE)
        return self.state

    # ----------------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw response is logged, the action is extracted and validated,
        its effect is applied, and the win / turn-limit conditions are
        evaluated. A rejected action (unknown syntax, no Focus left, move
        into a wall or out of bounds) is reported through
        ``self.state.set_invalid_move`` and changes nothing else: it does not
        consume a turn, is not added to the transcript, and does not end the
        game.

        Args:
            action: The current player's raw response text.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id
        player_name = "Sun" if player_id == 0 else "Moon"

        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        extracted_action = self._extract_answer_content(action)
        current_state = self.state.game_state
        player_data = current_state["players"][player_name]

        # If game already terminal
        if current_state["winner"] or current_state["is_terminal"]:
            return self.state.step()

        # Each attempt is validated afresh, so a stale reason from an earlier
        # rejected attempt must not leak into this one.
        current_state["invalid_move_reason"] = None

        # --- Validation ---
        # A rejected action must not flag the game as terminal: doing so would
        # make every later call hit the early return above and be ignored.
        if extracted_action not in self.valid_actions:
            self._reject_action("Unrecognized action syntax.", current_state)
            return self.state.step()

        if player_data["focus"] <= 0 and extracted_action != "[Rest]":
            self._reject_action("Insufficient focus to perform action.", current_state)
            return self.state.step()

        # Execute effect
        result_message = ""
        if extracted_action.startswith("[Move:"):
            direction = extracted_action.split(":")[1].strip(" ]")
            result_message = self._process_move(player_name, direction, current_state)
            if current_state["invalid_move_reason"] is not None:
                # The move was rejected: report what happened, but do not
                # count it as a turn or evaluate end-of-game conditions.
                self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
                return self.state.step()
        elif extracted_action == "[Scan]":
            result_message = self._process_scan(player_name, current_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Mark]":
            result_message = self._process_mark(player_name, current_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Rest]":
            result_message = self._process_rest(player_name, current_state)

        player_data["last_action"] = extracted_action
        current_state["public_transcript"].append(f"{player_name}: {extracted_action}")
        current_state["turn_count"] += 1

        # --- Check Terminal Conditions after action ---
        self._check_terminal(current_state)

        # Log observation message
        self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
        return self.state.step()

    # ----------------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------------
    def _reject_action(self, reason: str, game_state: Dict[str, Any]) -> None:
        """Report *reason* as an invalid move and record it in *game_state*."""
        self.state.set_invalid_move(reason)
        game_state["invalid_move_reason"] = reason

    def _check_terminal(self, game_state: Dict[str, Any]) -> None:
        """
        Declare the outcome if the game has ended, otherwise do nothing.

        Reaching the exit takes precedence over the turn limit. On any ending
        the result is reported to ``self.state`` and mirrored into
        ``game_state["winner"]`` and ``game_state["is_terminal"]``.
        """
        exit_loc = game_state["exit_location"]
        sun_pos = game_state["players"]["Sun"]["position"]
        moon_pos = game_state["players"]["Moon"]["position"]
        sun_at_exit = sun_pos == exit_loc
        moon_at_exit = moon_pos == exit_loc

        if sun_at_exit and moon_at_exit:
            self.state.set_draw("Both players reached the Exit Glyph simultaneously.")
            game_state["winner"] = "Draw"
        elif sun_at_exit:
            self.state.set_winner(0, "Sun reached the Exit Glyph.")
            game_state["winner"] = "Sun"
        elif moon_at_exit:
            self.state.set_winner(1, "Moon reached the Exit Glyph.")
            game_state["winner"] = "Moon"
        elif game_state["turn_count"] >= self.max_turns:
            # Out of turns: whoever is closer to the exit wins.
            sun_dist = self._manhattan_distance(sun_pos, exit_loc)
            moon_dist = self._manhattan_distance(moon_pos, exit_loc)
            if sun_dist < moon_dist:
                self.state.set_winner(0, "Sun is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Sun"
            elif moon_dist < sun_dist:
                self.state.set_winner(1, "Moon is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Moon"
            else:
                self.state.set_draw("Equal distance to Exit Glyph after max turns.")
                game_state["winner"] = "Draw"
        else:
            return

        game_state["is_terminal"] = True

    def _generate_maze(
        self, seed: Optional[int]
    ) -> Tuple[List[List[str]], List[int], List[int], List[int]]:
        """
        Produces deterministic maze layout with walls (#), open cells (.), Exit (E).
        Ensures reproducibility.

        The outer border is always wall. Each interior cell is open with
        probability 0.75. The exit is placed on a random interior cell, then
        Sun starts on the first open cell scanning from the top-left and Moon
        on the first open cell scanning from the bottom-right; those cells
        are marked "S" and "M" in the layout.

        NOTE(review): nothing here guarantees that the exit is reachable from
        either start cell, and on very small or very dense mazes the start
        cells can coincide with each other or with the exit.

        Args:
            seed: Seed for the module-level ``random`` generator; ``None``
                leaves the layout unseeded.

        Returns:
            ``(maze, [exit_row, exit_col], sun_start, moon_start)``.

        Raises:
            ValueError: If ``self.maze_size`` is smaller than 3, which leaves
                no interior cell for the exit.
        """
        size = self.maze_size
        if size < 3:
            raise ValueError("maze_size must be at least 3 to leave room for interior cells.")

        random.seed(seed)
        maze = [["#" for _ in range(size)] for _ in range(size)]

        # Create random open cells
        for i in range(1, size - 1):
            for j in range(1, size - 1):
                maze[i][j] = "." if random.random() > 0.25 else "#"

        # Place exit
        exit_x, exit_y = random.randint(1, size - 2), random.randint(1, size - 2)
        maze[exit_x][exit_y] = "E"

        # Find top-left open for Sun, bottom-right open for Moon
        sun_start = self._find_open_cell(maze, from_top=True)
        moon_start = self._find_open_cell(maze, from_top=False)
        maze[sun_start[0]][sun_start[1]] = "S"  # Mark starting
        maze[moon_start[0]][moon_start[1]] = "M"

        return maze, [exit_x, exit_y], sun_start, moon_start

    def _find_open_cell(self, maze: List[List[str]], from_top: bool = True) -> List[int]:
        """
        Return ``[row, col]`` of the first open (".") cell in scan order.

        Args:
            maze: Grid of single-character cells.
            from_top: Scan rows and columns in ascending order when true,
                descending order otherwise.

        Returns:
            The first open cell found, or a fixed fallback (``[1, 1]`` when
            scanning from the top, otherwise the cell one step in from the
            bottom-right corner) when the maze has no open cell.
        """
        n_rows = len(maze)
        n_cols = len(maze[0]) if maze else 0
        if from_top:
            rows, cols = range(n_rows), range(n_cols)
        else:
            rows, cols = range(n_rows - 1, -1, -1), range(n_cols - 1, -1, -1)

        for i in rows:
            for j in cols:
                if maze[i][j] == ".":
                    return [i, j]
        # Fallback if none open
        return [1, 1] if from_top else [n_rows - 2, n_cols - 2]

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract content from \\boxed{}.

        Returns the stripped text of the first ``\\boxed{...}`` group in
        *action*, or the stripped *action* itself when there is none.
        """
        match = self._BOXED_PATTERN.search(action)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _manhattan_distance(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan distance between grid cells *a* and *b*."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _process_move(self, player: str, direction: str, game_state: Dict[str, Any]) -> str:
        """
        Move *player* one cell in *direction* if the target cell is enterable.

        A successful move updates the player's position and costs 1 Focus. A
        move out of bounds or into a wall is reported as an invalid move,
        recorded in ``game_state["invalid_move_reason"]``, and leaves the
        player's position and Focus untouched.

        Returns:
            A human-readable description of what happened.
        """
        row, col = game_state["players"][player]["position"]
        # An unrecognised direction maps to "stay in place".
        d_row, d_col = self._DIRECTION_DELTAS.get(direction, (0, 0))
        new_row, new_col = row + d_row, col + d_col

        maze = game_state["maze_layout"]
        if not (0 <= new_row < len(maze) and 0 <= new_col < len(maze[0])):
            self._reject_action("Cannot move outside bounds.", game_state)
            return f"{player} attempted to move outside bounds."
        if maze[new_row][new_col] == "#":
            self._reject_action("Cannot move through wall.", game_state)
            return f"{player} tried to move into a wall."

        game_state["players"][player]["position"] = [new_row, new_col]
        game_state["players"][player]["focus"] -= 1
        return f"{player} moved {direction} to {(new_row, new_col)}."

    def _process_scan(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Describe the four cells adjacent to *player*.

        Each neighbour is reported as "Wall" (a "#" cell), "Open" (any other
        cell) or "Out of bounds". The Focus cost is charged by the caller.
        """
        row, col = game_state["players"][player]["position"]
        maze = game_state["maze_layout"]

        findings = []
        for dir_name, (d_row, d_col) in self._DIRECTION_DELTAS.items():
            x, y = row + d_row, col + d_col
            if 0 <= x < len(maze) and 0 <= y < len(maze[0]):
                status = "Wall" if maze[x][y] == "#" else "Open"
            else:
                status = "Out of bounds"
            findings.append(f"{dir_name}: {status}")

        obs_msg = ", ".join(findings)
        return f"{player} scanned surroundings. {obs_msg}"

    def _process_mark(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Record *player*'s current cell in their marker list (no duplicates).

        The Focus cost is charged by the caller.
        """
        pos = game_state["players"][player]["position"]
        markers = game_state["players"][player]["markers"]
        if pos not in markers:
            # Store a copy so later position updates cannot alter the marker.
            markers.append(pos.copy())
        return f"{player} marked the cell at {tuple(pos)}."

    def _process_rest(self, player: str, game_state: Dict[str, Any]) -> str:
        """Restore 1 Focus to *player* and describe the new total."""
        game_state["players"][player]["focus"] += 1
        return f"{player} rested and recovered 1 Focus (now {game_state['players'][player]['focus']})."

    # ----------------------------------------------------------------------
    # Prompt
    # ----------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates player prompt at start of game.

        Args:
            player_id: 0 for Sun, anything else for Moon.
            game_state: Game-state dict holding each player's position and Focus.

        Returns:
            The instruction text, including the player's position, Focus and
            the list of accepted actions.
        """
        player_name = "Sun" if player_id == 0 else "Moon"
        pos = tuple(game_state["players"][player_name]["position"])
        focus = game_state["players"][player_name]["focus"]
        intro = (
            f"You are **Player {player_name}**, an explorer within the mystic underground labyrinth of EchoMaze.\n"
            f"Your current position is {pos} with Focus = {focus}.\n"
            "Your objective is to reach the Exit Glyph before your rival.\n"
            "Actions must be exactly one of:\n"
            "  - [Move: North], [Move: South], [Move: East], [Move: West]\n"
            "  - [Scan]  — Reveal walls around you.\n"
            "  - [Mark]  — Leave a marker in this cell.\n"
            "  - [Rest]  — Skip turn, regain 1 Focus.\n\n"
            "Only one action per turn. Place it inside \\boxed{} like so:\n"
            "Example valid response:\n"
            "I decide to move north.\n"
            "\\boxed{[Move: North]}\n\n"
            "Example invalid response:\n"
            "I will move upward.\n"
            "\\boxed{[Move: Up]}    <-- invalid action\n"
        )
        return intro

    # ----------------------------------------------------------------------
    # Framework helpers
    # ----------------------------------------------------------------------
    def get_observation(self) -> Tuple[int, Dict[str, Any]]:
        """
        Return the current player's id together with the game-state dict.

        NOTE(review): the returned dict includes ``maze_layout`` and
        ``exit_location``; confirm that callers are meant to receive the
        whole state rather than only the current player's observations.
        """
        return self.state.current_player_id, self.state.game_state

    def close(self) -> Tuple[Dict, Dict]:
        """Return final info: ``self.state.rewards`` and the game-state dict."""
        return self.state.rewards, self.state.game_state
```