env.py

```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class LabyrinthCommandEnv(ta.Env):
    """
    Deterministic, turn-based two-player tactical maze environment: "Labyrinth Command"
    Two players (Explorer A and B) move through a deterministic maze to reach the Central Beacon.

    Explorer A (player id 0) starts in the top-left corner and Explorer B
    (player id 1) in the bottom-right corner. The beacon sits at
    (maze_width // 2, maze_height // 2). Coordinates are (x, y) with y growing
    towards "South".
    """

    # Boxed-answer patterns. The double-brace form is accepted because the
    # original implementation accepted it as well.
    _BOXED_DOUBLE = re.compile(r"\\boxed\{\{(.*?)\}\}", re.DOTALL)
    _BOXED_SINGLE = re.compile(r"\\boxed\{(.*?)\}", re.DOTALL)

    # (dx, dy) offset for each direction keyword allowed by ``move_pattern``.
    _DIRECTION_DELTAS = {
        "North": (0, -1),
        "South": (0, 1),
        "West": (-1, 0),
        "East": (1, 0),
    }

    def __init__(self, max_turns: int = 40, maze_width: int = 7, maze_height: int = 7):
        """
        Store the configuration and compile the command grammar.

        Args:
            max_turns: turn limit handed to the textarena state on reset.
            maze_width: number of columns in the maze.
            maze_height: number of rows in the maze.
        """
        self.max_turns = max_turns
        self.maze_width = maze_width
        self.maze_height = maze_height
        self.move_pattern = re.compile(r"^\[Move:(North|South|East|West)\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.wait_pattern = re.compile(r"^\[Wait\]$")

    # -------------------------------------------------------------------------
    # ========== Helper: Extract boxed command ==========
    def _extract_answer_content(self, action: str) -> str:
        """
        Return the command inside the last ``\\boxed{...}`` of ``action``.

        The prompt asks for the final answer at the end of the response, so the
        last boxed occurrence wins; an earlier mention (for example a player
        restating "wrap it in \\boxed{}") must not shadow the real command.
        Both ``\\boxed{...}`` and ``\\boxed{{...}}`` are accepted.

        Args:
            action: raw text submitted by the player.

        Returns:
            The stripped boxed content, or the stripped ``action`` itself when
            no boxed expression is present.
        """
        last_match = None
        # The double-brace pattern is scanned first and replacement requires a
        # strictly later start, so when both patterns match at the same place
        # the double-brace reading (without the stray "{") is kept.
        for pattern in (self._BOXED_DOUBLE, self._BOXED_SINGLE):
            for match in pattern.finditer(action):
                if last_match is None or match.start() > last_match.start():
                    last_match = match
        if last_match is None:
            return action.strip()
        return last_match.group(1).strip()

    # -------------------------------------------------------------------------
    # ========== Maze and visibility helpers ==========
    def _generate_deterministic_maze(self, seed: int) -> List[List[str]]:
        """
        Generate the maze grid for ``seed``.

        Roughly 10% of the cells are drawn as blocked ("X"); repeated draws of
        the same cell and draws of either start corner are simply skipped, so
        the final count can be lower. A private ``random.Random`` is used so the
        process-wide RNG is not reseeded as a side effect; it produces the same
        sequence as ``random.seed(seed)`` followed by module-level calls.

        NOTE(review): nothing here guarantees the beacon is reachable from both
        corners; blocks next to a corner can wall it off.

        Args:
            seed: value that fully determines the layout.

        Returns:
            ``maze[y][x]`` grid of "." (free) and "X" (blocked) cells.
        """
        rng = random.Random(seed)
        width, height = self.maze_width, self.maze_height
        maze = [["." for _ in range(width)] for _ in range(height)]
        start_cells = {(0, 0), (width - 1, height - 1)}
        num_blocks = (width * height) // 10  # about 10% blocked
        for _ in range(num_blocks):
            # Draw x before y: the order fixes which cells a seed yields.
            x = rng.randint(0, width - 1)
            y = rng.randint(0, height - 1)
            if (x, y) not in start_cells:
                maze[y][x] = "X"
        return maze

    def _build_maze_grid(self, game_state: Dict[str, Any]) -> List[List[str]]:
        """
        Rebuild the ``maze[y][x]`` grid from the serialisable game state.

        Blocked cells become "X", the beacon cell "B" and everything else ".",
        matching the grid that ``reset`` used for the initial visible maps.
        """
        blocked = {tuple(cell) for cell in game_state["cells_blocked"]}
        width, height = game_state["maze_width"], game_state["maze_height"]
        grid = [
            ["X" if (x, y) in blocked else "." for x in range(width)]
            for y in range(height)
        ]
        beacon_x, beacon_y = game_state["beacon_position"]
        grid[beacon_y][beacon_x] = "B"
        return grid

    def _compute_visible_map(self, maze: List[List[str]], pos: Tuple[int, int]) -> List[List[str]]:
        """
        Compute a 3x3 visible map centered on pos.

        Args:
            maze: ``maze[y][x]`` grid.
            pos: (x, y) centre of the window.

        Returns:
            Three rows of three cells, north row first; cells outside the maze
            bounds are rendered as "?".
        """
        visible = []
        for dy in range(-1, 2):
            row = []
            for dx in range(-1, 2):
                nx, ny = pos[0] + dx, pos[1] + dy
                if 0 <= nx < self.maze_width and 0 <= ny < self.maze_height:
                    row.append(maze[ny][nx])
                else:
                    row.append("?")
            visible.append(row)
        return visible

    def _distance(self, a: Tuple[int, int], b: Tuple[int, int]) -> int:
        """Return the Manhattan distance between cells ``a`` and ``b``."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    # -------------------------------------------------------------------------
    # ========== Reset ==========
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2.
            seed: optional deterministic seed; a random one is drawn when omitted.

        Returns:
            The freshly initialised ``self.state``.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Labyrinth Command requires exactly 2 players.")

        seed = seed if seed is not None else random.randint(1, 999999)
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        maze = self._generate_deterministic_maze(seed)
        beacon_pos = (self.maze_width // 2, self.maze_height // 2)
        # The beacon overrides a block that happened to land on the centre, so
        # the centre never ends up in ``cells_blocked`` below.
        maze[beacon_pos[1]][beacon_pos[0]] = "B"

        start_positions = {
            "A": (0, 0),
            "B": (self.maze_width - 1, self.maze_height - 1),
        }
        # Positions are stored as lists, the same shape ``step`` writes back and
        # the same shape used for ``visited_cells`` / ``beacon_position``.
        player_states = {
            label: {
                "position": list(start),
                "visible_map": self._compute_visible_map(maze, start),
                "visited_cells": [list(start)],
                "last_action": None,
            }
            for label, start in start_positions.items()
        }

        cells_blocked = [[x, y] for y in range(self.maze_height) for x in range(self.maze_width) if maze[y][x] == "X"]

        game_state = {
            "seed": seed,
            "turn_index": 0,
            "max_turns": self.max_turns,
            "maze_width": self.maze_width,
            "maze_height": self.maze_height,
            "beacon_position": list(beacon_pos),
            "cells_blocked": cells_blocked,
            "player_states": player_states,
            "transcript": [],
            "winner": None,
            "terminated": False,
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
        self.state.add_observation(message="Welcome to Labyrinth Command!", observation_type=ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(message=f"Seed: {seed} ensures deterministic maze generation.", observation_type=ta.ObservationType.GAME_MESSAGE)
        return self.state

    # -------------------------------------------------------------------------
    # ========== Step ==========
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed command is extracted and validated
        against the grammar, the command is applied, and finally the win and
        turn-limit conditions are evaluated.

        Args:
            action: raw text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        # log the player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=self.state.current_player_id, to_id=-1)
        player_id = self.state.current_player_id
        player_label = "A" if player_id == 0 else "B"

        if self.state.done:
            self.state.set_invalid_move("Game already finished.")
            return self.state.step()

        answer = self._extract_answer_content(action)
        gs = self.state.game_state
        player_state = gs["player_states"][player_label]
        current_pos = tuple(player_state["position"])
        beacon = tuple(gs["beacon_position"])

        # Validate action syntax
        if not (self.move_pattern.match(answer) or self.scan_pattern.match(answer) or self.wait_pattern.match(answer)):
            self.state.set_invalid_move(reason="Invalid token format.")
            return self.state.step()

        if answer.startswith("[Move:"):
            # The grammar check above guarantees the keyword is in the table.
            direction = answer[len("[Move:"):-1]
            dx, dy = self._DIRECTION_DELTAS[direction]
            nx, ny = current_pos[0] + dx, current_pos[1] + dy
            if not (0 <= nx < gs["maze_width"] and 0 <= ny < gs["maze_height"]):
                self.state.set_invalid_move("Move out of bounds")
                return self.state.step()
            if [nx, ny] in gs["cells_blocked"]:
                self.state.set_invalid_move("Cell blocked")
                return self.state.step()
            player_state["position"] = [nx, ny]
            player_state["visited_cells"].append([nx, ny])
            player_state["visible_map"] = self._compute_visible_map(self._build_maze_grid(gs), (nx, ny))
        elif answer == "[Scan]":
            player_state["visible_map"] = self._compute_visible_map(self._build_maze_grid(gs), current_pos)
        # "[Wait]" changes nothing.

        player_state["last_action"] = answer
        gs["transcript"].append({"player": player_label, "action": answer})
        gs["turn_index"] += 1

        # Tell the acting player what they can see now; otherwise the refreshed
        # position / visible map stored above is never shown to anyone.
        # NOTE(review): assumes the prompt is only rendered from the state at
        # reset time - confirm against textarena's prompt lifecycle.
        view = "\n".join(" ".join(row) for row in player_state["visible_map"])
        self.state.add_observation(
            message=f"Explorer {player_label} is at {player_state['position']}. Visible 3x3 map:\n{view}",
            observation_type=ta.ObservationType.GAME_MESSAGE,
            to_id=player_id,
        )

        # ===== Check terminal conditions =====
        reached_A = tuple(gs["player_states"]["A"]["position"]) == beacon
        reached_B = tuple(gs["player_states"]["B"]["position"]) == beacon

        if reached_A or reached_B:
            if reached_A and reached_B:
                self.state.set_draw(reason="Both players reached the Beacon simultaneously.")
                gs["winner"] = "Draw"
            elif reached_A:
                self.state.set_winner(player_id=0, reason="Explorer A reached the Beacon first.")
                gs["winner"] = "A"
            else:
                self.state.set_winner(player_id=1, reason="Explorer B reached the Beacon first.")
                gs["winner"] = "B"
            gs["terminated"] = True
            return self.state.step()

        # Check turn limit: whoever is closer to the beacon (Manhattan) wins.
        if self.state.check_turn_limit():
            posA = tuple(gs["player_states"]["A"]["position"])
            posB = tuple(gs["player_states"]["B"]["position"])
            distA = self._distance(posA, beacon)
            distB = self._distance(posB, beacon)
            if distA < distB:
                self.state.set_winner(player_id=0, reason="Explorer A is closer to Beacon at turn limit.")
                gs["winner"] = "A"
            elif distB < distA:
                self.state.set_winner(player_id=1, reason="Explorer B is closer to Beacon at turn limit.")
                gs["winner"] = "B"
            else:
                self.state.set_draw(reason="Both explorers equally distant at turn limit.")
                gs["winner"] = "Draw"
            gs["terminated"] = True

        return self.state.step()

    # -------------------------------------------------------------------------
    # ========== Prompt ==========
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for one player from ``game_state``.

        Args:
            player_id: 0 for Explorer A, anything else for Explorer B.
            game_state: the dictionary assembled in ``reset``.

        Returns:
            The prompt text: rules, action grammar, the player's position and
            3x3 view, and the opponent's last recorded action.
        """
        player_label = "A" if player_id == 0 else "B"
        state = game_state["player_states"][player_label]
        pos = state["position"]
        visible_map = "\n".join([" ".join(row) for row in state["visible_map"]])
        turn_index = game_state["turn_index"]
        max_turns = game_state["max_turns"]
        opponent_label = "B" if player_label == "A" else "A"
        last_opp_action = (
            game_state["player_states"][opponent_label]["last_action"] or "None yet"
        )

        prompt = f"""
You are Explorer {player_label} navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
Each turn you may issue one command from this action grammar:

[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]

Remember:
- Maze bounds are 0 ≤ x < {game_state['maze_width']}, 0 ≤ y < {game_state['maze_height']}.
- Moving into blocked walls ('X') or out of bounds is invalid.
- The beacon lies at the labyrinth’s center at {game_state['beacon_position']}.
- You must wrap your command inside \\boxed{{}}.

Current turn: {turn_index}/{max_turns}
Your current position: {pos}
Your visible 3×3 map:
{visible_map}

Your opponent’s last known action: {last_opp_action}

Example valid response:
I want to go north toward the Beacon.
\\boxed{{[Move:North]}}

Example invalid response:
Let's go northeast! ← invalid direction keyword

Now choose your next command carefully.
Put your final answer within \\boxed{{}} at the end of your response.
""".strip()
        return prompt
```