import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class EchoMazeEnv(ta.Env):
    """
    EchoMaze: The Labyrinth Duel

    Deterministic, two-player, turn-based maze exploration game.

    Two players ("Sun", player 0, and "Moon", player 1) alternate actions on
    a square grid of walls ("#") and passable cells. Positions are stored as
    ``[row, col]`` lists; North decreases the row and East increases the
    column. Each player starts with 5 Focus: Move, Scan and Mark cost 1 Focus,
    Rest restores 1. The first player standing on the exit cell wins; once
    ``max_turns`` actions have been played, the player with the smaller
    Manhattan distance to the exit wins (a tie is a draw).
    """

    # Row/column offsets for each compass direction. Insertion order is also
    # the order in which scan results are reported.
    _DIRECTION_DELTAS: Dict[str, Tuple[int, int]] = {
        "North": (-1, 0),
        "South": (1, 0),
        "East": (0, 1),
        "West": (0, -1),
    }

    def __init__(self, max_turns: int = 60, maze_size: int = 9):
        """
        Initialize environment config (not game-state).

        Args:
            max_turns: Number of accepted actions after which the game is
                decided by distance to the exit.
            maze_size: Side length of the square maze, including the outer
                ring of walls.
        """
        self.max_turns = max_turns
        self.maze_size = maze_size
        self.valid_actions = [
            "[Scan]",
            "[Mark]",
            "[Rest]",
            "[Move: North]",
            "[Move: South]",
            "[Move: East]",
            "[Move: West]",
        ]

    # ----------------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2
            seed: random seed for determinism

        Returns:
            The freshly initialised framework state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("EchoMaze requires exactly 2 players (Sun and Moon).")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        random.seed(seed)

        # Generate base maze using seed for deterministic layout
        maze_layout, exit_location, sun_start, moon_start = self._generate_maze(seed)

        # Build game_state following Stage 1 schema
        game_state: Dict[str, Any] = {
            "maze_seed": seed,
            "turn_count": 0,
            "max_turns": self.max_turns,
            "maze_layout": maze_layout,
            "exit_location": exit_location,
            "players": {
                "Sun": {
                    "position": sun_start,
                    "markers": [],
                    "focus": 5,
                    "observations": [
                        f"Turn 1: Started at {tuple(sun_start)}."
                    ],
                    "last_action": None,
                },
                "Moon": {
                    "position": moon_start,
                    "markers": [],
                    "focus": 5,
                    "observations": [
                        f"Turn 1: Started at {tuple(moon_start)}."
                    ],
                    "last_action": None,
                },
            },
            "public_transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_move_reason": None,
        }

        # Reset game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "Sun", 1: "Moon"},
        )

        # Announce
        self.state.add_observation("Welcome to EchoMaze: The Labyrinth Duel!", ta.ObservationType.GAME_MESSAGE)
        # NOTE(review): this message carries the exit coordinates although its
        # wording says they are secret. If add_observation broadcasts to both
        # players by default, the exit is not hidden -- confirm the intent.
        self.state.add_observation(
            f"Exit Glyph hidden at {tuple(exit_location)} (secretly known to system).",
            ta.ObservationType.GAME_MESSAGE,
        )

        return self.state

    # ----------------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, validated, applied, and then the win and
        turn-limit conditions are evaluated. A rejected action (bad syntax,
        no Focus, wall, out of bounds) is reported through
        ``set_invalid_move`` and does not consume a turn or touch any other
        bookkeeping.

        Args:
            action: The player's raw reply, optionally wrapping the action
                token in ``\\boxed{...}``.

        Returns:
            Whatever ``self.state.step()`` returns: a ``(done, info)`` pair.
        """
        player_id = self.state.current_player_id
        player_name = "Sun" if player_id == 0 else "Moon"

        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        game_state = self.state.game_state

        # If game already terminal
        if game_state["winner"] or game_state["is_terminal"]:
            return self.state.step()

        # The reason always describes the action being processed right now, so
        # clear whatever an earlier rejected action left behind.
        game_state["invalid_move_reason"] = None

        extracted_action = self._extract_answer_content(action)
        player_data = game_state["players"][player_name]

        # --- Validation ---
        if extracted_action not in self.valid_actions:
            self._reject_action(game_state, "Unrecognized action syntax.")
            return self._end_step()

        if player_data["focus"] <= 0 and extracted_action != "[Rest]":
            self._reject_action(game_state, "Insufficient focus to perform action.")
            return self._end_step()

        # --- Execute effect ---
        result_message = ""
        if extracted_action.startswith("[Move:"):
            direction = extracted_action.split(":")[1].strip(" ]")
            result_message = self._process_move(player_name, direction, game_state)
            if game_state["invalid_move_reason"] is not None:
                # The move was rejected: no turn is consumed and no outcome
                # may be declared on the back of it.
                return self._end_step()
        elif extracted_action == "[Scan]":
            result_message = self._process_scan(player_name, game_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Mark]":
            result_message = self._process_mark(player_name, game_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Rest]":
            result_message = self._process_rest(player_name, game_state)

        player_data["last_action"] = extracted_action
        game_state["public_transcript"].append(f"{player_name}: {extracted_action}")
        game_state["turn_count"] += 1

        # --- Check Terminal Conditions after action ---
        self._resolve_outcome(game_state)

        # Log observation message
        self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)

        return self._end_step()

    def _reject_action(self, game_state: Dict[str, Any], reason: str) -> None:
        """Report an invalid action to the framework and record why.

        The game is deliberately not marked terminal here: whether the player
        may retry or forfeits is left to ``set_invalid_move``.
        """
        self.state.set_invalid_move(reason)
        game_state["invalid_move_reason"] = reason

    def _end_step(self) -> Tuple[bool, ta.Info]:
        """Advance the framework state and mirror its ``done`` flag into game_state."""
        done, info = self.state.step()
        if done:
            self.state.game_state["is_terminal"] = True
        return done, info

    def _resolve_outcome(self, game_state: Dict[str, Any]) -> None:
        """Declare a winner or draw if the exit was reached or turns ran out."""
        exit_loc = game_state["exit_location"]
        sun_pos = game_state["players"]["Sun"]["position"]
        moon_pos = game_state["players"]["Moon"]["position"]
        sun_at_exit = sun_pos == exit_loc
        moon_at_exit = moon_pos == exit_loc

        if sun_at_exit and moon_at_exit:
            self.state.set_draw("Both players reached the Exit Glyph simultaneously.")
            game_state["winner"] = "Draw"
        elif sun_at_exit:
            self.state.set_winner(0, "Sun reached the Exit Glyph.")
            game_state["winner"] = "Sun"
        elif moon_at_exit:
            self.state.set_winner(1, "Moon reached the Exit Glyph.")
            game_state["winner"] = "Moon"
        elif game_state["turn_count"] >= self.max_turns:
            sun_dist = self._manhattan_distance(sun_pos, exit_loc)
            moon_dist = self._manhattan_distance(moon_pos, exit_loc)
            if sun_dist < moon_dist:
                self.state.set_winner(0, "Sun is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Sun"
            elif moon_dist < sun_dist:
                self.state.set_winner(1, "Moon is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Moon"
            else:
                self.state.set_draw("Equal distance to Exit Glyph after max turns.")
                game_state["winner"] = "Draw"
        else:
            return

        game_state["is_terminal"] = True

    # ----------------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------------
    def _generate_maze(self, seed: Optional[int]):
        """
        Produces deterministic maze layout with walls (#), open cells (.), Exit (E).
        Ensures reproducibility.

        The outer ring is always wall; each interior cell is open with
        probability 0.75. Start cells are marked "S" and "M" in the layout.

        NOTE(review): nothing here guarantees that a path exists from either
        start cell to the exit.

        Args:
            seed: Seed for the module-level RNG; ``None`` gives a fresh,
                non-reproducible layout.

        Returns:
            ``(maze, exit_location, sun_start, moon_start)`` where ``maze`` is
            a list of rows and the three locations are ``[row, col]`` lists.
        """
        size = self.maze_size
        random.seed(seed)
        maze = [["#" for _ in range(size)] for _ in range(size)]

        # Create random open cells
        for i in range(1, size - 1):
            for j in range(1, size - 1):
                maze[i][j] = "." if random.random() > 0.25 else "#"

        # Place exit
        exit_x, exit_y = random.randint(1, size - 2), random.randint(1, size - 2)
        maze[exit_x][exit_y] = "E"

        # Mark Sun's cell before searching for Moon's, so that a maze with a
        # single open cell cannot hand the same cell to both players.
        sun_start = self._find_open_cell(maze, from_top=True)
        maze[sun_start[0]][sun_start[1]] = "S"
        moon_start = self._find_open_cell(maze, from_top=False)
        maze[moon_start[0]][moon_start[1]] = "M"

        return maze, [exit_x, exit_y], sun_start, moon_start

    def _find_open_cell(self, maze: List[List[str]], from_top: bool = True) -> List[int]:
        """
        Return the first "." cell, scanning from the top-left or bottom-right.

        Args:
            maze: Square grid of single-character cells.
            from_top: Scan rows and columns in ascending order when True,
                descending order otherwise.

        Returns:
            ``[row, col]`` of the first open cell, or a corner of the interior
            (``[1, 1]`` / ``[size - 2, size - 2]``) when no open cell exists.
        """
        size = len(maze)
        index_order = range(size) if from_top else range(size - 1, -1, -1)
        for i in index_order:
            for j in index_order:
                if maze[i][j] == ".":
                    return [i, j]
        # Fallback if none open
        return [1, 1] if from_top else [size - 2, size - 2]

    def _extract_answer_content(self, action: str) -> str:
        """Extract content from the first \\boxed{...}; fall back to the stripped text."""
        match = re.search(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _manhattan_distance(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan (L1) distance between two ``[row, col]`` points."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _process_move(self, player: str, direction: str, game_state: Dict[str, Any]) -> str:
        """
        Try to move ``player`` one cell in ``direction``.

        A successful move updates the position and spends 1 Focus. A rejected
        move (unknown direction, outside the grid, or into a wall) is reported
        as an invalid move, recorded in ``game_state["invalid_move_reason"]``
        and leaves position and Focus untouched.

        Returns:
            A human-readable description of what happened.
        """
        delta = self._DIRECTION_DELTAS.get(direction)
        if delta is None:
            self._reject_action(game_state, "Unrecognized action syntax.")
            return f"{player} attempted to move in an unknown direction."

        player_data = game_state["players"][player]
        x, y = player_data["position"]
        new_x, new_y = x + delta[0], y + delta[1]
        maze = game_state["maze_layout"]

        if not (0 <= new_x < len(maze) and 0 <= new_y < len(maze[0])):
            self._reject_action(game_state, "Cannot move outside bounds.")
            return f"{player} attempted to move outside bounds."

        if maze[new_x][new_y] == "#":
            self._reject_action(game_state, "Cannot move through wall.")
            return f"{player} tried to move into a wall."

        player_data["position"] = [new_x, new_y]
        player_data["focus"] -= 1
        return f"{player} moved {direction} to {(new_x, new_y)}."

    def _process_scan(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Describe the four cells adjacent to ``player``.

        Each neighbour is reported as "Wall", "Open" (anything that is not a
        wall, including the exit) or "Out of bounds". Focus is not changed
        here; the caller charges for the scan.
        """
        row, col = game_state["players"][player]["position"]
        maze = game_state["maze_layout"]

        findings = []
        for dir_name, (d_row, d_col) in self._DIRECTION_DELTAS.items():
            x, y = row + d_row, col + d_col
            if 0 <= x < len(maze) and 0 <= y < len(maze[0]):
                label = "Wall" if maze[x][y] == "#" else "Open"
            else:
                label = "Out of bounds"
            findings.append(f"{dir_name}: {label}")

        obs_msg = ", ".join(findings)
        return f"{player} scanned surroundings. {obs_msg}"

    def _process_mark(self, player: str, game_state: Dict[str, Any]) -> str:
        """Record the player's current cell in their marker list (no duplicates)."""
        pos = game_state["players"][player]["position"]
        markers = game_state["players"][player]["markers"]
        if pos not in markers:
            # Store a copy so later moves cannot alias the stored marker.
            markers.append(pos.copy())
        return f"{player} marked the cell at {tuple(pos)}."

    def _process_rest(self, player: str, game_state: Dict[str, Any]) -> str:
        """Restore 1 Focus to ``player`` and report the new total."""
        game_state["players"][player]["focus"] += 1
        return f"{player} rested and recovered 1 Focus (now {game_state['players'][player]['focus']})."

    # ----------------------------------------------------------------------
    # Prompt
    # ----------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates player prompt at start of game.

        Args:
            player_id: 0 for Sun, any other value for Moon.
            game_state: The current game-state dictionary.

        Returns:
            The instruction text listing the objective and the legal actions.
        """
        player_name = "Sun" if player_id == 0 else "Moon"
        pos = tuple(game_state["players"][player_name]["position"])
        focus = game_state["players"][player_name]["focus"]

        intro = (
            f"You are **Player {player_name}**, an explorer within the mystic underground labyrinth of EchoMaze.\n"
            f"Your current position is {pos} with Focus = {focus}.\n"
            "Your objective is to reach the Exit Glyph before your rival.\n"
            "Actions must be exactly one of:\n"
            " - [Move: North], [Move: South], [Move: East], [Move: West]\n"
            " - [Scan] — Reveal walls around you.\n"
            " - [Mark] — Leave a marker in this cell.\n"
            " - [Rest] — Skip turn, regain 1 Focus.\n\n"
            "Only one action per turn. Place it inside \\boxed{} like so:\n"
            "Example valid response:\n"
            "I decide to move north.\n"
            "\\boxed{[Move: North]}\n\n"
            "Example invalid response:\n"
            "I will move upward.\n"
            "\\boxed{[Move: Up]} <-- invalid action\n"
        )
        return intro

    # ----------------------------------------------------------------------
    # Framework helpers
    # ----------------------------------------------------------------------
    def get_observation(self) -> Tuple[int, Dict[str, Any]]:
        """Return the current player id together with the game-state dictionary.

        NOTE(review): this hands back the full game state (maze layout and
        exit location included) rather than a per-player observation list --
        confirm this is what the framework and its wrappers expect.
        """
        return self.state.current_player_id, self.state.game_state

    def close(self) -> Tuple[Dict, Dict]:
        """Return final info"""
        return self.state.rewards, self.state.game_state