import random
import re
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class MazeBoundEnv(ta.Env):
    """
    MazeBound: deterministic, turn-based maze navigation game.

    Two explorers start in opposite corners of a square grid (Explorer Alpha
    at ``(0, 0)``, Explorer Beta at ``(maze_size - 1, maze_size - 1)``) and
    compete to reach the Beacon Core first. If nobody reaches it before the
    turn limit, the explorer with the smaller Manhattan distance to the
    beacon wins; equal distances are a draw.

    Coordinates are ``(row, col)``: "N"/"S" change the row, "E"/"W" change
    the column.
    """

    # (row delta, col delta) applied by each MOVE direction.
    _DIRECTION_DELTAS = {"N": (-1, 0), "S": (1, 0), "E": (0, 1), "W": (0, -1)}

    def __init__(self, maze_size: int = 7, turn_limit: int = 40):
        """
        Configure the environment.

        Args:
            maze_size: Side length of the square maze. Must be at least 3 so
                that an interior (non-edge) cell exists for the beacon.
            turn_limit: Total number of individual player actions after which
                the game is resolved by distance (see ``step``).

        Raises:
            ValueError: If ``maze_size`` is smaller than 3.
        """
        if maze_size < 3:
            # Fail early with a clear message; otherwise the beacon placement
            # in _generate_maze fails later with an opaque empty-range error.
            raise ValueError(
                "maze_size must be at least 3 so the Beacon Core can be "
                "placed on an interior cell."
            )
        self.maze_size = maze_size
        self.turn_limit = turn_limit
        self.visibility_radius = 1

        # Precompile regex patterns for action grammar
        self.move_pattern = re.compile(r"^MOVE:(N|S|E|W)$")
        self.scan_pattern = re.compile(r"^SCAN$")
        self.pass_pattern = re.compile(r"^PASS$")

    # -------------------------------
    # Helper: Extract \boxed{} content
    # -------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Return the stripped content of the first ``\boxed{...}`` in ``action``.

        If no boxed token is present, the whole stripped action is returned
        so the grammar check in ``step`` can reject it.
        """
        match = re.search(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    # -------------------------------
    # Maze Generation
    # -------------------------------
    def _generate_maze(self, seed: Optional[int]) -> Tuple[List[List[str]], Tuple[int, int]]:
        """
        Generate a deterministic maze and beacon location for ``seed``.

        Each cell is independently a wall ("#") with probability 0.2 and open
        (" ") otherwise; both start corners are forced open and the beacon
        ("B") is placed on a random interior cell. If a start corner cannot
        reach the beacon, an L-shaped corridor is carved so that both players
        always have a path.

        Returns:
            ``(maze, (beacon_row, beacon_col))`` where ``maze`` is a list of
            rows of single-character cells.
        """
        rnd = random.Random(seed)
        size = self.maze_size

        # Row-major draw order: one rnd.random() call per cell.
        maze = [
            ["#" if rnd.random() < 0.2 else " " for _ in range(size)]
            for _ in range(size)
        ]

        # Ensure both start corners are open
        maze[0][0] = " "
        maze[size - 1][size - 1] = " "

        # Beacon location - interior cell (never on an edge, so never a start)
        bx, by = rnd.randint(1, size - 2), rnd.randint(1, size - 2)
        maze[bx][by] = "B"
        beacon = (bx, by)

        # Only mazes that would otherwise be unwinnable for a player are
        # altered; already-solvable layouts are left untouched.
        for start in ((0, 0), (size - 1, size - 1)):
            if not self._is_reachable(maze, start, beacon):
                self._carve_corridor(maze, start, beacon)

        return maze, beacon

    def _is_reachable(self, maze: List[List[str]], start: Tuple[int, int], goal: Tuple[int, int]) -> bool:
        """Return True if ``goal`` can be reached from ``start`` via non-wall cells."""
        size = len(maze)
        seen = {start}
        stack = [start]
        while stack:
            x, y = stack.pop()
            if (x, y) == goal:
                return True
            for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
                if (
                    0 <= nx < size
                    and 0 <= ny < size
                    and (nx, ny) not in seen
                    and maze[nx][ny] != "#"
                ):
                    seen.add((nx, ny))
                    stack.append((nx, ny))
        return False

    def _carve_corridor(self, maze: List[List[str]], start: Tuple[int, int], goal: Tuple[int, int]) -> None:
        """Open every wall on the row-then-column path from ``start`` to ``goal``."""
        x, y = start
        gx, gy = goal
        while x != gx:
            x += 1 if gx > x else -1
            if maze[x][y] == "#":
                maze[x][y] = " "
        while y != gy:
            y += 1 if gy > y else -1
            if maze[x][y] == "#":
                maze[x][y] = " "

    # -------------------------------
    # Helper: Compute Manhattan distance
    # -------------------------------
    def _manhattan(self, a: Tuple[int, int], b: Tuple[int, int]) -> int:
        """Return the Manhattan (L1) distance between grid cells ``a`` and ``b``."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    # -------------------------------
    # Reset method
    # -------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed for deterministic maze generation.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("MazeBound is strictly a two-player game.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.turn_limit)

        maze, beacon_coord = self._generate_maze(seed)
        far_corner = (self.maze_size - 1, self.maze_size - 1)

        # Initialize players
        players = {
            "A": {
                "name": "Explorer Alpha",
                "position": [0, 0],
                "visible_cells": self._visible_cells((0, 0)),
                "discovered_map": {},
                "distance_to_beacon": self._manhattan((0, 0), beacon_coord),
                "last_action": None,
            },
            "B": {
                "name": "Explorer Beta",
                "position": list(far_corner),
                "visible_cells": self._visible_cells(far_corner),
                "discovered_map": {},
                "distance_to_beacon": self._manhattan(far_corner, beacon_coord),
                "last_action": None,
            },
        }

        game_state = {
            "maze_size": self.maze_size,
            "turn_number": 0,
            "turn_limit": self.turn_limit,
            "seed": seed,
            "beacon_coord": list(beacon_coord),
            "maze_layout": maze,
            "players": players,
            "history": [],
            "winner": None,
            "terminated": False,
            "termination_reason": "",
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
        self.state.add_observation("Welcome to MazeBound!", ta.ObservationType.GAME_MESSAGE)

    # -------------------------------
    # Visibility Calculation
    # -------------------------------
    def _visible_cells(self, pos: Tuple[int, int]) -> List[List[int]]:
        """
        Return the in-bounds cells within ``self.visibility_radius`` of ``pos``.

        The square neighbourhood includes ``pos`` itself; cells are listed in
        row-major order as ``[row, col]`` pairs.
        """
        radius = self.visibility_radius
        size = self.maze_size
        x, y = pos
        return [
            [nx, ny]
            for nx in range(x - radius, x + radius + 1)
            for ny in range(y - radius, y + radius + 1)
            if 0 <= nx < size and 0 <= ny < size
        ]

    # -------------------------------
    # Step Method
    # -------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Only successfully executed actions (a legal MOVE, SCAN or PASS) are
        recorded in ``history`` and counted toward the turn limit. Malformed
        actions and moves rejected by ``_execute_move`` (wall / out of bounds)
        are reported through ``set_invalid_move`` and otherwise leave the
        game state untouched.

        ``turn_number`` counts completed rounds (two recorded actions), so the
        game is resolved by distance once ``turn_limit // 2`` rounds are done.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) as produced by ``self.state.step()``.
        """
        player_idx = self.state.current_player_id
        player_key = "A" if player_idx == 0 else "B"

        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_idx,
            to_id=-1,
        )

        extracted = self._extract_answer_content(action)
        game_state = self.state.game_state

        # Validate grammar and execute
        move_match = self.move_pattern.match(extracted)
        if move_match:
            if not self._execute_move(player_key, move_match.group(1)):
                # The invalid move was already flagged by _execute_move; do
                # not record it or let it advance the round counter.
                return self.state.step()
        elif self.scan_pattern.match(extracted):
            self._execute_scan(player_key)
        elif not self.pass_pattern.match(extracted):
            self.state.set_invalid_move(reason="UnrecognizedActionFormat")
            return self.state.step()
        # PASS: valid action with no effect.

        # Record history
        player_data = game_state["players"][player_key]
        player_data["last_action"] = extracted
        turn_pair_number = (len(game_state["history"]) // 2) + 1
        game_state["history"].append({"turn": turn_pair_number, "player": player_key, "action": extracted})

        # Check beacon capture termination
        if tuple(player_data["position"]) == tuple(game_state["beacon_coord"]):
            game_state["terminated"] = True
            game_state["winner"] = player_key
            game_state["termination_reason"] = "BeaconCaptured"
            self.state.set_winner(player_id=player_idx, reason="BeaconCaptured")
            return self.state.step()

        # Update turn number every two recorded actions (one full round)
        if len(game_state["history"]) % 2 == 0:
            game_state["turn_number"] += 1

        # Check turn limit termination
        if game_state["turn_number"] >= self.turn_limit // 2:
            self._determine_end_by_distance()

        return self.state.step()

    # -------------------------------
    # Action execution helpers
    # -------------------------------
    def _execute_move(self, player_key: str, direction: str) -> bool:
        """
        Try to move ``player_key`` one cell in ``direction``.

        On success the player's position, visible cells and distance to the
        beacon are updated. On failure the state is flagged via
        ``set_invalid_move`` with one of "UnrecognizedActionFormat",
        "OutOfBounds" or "BlockedByWall", and nothing else changes.

        Returns:
            True if the move was applied, False if it was rejected.
        """
        game_state = self.state.game_state
        player_data = game_state["players"][player_key]

        delta = self._DIRECTION_DELTAS.get(direction)
        if delta is None:
            self.state.set_invalid_move("UnrecognizedActionFormat")
            return False

        x, y = player_data["position"]
        nx, ny = x + delta[0], y + delta[1]

        if not (0 <= nx < self.maze_size and 0 <= ny < self.maze_size):
            self.state.set_invalid_move("OutOfBounds")
            return False

        if game_state["maze_layout"][nx][ny] == "#":
            self.state.set_invalid_move("BlockedByWall")
            return False

        # Apply move
        player_data["position"] = [nx, ny]
        player_data["visible_cells"] = self._visible_cells((nx, ny))

        # Recalculate distance
        beacon = tuple(game_state["beacon_coord"])
        player_data["distance_to_beacon"] = self._manhattan((nx, ny), beacon)
        return True

    def _execute_scan(self, player_key: str):
        """
        Refresh the player's ``visible_cells`` for their current position.

        NOTE(review): this only recomputes coordinates; it does not populate
        ``discovered_map`` or emit an observation with cell contents.
        """
        player_data = self.state.game_state["players"][player_key]
        player_data["visible_cells"] = self._visible_cells(tuple(player_data["position"]))

    # -------------------------------
    # Terminal Check helper (time expired)
    # -------------------------------
    def _determine_end_by_distance(self):
        """
        Resolve the game on timeout by Manhattan distance to the beacon.

        The closer player wins with reason "TimeExpired"; equal distances
        produce a draw (``termination_reason`` "Draw", state reason
        "EqualDistance").
        """
        game_state = self.state.game_state
        a_dist = game_state["players"]["A"]["distance_to_beacon"]
        b_dist = game_state["players"]["B"]["distance_to_beacon"]

        game_state["terminated"] = True

        if a_dist == b_dist:
            game_state["winner"] = None
            game_state["termination_reason"] = "Draw"
            self.state.set_draw(reason="EqualDistance")
            return

        winner_key, winner_id = ("A", 0) if a_dist < b_dist else ("B", 1)
        game_state["winner"] = winner_key
        game_state["termination_reason"] = "TimeExpired"
        self.state.set_winner(player_id=winner_id, reason="TimeExpired")

    # -------------------------------
    # Prompt generation for player
    # -------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for ``player_id`` from ``game_state``.

        The prompt reports the player's current coordinates, the coordinates
        of the cells in view, the configured turn limit, and the action
        grammar (a single token wrapped in ``\\boxed{}``).
        """
        player_key = "A" if player_id == 0 else "B"
        player_data = game_state["players"][player_key]
        visible = player_data["visible_cells"]
        coords_str = ", ".join([f"({x},{y})" for x, y in visible])
        remaining = game_state["turn_limit"] - game_state["turn_number"]

        return (
            f"You are {player_data['name']} in MazeBound, a turn-based labyrinth navigation game.\n"
            "Your goal is to reach the Beacon Core (marked 'B') before your opponent.\n\n"
            f"Current coordinates: {tuple(player_data['position'])}\n"
            f"Visible cells (radius {self.visibility_radius}): {coords_str}\n"
            f"Turns remaining (approximate): {remaining}\n"
            "Available actions:\n"
            " - MOVE:N, MOVE:S, MOVE:E, MOVE:W\n"
            " - SCAN\n"
            " - PASS\n\n"
            "Rules:\n"
            " - Moves blocked by walls (#) or map edges cause Invalid Moves.\n"
            " - SCAN reveals adjacent cells within your visibility range.\n"
            # Report the configured limit rather than a hard-coded 40.
            f" - Game ends when a player reaches the Beacon Core or after {game_state['turn_limit']} turns.\n"
            "\nUse \\boxed{} around your action token.\n"
            "Example valid response:\n"
            " It looks clear eastward, I'll proceed.\n"
            " \\boxed{MOVE:E}\n"
            "Example invalid response:\n"
            " Let's go east! (missing box)\n"
        )

    # -------------------------------
    # Close method
    # -------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return rewards and game_info at end of game."""
        return self.state.rewards, self.state.game_info