import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class LabyrinthConquestEnv(ta.Env):
    """
    Environment implementation for the Labyrinth Conquest game (Stage 1 design).

    Two-player deterministic turn-based grid navigation game. Player A starts
    at the top-left corner, Player B at the bottom-right corner, and the relic
    sits at ``(grid_size // 2, grid_size // 2)``. The first player standing on
    the relic after their own action wins; at the turn limit the player with
    the smaller Manhattan distance to the relic wins.

    NOTE(review): as implemented here, ``Rotate`` and ``Activate`` actions are
    validated and logged but do not modify the board, and ``trap`` tiles have
    no effect on movement. Randomly placed walls are never checked for
    reachability, so a layout may make the relic unreachable by moves alone.
    """

    # Below this size the two start corners and the relic cell overlap.
    MIN_GRID_SIZE = 3

    # Accepts both \boxed{...} and \boxed{{...}} so doubled braces are tolerated.
    _BOXED_PATTERN = re.compile(r'\\boxed\{\{?([^}]*)\}?\}')

    # (row delta, column delta) for each compass direction; N decreases the row.
    _DIRECTION_DELTAS = {"N": (-1, 0), "S": (1, 0), "E": (0, 1), "W": (0, -1)}

    def __init__(self, grid_size: int = 5, max_turns: int = 80):
        """
        Configure the board size and turn limit and compile the action grammars.

        Args:
            grid_size (int): Side length of the square grid; must be >= 3.
            max_turns (int): Number of valid turns after which the game is
                decided by distance to the relic.

        Raises:
            ValueError: If ``grid_size`` is smaller than ``MIN_GRID_SIZE``.
        """
        if grid_size < self.MIN_GRID_SIZE:
            raise ValueError(
                f"grid_size must be at least {self.MIN_GRID_SIZE} so that the "
                "start tiles and the relic occupy distinct cells."
            )
        self.grid_size = grid_size
        self.max_turns = max_turns
        self.move_pattern = re.compile(r'^\[Move: (N|S|E|W)\]$')
        self.rotate_pattern = re.compile(r'^\[Rotate: ([0-9]+),([0-9]+),(CW|CCW)\]$')
        self.activate_pattern = re.compile(r'^\[Activate: (Bridge|TrapDisarm|RowShift)\]$')

    # === Helper to extract boxed command ======================================
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside the final \\boxed{...} of ``action``.

        The prompt asks players to put their answer at the end of the response,
        so when several boxed spans are present the last one is used.

        Returns:
            str: The stripped boxed content, or the stripped raw ``action``
            when no boxed span is present.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    # === Reset ===============================================================
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players (int): Must be 2.
            seed (Optional[int]): Optional seed for deterministic setup.

        Returns:
            The freshly initialised ``self.state``, for chaining if needed.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Labyrinth Conquest is a two-player game.")

        self.random = random.Random(seed)
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        size = self.grid_size
        relic_pos = self._relic_position()
        tiles = self._generate_tiles(size)

        # Gadgets are drawn after the tiles so seeded games keep the same RNG order.
        all_gadgets = ["Bridge", "TrapDisarm", "RowShift"]
        gadgets_a = self.random.sample(all_gadgets, k=2)
        gadgets_b = self.random.sample(all_gadgets, k=2)

        start_a = [0, 0]
        start_b = [size - 1, size - 1]
        player_states = {
            "A": {
                "position": start_a,
                "gadgets": gadgets_a,
                "moves_taken": 0,
                "distance_to_relic": self._manhattan(start_a, relic_pos),
            },
            "B": {
                "position": start_b,
                "gadgets": gadgets_b,
                "moves_taken": 0,
                "distance_to_relic": self._manhattan(start_b, relic_pos),
            },
        }

        game_state = {
            "grid_size": size,
            "tiles": tiles,
            "player_states": player_states,
            "turn_number": 0,
            "current_player": "A",
            "seed": seed,
            "action_history": [],
            "winner": None,
            "terminated": False,
            "invalid_reason": None,
            "observations": ["Game begins. Players start in opposite corners."],
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        layout_str = "\n".join(" ".join(row) for row in tiles)
        self.state.add_observation(f"Initial labyrinth layout:\n{layout_str}", ta.ObservationType.GAME_BOARD)
        return self.state

    def _generate_tiles(self, size: int) -> List[List[str]]:
        """Build the tile grid: fixed start/relic cells, ~10% walls, ~10% traps."""
        special = {
            (0, 0): "startA",
            (size - 1, size - 1): "startB",
            (size // 2, size // 2): "relic",
        }
        tiles = [["floor"] * size for _ in range(size)]
        for row in range(size):
            for col in range(size):
                label = special.get((row, col))
                if label is not None:
                    tiles[row][col] = label
                    continue
                # Exactly one draw per ordinary cell, in row-major order.
                roll = self.random.random()
                if roll < 0.1:
                    tiles[row][col] = "wall"
                elif roll < 0.2:
                    tiles[row][col] = "trap"
        return tiles

    # === Step ================================================================
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed command is extracted and matched
        against the Move / Rotate / Activate grammars, and the matching branch
        is applied. Malformed or illegal commands are reported through
        ``set_invalid_move`` and do not advance the turn counter.

        Args:
            action (str): The action text submitted by the current player.

        Returns:
            Tuple[bool, ta.Info]: done flag and info object from the state.
        """
        pid = self.state.current_player_id
        player_key = "A" if pid == 0 else "B"
        opp_key = "B" if player_key == "A" else "A"
        game_state = self.state.game_state
        player_state = game_state["player_states"][player_key]
        relic_pos = self._relic_position()
        tiles = game_state["tiles"]

        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=pid, to_id=-1)
        content = self._extract_answer_content(action)

        # Match each grammar once; the patterns are mutually exclusive.
        move_match = self.move_pattern.match(content)
        rotate_match = self.rotate_pattern.match(content)
        activate_match = self.activate_pattern.match(content)

        if move_match:
            direction = move_match.group(1)
            d_row, d_col = self._DIRECTION_DELTAS[direction]
            row = player_state["position"][0] + d_row
            col = player_state["position"][1] + d_col
            if not self._in_bounds(row, col):
                self.state.set_invalid_move(reason="Tile out of bounds")
                return self.state.step()
            if tiles[row][col] == "wall":
                self.state.set_invalid_move(reason="Wall blocks path")
                return self.state.step()
            player_state["position"] = [row, col]
            player_state["moves_taken"] += 1
            self._record_action(player_key, content, f"{player_key} moved {direction}.")

        elif rotate_match:
            x, y = int(rotate_match.group(1)), int(rotate_match.group(2))
            dir_rot = rotate_match.group(3)
            if not self._in_bounds(x, y):
                self.state.set_invalid_move(reason="Tile out of bounds")
                return self.state.step()
            # NOTE(review): the rotation is only logged; the tile is not modified.
            self._record_action(player_key, content, f"{player_key} rotated tile ({x},{y}) {dir_rot}.")

        elif activate_match:
            gadget = activate_match.group(1)
            if gadget not in player_state["gadgets"]:
                self.state.set_invalid_move(reason="Gadget unavailable")
                return self.state.step()
            # NOTE(review): the gadget is consumed but has no further effect.
            player_state["gadgets"].remove(gadget)
            self._record_action(player_key, content, f"{player_key} activated {gadget}.")

        else:
            self.state.set_invalid_move(reason="Invalid action format")
            return self.state.step()

        player_state["distance_to_relic"] = self._manhattan(player_state["position"], relic_pos)
        game_state["turn_number"] += 1
        game_state["current_player"] = opp_key

        if self._same_pos(player_state["position"], relic_pos):
            game_state["winner"] = player_key
            self.state.set_winner(player_id=pid, reason=f"{player_key} reached the relic first.")
            game_state["terminated"] = True
            return self.state.step()

        if game_state["turn_number"] >= self.max_turns:
            self._resolve_turn_limit(game_state)

        return self.state.step()

    def _record_action(self, player_key: str, content: str, description: str) -> None:
        """Broadcast ``description`` and append the action to the game-state logs."""
        game_state = self.state.game_state
        self.state.add_observation(description, ta.ObservationType.GAME_MESSAGE)
        game_state["action_history"].append(f"{player_key}: {content}")
        game_state["observations"].append(description)

    def _resolve_turn_limit(self, game_state: Dict[str, Any]) -> None:
        """Decide the game by distance to the relic once the turn limit is hit."""
        dist_a = game_state["player_states"]["A"]["distance_to_relic"]
        dist_b = game_state["player_states"]["B"]["distance_to_relic"]
        if dist_a < dist_b:
            self.state.set_winner(player_id=0, reason="Player A closer to the relic.")
            game_state["winner"] = "A"
        elif dist_b < dist_a:
            self.state.set_winner(player_id=1, reason="Player B closer to the relic.")
            game_state["winner"] = "B"
        else:
            self.state.set_draw(reason="Equal distance to the relic.")
            game_state["winner"] = None
        game_state["terminated"] = True

    # === Prompt ==============================================================
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt shown to ``player_id``.

        The prompt lists the action grammar, the current turn, the player's
        position and gadgets, and the required \\boxed{...} answer format.
        """
        player_key = "A" if player_id == 0 else "B"
        opponent_key = "B" if player_key == "A" else "A"
        player_info = game_state["player_states"][player_key]
        relic_pos = tuple(self._relic_position())
        gadgets = ", ".join(player_info["gadgets"]) if player_info["gadgets"] else "None"

        # Only the f-prefixed pieces are formatted; the plain literals must use
        # single braces so the player sees \boxed{...} rather than \boxed{{...}}.
        return (
            "You are an Explorer navigating a shifting labyrinth.\n"
            "Your goal is to reach the Relic Tile before your opponent by issuing one of the allowed commands.\n\n"
            "Available actions (case-sensitive):\n"
            "- [Move: N|S|E|W] — Move one tile in a direction if no wall blocks the way.\n"
            "- [Rotate: x,y,CW|CCW] — Rotate the tile at coordinates (x,y).\n"
            "- [Activate: Bridge|TrapDisarm|RowShift] — Use one of your gadgets (if available).\n\n"
            f"Current Turn: {game_state['turn_number']}\n"
            f"You are Player {player_key}. Opponent is Player {opponent_key}.\n"
            f"Your position: {tuple(player_info['position'])}\n"
            f"Relic position: {relic_pos}\n"
            f"Available gadgets: {gadgets}\n\n"
            "Respond with exactly one valid action token.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will move north to progress toward the relic.\n"
            "\\boxed{[Move: N]}\n\n"
            "Example invalid response:\n"
            "\\boxed{Move north} ← Invalid format; must include brackets and colon."
        )

    # === Utility =============================================================
    def _relic_position(self) -> List[int]:
        """Return the ``[row, col]`` of the relic (the centre cell of the grid)."""
        centre = self.grid_size // 2
        return [centre, centre]

    def _in_bounds(self, row: int, col: int) -> bool:
        """Return True when ``(row, col)`` lies inside the grid."""
        return 0 <= row < self.grid_size and 0 <= col < self.grid_size

    def _manhattan(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan distance between two ``[row, col]`` positions."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _same_pos(self, a: List[int], b: List[int]) -> bool:
        """Return True when two ``[row, col]`` positions refer to the same cell."""
        return a[0] == b[0] and a[1] == b[1]