import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient
    3×3 grid. The first team to align three of their emblems horizontally,
    vertically, or diagonally wins.

    Actions are submitted as text; the action itself must be wrapped in a
    boxed marker and be either ``[Mark:r,c]`` (r, c in 0..2) or ``[Pass]``.
    """

    # Marker used on the board for a tile nobody has claimed yet.
    EMPTY_CELL = "_"
    # Player id -> team name; shared by reset() and step().
    ROLE_NAMES = {0: "Sun", 1: "Moon"}
    # Team name -> emblem written onto the board.
    PLAYER_SYMBOLS = {"Sun": "S", "Moon": "M"}
    # Seed value recorded in the game state when the caller supplies none.
    DEFAULT_SEED = 42

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: turn limit handed to the textarena state on reset.
        """
        super().__init__()
        self.max_turns = max_turns
        # Regex patterns for the two allowed actions. Row/column are limited
        # to 0-2 by the pattern itself, so no later range check is needed.
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the first boxed marker in ``action``.

        The double-brace form (\boxed{{...}}) is tried first; if it is not
        present the single-brace form (\boxed{...}) is used. When neither is
        found the whole action text is returned.

        Args:
            action: raw text submitted by a player.

        Returns:
            The extracted content with surrounding whitespace stripped.
        """
        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
        if not match:
            # Single-brace fallback (\boxed{}).
            match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
        return match.group(1).strip() if match else action.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return the board as text: one row per line, cells space-separated."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return ``[row, col]`` for every unclaimed cell, in row-major order."""
        return [
            [r, c]
            for r in range(3)
            for c in range(3)
            if board[r][c] == self.EMPTY_CELL
        ]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if that symbol fills a row, column or diagonal, else None."""
        lines: List[List[str]] = []
        # Rows and columns (interleaved; order does not affect the result on
        # any board reachable through step(), which stops at the first win).
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            if line[0] != self.EMPTY_CELL and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction text for a player from the current game state.

        Args:
            player_id: 0 is treated as Team Sun, any other id as Team Moon.
            game_state: must provide ``player_symbols`` and ``board_state``.

        Returns:
            The prompt describing the board, the allowed actions and examples.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # Only the first three pieces are f-strings. The remaining literals are
        # plain strings, so their doubled braces are emitted verbatim — the
        # form _extract_answer_content looks for first.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:r,c] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an empty board with Team Sun to move.

        Args:
            num_players: must be 2 (Sun, Moon).
            seed: random seed; it seeds the global ``random`` module and is
                recorded in the game state, but the game logic itself uses
                no randomness.

        Returns:
            The freshly initialised textarena state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        if seed is not None:
            random.seed(seed)

        empty_board = [[self.EMPTY_CELL] * 3 for _ in range(3)]

        game_state: Dict[str, Any] = {
            # `seed or 42` would silently replace a legitimate seed of 0.
            "seed": seed if seed is not None else self.DEFAULT_SEED,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            # Copies keep per-game state from aliasing the class constants.
            "player_symbols": dict(self.PLAYER_SYMBOLS),
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self.ROLE_NAMES),
        )
        self.state.add_observation(
            "The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, validated, applied to the board, and the board
        is then checked for a win or a draw. Invalid actions (bad format,
        occupied cell, passing while tiles remain) are reported through
        ``set_invalid_move`` and leave the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) as produced by the state's ``step()``.
        """
        player_id = self.state.current_player_id
        current_team = self.ROLE_NAMES[player_id]
        other_team = self.ROLE_NAMES[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        mark_match = self.mark_pattern.match(extracted)
        if mark_match is None and self.pass_pattern.match(extracted) is None:
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark_match is not None:
            # The pattern only admits 0, 1 or 2, so both indices are in range.
            r, c = int(mark_match.group(1)), int(mark_match.group(2))
            if board[r][c] != self.EMPTY_CELL:
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()
            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal once no unclaimed tile remains.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            # Derive the loser from the winner rather than from whose turn it
            # is, so the scores stay right whoever made the winning line.
            losing_team = "Moon" if winning_team == "Sun" else "Sun"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][losing_team] = 0
            self.state.set_winner(
                player_id if winning_team == current_team else 1 - player_id,
                reason=f"Team {winning_team} aligned three emblems!",
            )
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"

        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)
        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observations) for the current player."""
        # NOTE(review): this hands back the state's whole `observations`
        # attribute, not a per-player slice — confirm that is what the
        # textarena version in use expects.
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's final rewards and game info."""
        return self.state.rewards, self.state.game_info