import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel Environment.

    A two-player, turn-based game on a 3x3 "runeboard". The players
    ("Solar" with symbol ``S`` and "Lunar" with symbol ``L``) alternately
    etch their glyph into an empty cell. The first to align three glyphs in
    a row, column, or diagonal wins; a full board without alignment is a
    draw.
    """

    def __init__(self, max_turns: int = 9):
        """
        Configure the environment.

        Args:
            max_turns: Turn limit forwarded to ``ta.TwoPlayerState`` in
                :meth:`reset`.
        """
        super().__init__()
        self.max_turns = max_turns
        # Only a single digit 1-3 is accepted for each coordinate, so any
        # action that matches this pattern is already within board bounds.
        self.action_pattern = re.compile(r"^\[Etch:\s*([1-3]),\s*([1-3])\]$")
        self.player_roles = {0: "Solar", 1: "Lunar"}
        self.player_symbols = {"Solar": "S", "Lunar": "L"}

    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the first ``\boxed{...}`` in a response.

        The double-brace form ``\boxed{{...}}`` is tried first, then the
        single-brace form ``\boxed{...}``. If neither is present, the whole
        action (stripped of surrounding whitespace) is returned.

        Args:
            action: Raw text submitted by a player.

        Returns:
            The stripped text found inside the box, or the stripped action
            when no box is found.
        """
        # Double-brace form, as rendered by the examples in the player prompt.
        match = re.search(r'\\boxed\{\{(.*?)\}\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback: single-brace form.
        match = re.search(r'\\boxed\{(.*?)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()

        return action.strip()

    def _empty_runeboard(self) -> List[List[str]]:
        """Create an empty 3x3 runeboard; ``"_"`` marks an empty cell."""
        return [["_"] * 3 for _ in range(3)]

    def _render_runeboard(self, runeboard: List[List[str]]) -> str:
        """
        Return a string representation of the runeboard.

        Cells of a row are separated by single spaces and rows by newlines.
        """
        return "\n".join(" ".join(row) for row in runeboard)

    def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
        """
        Return True if ``symbol`` fills a full row, column, or diagonal.

        Args:
            runeboard: 3x3 grid of cell strings.
            symbol: Glyph to test for alignment.
        """
        # Rows
        for r in range(3):
            if all(runeboard[r][c] == symbol for c in range(3)):
                return True

        # Columns
        for c in range(3):
            if all(runeboard[r][c] == symbol for r in range(3)):
                return True

        # Diagonals
        if all(runeboard[i][i] == symbol for i in range(3)):
            return True
        if all(runeboard[i][2 - i] == symbol for i in range(3)):
            return True

        return False

    # -------------------------------------------------------------------------
    # Game Lifecycle
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Builds a fresh ``ta.TwoPlayerState``, installs an empty board with
        "Solar" to move, and broadcasts an opening message and the empty
        board.

        Args:
            num_players: Number of players in the game (must be 2).
            seed: Optional seed. It is passed to the state object; when it is
                None, a random integer in [0, 10000] is stored under
                ``game_state["seed"]`` instead.

        Returns:
            The newly created state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )

        rng_seed = seed if seed is not None else random.randint(0, 10000)

        game_state: Dict[str, Any] = {
            "runeboard": self._empty_runeboard(),
            "current_player": "Solar",
            "turn_count": 0,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observations": {"Solar": [], "Lunar": []},
            "player_symbols": self.player_symbols,
            "seed": rng_seed,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.player_roles,
        )

        # Initial observations, addressed with from_id=-1 / to_id=-1.
        init_message = (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col]."
        )
        self.state.add_observation(
            init_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        board_str = self._render_runeboard(game_state["runeboard"])
        self.state.add_observation(
            board_str, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the prompt for one player.

        Args:
            player_id: Key into ``self.player_roles`` (0 or 1).
            game_state: Game state dict; reads ``"player_symbols"`` and
                ``"runeboard"``.

        Returns:
            A prompt containing the player's role, a rules summary, the
            rendered board, the list of empty cells as ready-to-use actions,
            and the required answer format.
        """
        player_role = self.player_roles[player_id]
        player_symbol = game_state["player_symbols"][player_role]
        runeboard_str = self._render_runeboard(game_state["runeboard"])

        # Empty cells are listed with 1-based coordinates, matching the
        # action syntax.
        empties = [
            f"[Etch: {r+1}, {c+1}]"
            for r in range(3)
            for c in range(3)
            if game_state["runeboard"][r][c] == "_"
        ]
        empties_str = ", ".join(empties)

        # NOTE: the quadruple braces below render as "\boxed{{...}}"; the
        # extractor accepts both that and the single-brace form.
        prompt = (
            f"You are a Scribe competing to master the Runeboard through glyph alignment.\n"
            f"Role: Scribe {player_role} ({player_symbol})\n\n"
            f"Rules Summary:\n"
            f"- Each player alternately etches one glyph per turn.\n"
            f"- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
            f"- If all nine cells are filled without alignment, it’s a draw.\n\n"
            f"Current Runeboard:\n{runeboard_str}\n\n"
            f"Empty Cells where you can etch:\n{empties_str}\n\n"
            f"Action Format:\n"
            f"Use [Etch: row, column] with row and column in 1–3.\n"
            f"Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            f"Example valid response:\n"
            f"I will etch at the top right corner.\n"
            f"\\boxed{{{{[Etch: 1, 3]}}}}\n\n"
            f"Example invalid response:\n"
            f"\\boxed{{{{[Mark: 1, 3]}}}} # Reason: 'Mark' is not a valid action.\n"
        )
        return prompt

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is recorded as an observation, the boxed content is
        extracted and validated, and a valid move is applied to the board.
        Invalid moves (game already over, bad format, occupied cell) are
        reported through ``state.set_invalid_move``.

        Args:
            action: The action text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns, annotated as
            ``(done, info)``.
        """
        current_id = self.state.current_player_id
        current_role = self.player_roles[current_id]
        opponent_role = self.player_roles[1 - current_id]
        game_state = self.state.game_state
        board = game_state["runeboard"]

        # Record the player's raw action.
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        # Extract the content inside \boxed{...}.
        action_content = self._extract_answer_content(action)

        # Validate.
        if game_state["is_terminal"]:
            self.state.set_invalid_move("Game already ended.")
            return self.state.step()

        match = self.action_pattern.match(action_content)
        if not match:
            self.state.set_invalid_move(
                "Invalid format: must be [Etch: row, column] with row,col in 1–3."
            )
            return self.state.step()

        # The pattern captures exactly one character from [1-3] per group, so
        # the conversion cannot fail and both indices are always in 0..2; no
        # separate out-of-bounds handling is needed.
        row = int(match.group(1)) - 1
        col = int(match.group(2)) - 1

        if board[row][col] != "_":
            self.state.set_invalid_move("Cell already occupied.")
            return self.state.step()

        # Apply the action.
        symbol = self.player_symbols[current_role]
        board[row][col] = symbol
        game_state["last_action"] = action_content
        game_state["turn_count"] += 1

        # Announce the move.
        move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
        self.state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)

        # Show the updated board.
        board_render = self._render_runeboard(board)
        self.state.add_observation(
            board_render, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        # Check the win condition.
        if self._check_winner(board, symbol):
            game_state["winner"] = current_role
            game_state["is_terminal"] = True
            self.state.set_winner(
                player_id=current_id,
                reason=f"{current_role} formed a line of glyphs.",
            )
            return self.state.step()

        # Check the draw condition: nine valid moves fill the 3x3 board.
        if game_state["turn_count"] >= 9:
            game_state["is_terminal"] = True
            self.state.set_draw("Runeboard is full with no alignment. Draw.")
            return self.state.step()

        # Track whose turn is next in the game-state dict.
        game_state["current_player"] = opponent_role
        return self.state.step()