import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column,
    or diagonal wins. A full grid with no such line is a draw.

    Coordinates in actions are 1-based ``row,column``; the board itself is
    stored as a 0-based list of rows, with "" marking an empty cell.
    """

    # Exact grammar of a move once it has been pulled out of the answer tags.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    # Wrapper the players are told to put their action in. DOTALL lets the
    # answer span several lines; IGNORECASE tolerates <ANSWER>...</ANSWER>.
    ANSWER_TAG_PATTERN = re.compile(
        r"<answer>(.*?)</answer>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self):
        """Initialize reusable attributes; the real setup happens in reset()."""
        super().__init__()
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Helper: Extract content inside <answer></answer> tags
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Return the text of the first ``<answer>...</answer>`` pair in *action*.

        If no such pair is present, fall back to the whole action string.
        The result is stripped of surrounding whitespace in both cases.
        """
        match = self.ANSWER_TAG_PATTERN.search(action)
        if match:
            return match.group(1).strip()
        return action.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None) -> Dict[str, Any]:
        """
        Reset the environment to a fresh, empty board.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed. When omitted a random one in [0, 99999] is
                drawn. The seed's parity picks the starting player
                (even -> Player 1, odd -> Player 2).

        Returns:
            The game_state dictionary held by the state manager.

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Determine which player starts (based on seed parity)
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = f"Player {starting_player_id + 1}"

        # Construct initial 3x3 board (each cell empty string)
        board = [["" for _ in range(3)] for _ in range(3)]

        # Define player info
        players = {
            "Player 1": {"symbol": "X", "moves_made": 0},
            "Player 2": {"symbol": "O", "moves_made": 0},
        }

        # Build game_state dictionary
        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": board,
            "players": players,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [f"{starting_player_name} begins the glyph duel."],
        }

        # Initialize internal environment state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
        )

        # step() identifies the mover via self.state.current_player_id and
        # rejects the move if it disagrees with game_state["current_player"].
        # Point the state manager at the player chosen above so the two can
        # never start out of sync (otherwise every move would be refused as
        # "Not your turn." whenever they differ).
        self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=f"{starting_player_name} begins the glyph duel.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """
        Format the 3×3 board for display.

        Empty cells are shown as ".". Each row is prefixed with its 1-based
        row number, and the header carries the 1-based column numbers aligned
        over the cells, e.g.::

              1   2   3
            1 X | . | .
            2 . | O | .
            3 . | . | .
        """
        # Two leading spaces skip the row-label column; three spaces between
        # labels match the width of the " | " cell separator.
        header = "  " + "   ".join(str(col + 1) for col in range(3))
        rows = []
        for i, row in enumerate(board):
            cells = [cell if cell else "." for cell in row]
            rows.append(f"{i+1} " + " | ".join(cells))
        return f"{header}\n" + "\n".join(rows)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Return True if *symbol* fills a complete row, column, or diagonal."""
        # Rows and columns
        for i in range(3):
            if all(board[i][j] == symbol for j in range(3)):
                return True
            if all(board[j][i] == symbol for j in range(3)):
                return True

        # Diagonals
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True

        return False

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the move is extracted and validated
        (format, turn ownership, cell availability), and on success the glyph
        is placed and the win/draw conditions are evaluated. Any validation
        failure is reported through ``set_invalid_move`` and leaves the board
        and turn bookkeeping in game_state untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns, i.e. a tuple (done, info):
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_name = f"Player {player_id + 1}"

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Extract meaningful token content
        answer_content = self._extract_answer_content(action)

        # Validate action format; the match object also carries the coordinates
        move = self.VALID_ACTION_PATTERN.match(answer_content)
        if move is None:
            self.state.set_invalid_move(
                reason="Invalid action format. Must match [Inscribe:x,y]."
            )
            return self.state.step()

        # Convert the 1-based (row, column) from the action to 0-based indices
        x, y = int(move.group(1)) - 1, int(move.group(2)) - 1

        # Access current game_state
        g = self.state.game_state
        board = g["board"]

        # Defensive check: the mover must be the player game_state expects.
        # Done before looking at the board so an out-of-turn move is never
        # reported as a board problem.
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        # Check if cell already occupied
        if board[x][y] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Apply move
        symbol = g["players"][player_name]["symbol"]
        board[x][y] = symbol
        g["players"][player_name]["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(f"{player_name} placed at ({x+1},{y+1})")

        # Add observation for move and board update
        self.state.add_observation(
            from_id=player_id,
            message=f"{player_name} inscribed glyph '{symbol}' at ({x+1},{y+1})",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Check win condition (only the mover can have just completed a line)
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(
                player_id=player_id,
                reason=f"{player_name} aligned three glyphs and won the duel.",
            )
            return self.state.step()

        # Check draw condition
        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
            return self.state.step()

        # Switch turns
        next_player_id = 1 - player_id
        g["current_player"] = f"Player {next_player_id + 1}"

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Produce role-appropriate instructions for a player at episode start.

        Args:
            player_id: The integer ID of the player (0 or 1).
            game_state: The shared game state built in reset().

        Returns:
            A string prompt describing the current state, rules, and expected
            response format.
        """
        player_name = f"Player {player_id + 1}"
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]

        # NOTE(review): the tag names in the formatting rules below had been
        # stripped from the text. <answer> matches what
        # _extract_answer_content parses; <think> is the assumed name for the
        # reasoning wrapper (nothing in this class parses it) - confirm.
        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            " - x, y ∈ {1, 2, 3}\n"
            " - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            " - Put private reasoning inside <think></think>.\n"
            " - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer> <-- Invalid keyword\n"
        )
        return prompt