import re
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy
    beacons on a 3x3 grid aiming to align three of their own beacons in a
    row, column, or diagonal.

    Cells are addressed by a row letter (A-C) followed by a column digit
    (1-3), e.g. ``B2`` for the centre cell. Player 0 ("Navigator Alpha")
    places "Blue" beacons and player 1 ("Navigator Beta") places "Crimson"
    beacons.
    """

    # The eight winning triples: three rows, three columns, two diagonals.
    _WINNING_LINES: Tuple[Tuple[str, str, str], ...] = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    # Matches \boxed{...} as well as \boxed{{...}}; group 1 is the payload.
    # The doubled form is still accepted because earlier versions of the
    # prompt displayed it literally.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?([^{}]*)\}?\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to ``ta.TwoPlayerState`` in
                ``reset``. Defaults to 9, the number of cells on the grid.
        """
        self.max_turns = max_turns
        # Compile regex patterns once
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ..., C3
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content inside \boxed{} for machine parsing.

        When the response contains several boxed expressions the LAST one is
        used, because the prompt asks players to put their final answer at
        the end of the response. Falls back to the entire (trimmed) action
        text when no boxed expression is present.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped payload of the last boxed expression, or the
            stripped action text if there is none.
        """
        boxed_payloads = self._BOXED_PATTERN.findall(action)
        if boxed_payloads:
            return boxed_payloads[-1].strip()
        return action.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """Return True if ``color`` occupies all three cells of any winning line."""
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WINNING_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their coordinate (e.g. ``B2``); occupied cells show
        ``B`` for a Blue beacon and ``C`` for any other colour (Crimson).
        """
        rows = []
        for r in "ABC":
            row_cells = []
            for c in "123":
                val = board[f"{r}{c}"]
                if val is None:
                    row_cells.append(f"{r}{c}")
                else:
                    row_cells.append("B" if val == "Blue" else "C")
            rows.append(" | ".join(row_cells))
        return "\n".join(rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Map a numeric player id to its display name (0 -> Navigator Alpha)."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True if ``cell`` is one of the nine grid coordinates."""
        return cell in self.all_cells

    def _announce_board(self, board: Dict[str, Optional[str]]) -> None:
        """Render ``board`` and add it to the state as a GAME_BOARD observation."""
        self.state.add_observation(
            self._generate_board_str(board), ta.ObservationType.GAME_BOARD
        )

    # ------------------------ Core Env API ------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed for determinism.

        Returns:
            The freshly initialised ``self.state``.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }
        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}

        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)
        self._announce_board(empty_board)

        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, the boxed payload is extracted and validated
        against ``[Place: <cell>]``, and the beacon is placed if the target
        cell is free. Malformed actions and occupied cells are reported via
        ``self.state.set_invalid_move``. After a valid placement the board is
        checked for a win, then for a full-grid draw.

        Args:
            action: The action text submitted by the current player.

        Returns:
            The ``(done, info)`` tuple produced by ``self.state.step()``.
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        player_color = self.state.game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(
            action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1
        )

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(
                reason="MalformedAction: The action must strictly follow '[Place: <cell>]' format."
            )
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"
        # Defensive: the regex already restricts cells to A-C / 1-3.
        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(
                reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate."
            )
            return self.state.step()

        board = self.state.game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(
                reason=f"CellOccupied: {cell_id} is already occupied."
            )
            return self.state.step()

        # 4. Execute valid action: place beacon (the board dict is mutated in
        #    place, so game_state["board"] sees the change immediately).
        board[cell_id] = player_color
        self.state.game_state["move_history"].append(
            {"player": player_key, "action": extracted}
        )
        self.state.game_state["turn_index"] += 1

        # 5. Check for victory
        if self._check_victory(board, player_color):
            self.state.game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(
                player_id=player_id,
                reason=f"{winner_str} aligned three energy beacons in a line.",
            )
            self._announce_board(board)
            return self.state.step()

        # 6. Check for draw (grid filled, no winner)
        if all(v is not None for v in board.values()):
            self.state.game_state["is_draw"] = True
            self.state.set_draw(
                reason="All cells filled with no aligned beacons—a balanced standoff."
            )
            self._announce_board(board)
            return self.state.step()

        # 7. Update board observation for next player
        self._announce_board(board)

        # 8. Rotate the active-player marker (only on non-terminal moves)
        self.state.game_state["active_player"] = "B" if player_key == "A" else "A"

        # Proceed to next step
        return self.state.step()

    # -------------------- Player Prompt Generation --------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the instruction prompt shown to a player.

        Args:
            player_id: Numeric id of the player the prompt is for (0 or 1).
            game_state: The game-state dict built in ``reset``; the keys
                ``player_symbols``, ``active_player`` and ``board`` are read.

        Returns:
            A multi-line prompt describing the player's role, the current
            board, the open cells and the required response format.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = self._get_active_player_label(
            0 if game_state["active_player"] == "A" else 1
        )

        board = game_state["board"]
        board_repr = self._generate_board_str(board)
        open_cells = [cell for cell, val in board.items() if val is None]
        available = ", ".join(open_cells) if open_cells else "None"

        # NOTE: the non-f string literals below must use single braces;
        # doubling them is only an escape inside f-strings.
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell>]\nAvailable cells: {available}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n"
            "\\boxed{[Place: <cell>]}\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt