import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders alternate actions on a 3x3 grid. Commander Solis places
    'S' markers and Commander Nyx places 'N' markers. A commander wins by
    filling a full row, column, or diagonal with their own marker.
    """

    # Player id -> commander name (also used as key into game_state["players"]).
    _ROLE_NAMES = {0: "Commander Solis", 1: "Commander Nyx"}
    # Board symbol -> player id, used to translate a winning line into a winner.
    _SYMBOL_TO_PLAYER = {"S": 0, "N": 1}
    # Marker stored in a board cell that has not been claimed yet.
    _EMPTY_CELL = " "
    # Accepts both the double-brace form shown in the prompt (tried first) and
    # the conventional single-brace form.
    _BOXED_RE = re.compile(r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL)
    # Well-formed deploy token with arbitrary integers; used only to tell an
    # out-of-range coordinate apart from a syntactically broken action.
    _LOOSE_DEPLOY_RE = re.compile(r"^\[Deploy:-?\d+,-?\d+\]$")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Maximum number of valid actions (deploys and scans)
                before the match is declared a draw. Also forwarded to the
                underlying state object in ``reset``.
        """
        self.max_turns = max_turns
        # Capturing groups let _parse_deploy_coords reuse this same pattern.
        self.deploy_pattern = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the text inside the first ``\boxed{{...}}`` or ``\boxed{...}``.

        If no boxed expression is present, the whole action (stripped) is
        returned so that the caller can validate it as-is.
        """
        match = self._BOXED_RE.search(action)
        if match is None:
            return action.strip()
        double_braced, single_braced = match.group(1), match.group(2)
        content = double_braced if double_braced is not None else single_braced
        return content.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """3x3 grid initialized with spaces."""
        return [[self._EMPTY_CELL for _ in range(3)] for _ in range(3)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid.

        Cells in a row are joined with `` | `` and rows are separated by a
        line of dashes.
        """
        rendered_rows = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(rendered_rows)

    def _board_is_full(self, board: List[List[str]]) -> bool:
        """Return True when no cell of the board is still empty."""
        return all(cell != self._EMPTY_CELL for row in board for cell in row)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to the given player.

        Args:
            player_id: 0 for Commander Solis, 1 for Commander Nyx.
            game_state: The game-state dict created in ``reset``; its
                ``players`` and ``board`` entries are read.

        Returns:
            The prompt text, including the current grid and action format.
        """
        commander = self._ROLE_NAMES[player_id]
        symbol = game_state["players"][commander]["symbol"]
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed (defaults to 42 when omitted)

        Returns:
            self.state

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")

        if seed is None:
            seed = 42
        self.current_seed = seed
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        game_state = {
            "turn_count": 0,
            "current_player": "Commander Solis",
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Setup role mapping (a copy, so the state cannot mutate the class constant)
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._ROLE_NAMES),
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract coordinates from [Deploy:x,y].

        Returns:
            ``(x, y)`` with both values in 1..3, or None when ``action`` is
            not a valid deploy token.
        """
        match = self.deploy_pattern.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        A valid action (deploy or scan) is recorded in the acting player's
        history and the observation log, increments ``turn_count``, and is
        announced as a game message. Invalid actions leave the state untouched.

        Returns reason string if invalid, otherwise None if success.
        """
        gs = self.state.game_state
        player_name = self._ROLE_NAMES[player_id]
        player_record = gs["players"][player_name]

        if self.scan_pattern.match(content):
            player_record["actions_taken"].append(content)
            gs["observation_log"].append(f"{player_name} scanned the grid")
            gs["last_action"] = content
            gs["turn_count"] += 1
            board_str = self._board_to_str(gs["board"])
            self.state.add_observation(
                f"{player_name} scanned the orbital grid:\n{board_str}",
                ta.ObservationType.GAME_MESSAGE,
            )
            return None

        coords = self._parse_deploy_coords(content)
        if coords is None:
            # Distinguish "right shape, bad numbers" from genuinely broken syntax.
            if self._LOOSE_DEPLOY_RE.match(content):
                return "Coordinates out of range"
            return "Malformed action syntax"

        x, y = coords
        # x selects the board row and y the column; both are 1-based in actions.
        row, col = x - 1, y - 1
        if gs["board"][row][col] != self._EMPTY_CELL:
            return "Target cell occupied"

        # Apply deploy
        gs["board"][row][col] = player_record["symbol"]
        player_record["actions_taken"].append(content)
        gs["observation_log"].append(f"{player_name} deployed to {x},{y}")
        gs["last_action"] = content
        gs["turn_count"] += 1
        self.state.add_observation(
            f"{player_name} deployed satellite to orbit {x},{y}.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None."""
        candidate_lines: List[List[str]] = []
        candidate_lines.extend(board)  # rows
        candidate_lines.extend([board[r][c] for r in range(3)] for c in range(3))  # columns
        candidate_lines.append([board[i][i] for i in range(3)])  # main diagonal
        candidate_lines.append([board[i][2 - i] for i in range(3)])  # anti-diagonal

        for line in candidate_lines:
            for symbol in ("S", "N"):
                if all(cell == symbol for cell in line):
                    return symbol
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed answer is extracted and validated,
        and on success the board is checked for a win, a full grid, or the
        turn limit (in that order).

        Args:
            action: The full action text submitted by the current player.

        Returns:
            (done, info) tuple.
        """
        player_id = self.state.current_player_id

        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        answer = self._extract_answer_content(action)
        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self.state.step()

        gs = self.state.game_state
        board = gs["board"]
        winner_symbol = self._check_win_condition(board)

        if winner_symbol:
            winner_id = self._SYMBOL_TO_PLAYER[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self._ROLE_NAMES[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
        elif self._board_is_full(board):
            gs["is_terminal"] = True
            self.state.set_draw(reason="All orbital nodes filled. The system remains in equilibrium.")
        elif self.max_turns is not None and gs["turn_count"] >= self.max_turns:
            # Scans consume turns without filling cells, so the turn limit can
            # be reached while orbits are still empty; report that accurately.
            gs["is_terminal"] = True
            self.state.set_draw(reason="Turn limit reached before either commander aligned three satellites.")

        return self.state.step()