# Origin: commit 62cd544aaf4a5d41532af5462e3ff96fd707dcb4, "Add env.py from Openverse builder" (env.py).
import random
import re
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders alternately act on a 3x3 grid. A turn is either
    ``[Deploy:x,y]`` (place a satellite at row ``x``, column ``y``, both
    1-based) or ``[Scan]`` (place nothing). The first commander with three
    satellites in a row, column or diagonal wins.
    """

    # Marker stored in a board cell that holds no satellite.
    _EMPTY = " "
    # Player id -> commander name; shared by prompt, validation and step logic.
    _ROLE_NAMES = {0: "Commander Solis", 1: "Commander Nyx"}
    # Board symbol -> player id, used to translate a winning line into a winner.
    _SYMBOL_TO_PLAYER = {"S": 0, "N": 1}
    # The prompt shows the answer wrapped as \boxed{{...}}; the conventional
    # single-brace form \boxed{...} is accepted as well so that a correctly
    # reasoned answer is not rejected over brace count alone.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?(.*?)\}?\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """Create the environment.

        Args:
            max_turns: number of valid actions (deploys and scans combined)
                after which an undecided game is declared a draw.
        """
        self.max_turns = max_turns
        # Capture groups let _parse_deploy_coords reuse this compiled pattern.
        self.deploy_pattern = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the text inside the first \boxed{{...}} or \boxed{...}.

        If no boxed expression is present, the whole action is returned
        stripped, so that validation can report it as malformed.
        """
        match = self._BOXED_PATTERN.search(action)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """Return a fresh 3x3 grid whose cells are all empty."""
        return [[self._EMPTY for _ in range(3)] for _ in range(3)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid.

        Cells in a row are joined with " | " and rows are separated by a
        dashed rule; blank cells render as a single space.
        """
        lines = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(lines)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to ``player_id``.

        The prompt names the commander and their symbol, renders the current
        board from ``game_state`` and lists the accepted action formats.
        """
        commander = self._ROLE_NAMES[player_id]
        symbol = game_state["players"][commander]["symbol"]
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed; 42 is used when omitted

        Returns: self.state

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")
        if seed is None:
            seed = 42
        self.current_seed = seed
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        game_state = {
            "turn_count": 0,
            "current_player": "Commander Solis",
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Hand the state its own copy so the class-level mapping stays untouched.
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._ROLE_NAMES),
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)

        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract the 1-based (x, y) pair from ``[Deploy:x,y]``.

        Returns None when ``action`` is not a well-formed deploy command
        with both coordinates in 1..3.
        """
        match = self.deploy_pattern.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        Returns reason string if invalid, otherwise None if success.
        A successful deploy or scan is recorded in the player's history and
        the observation log, and increments ``turn_count``.
        """
        gs = self.state.game_state
        player_name = self._ROLE_NAMES[player_id]

        coords = self._parse_deploy_coords(content)
        if coords is not None:
            x, y = coords
            # x selects the row and y the column; both are 1-based.
            if gs["board"][x - 1][y - 1] != self._EMPTY:
                return "Target cell occupied"
            gs["board"][x - 1][y - 1] = gs["players"][player_name]["symbol"]
            log_entry = f"{player_name} deployed to {x},{y}"
            message = f"{player_name} deployed satellite to orbit {x},{y}."
        elif self.scan_pattern.match(content):
            log_entry = f"{player_name} scanned the grid"
            board_str = self._board_to_str(gs["board"])
            message = f"{player_name} scanned the orbital grid:\n{board_str}"
        else:
            return "Malformed action syntax"

        # Bookkeeping common to every accepted action.
        gs["players"][player_name]["actions_taken"].append(content)
        gs["observation_log"].append(log_entry)
        gs["last_action"] = content
        gs["turn_count"] += 1
        self.state.add_observation(message, ta.ObservationType.GAME_MESSAGE)
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None."""
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(3)] for c in range(3))
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])
        for line in lines:
            for symbol in ("S", "N"):
                if line == [symbol] * 3:
                    return symbol
        return None

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True when no cell of ``board`` is empty."""
        return all(cell != self._EMPTY for row in board for cell in row)

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, its boxed content is validated and applied,
        and the game is then checked for a win, a full board, or the turn
        limit, in that order.

        Args:
            action: The full action text submitted by the current player.

        Returns:
            (done, info) tuple.
        """
        player_id = self.state.current_player_id
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        answer = self._extract_answer_content(action)

        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self.state.step()

        gs = self.state.game_state
        board = gs["board"]
        winner_symbol = self._check_win_condition(board)

        if winner_symbol:
            winner_id = self._SYMBOL_TO_PLAYER[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self._ROLE_NAMES[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
        elif self._is_board_full(board):
            # Decided by actual occupancy: scans also advance turn_count, so
            # the counter alone cannot tell whether every orbit is filled.
            gs["is_terminal"] = True
            self.state.set_draw(reason="All orbital nodes filled. The system remains in equilibrium.")
        elif gs["turn_count"] >= self.max_turns:
            # Scans consumed turns; end the match without claiming a full grid.
            gs["is_terminal"] = True
            self.state.set_draw(reason="Turn limit reached before any commander aligned three satellites.")

        return self.state.step()