Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit 62cd544aaf

221
env.py Normal file
View File

@@ -0,0 +1,221 @@
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders alternate turns on a 3x3 grid. Commander Solis (player 0,
    symbol 'S') and Commander Nyx (player 1, symbol 'N') each try to line up
    three of their own symbols in a row, column, or diagonal. On a turn a
    player may either deploy a satellite (`[Deploy:x,y]`, where x is the
    1-based row and y the 1-based column) or scan the grid (`[Scan]`), which
    consumes the turn without placing anything.
    """

    # Single source of truth for the player-id <-> commander <-> symbol links,
    # shared by prompt generation, validation and win resolution.
    _ROLE_MAPPING = {0: "Commander Solis", 1: "Commander Nyx"}
    _SYMBOL_TO_PLAYER_ID = {"S": 0, "N": 1}

    _BOARD_SIZE = 3
    _EMPTY_CELL = " "

    # Same grammar as `deploy_pattern`, but with capturing groups so the
    # coordinates can be read back out of a validated action.
    _DEPLOY_COORDS_RE = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")

    # Answer wrappers: the prompt advertises the double-brace form; the
    # single-brace form is accepted as a fallback.
    _BOXED_DOUBLE_RE = re.compile(r"\\boxed\{\{(.*?)\}\}", re.DOTALL)
    _BOXED_SINGLE_RE = re.compile(r"\\boxed\{(.*?)\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: number of valid actions (deploys and scans combined)
                after which the match is declared a draw if nobody has won.
                It is also forwarded to the underlying state object in
                `reset`.
        """
        self.max_turns = max_turns
        self.deploy_pattern = re.compile(r"^\[Deploy:(?:[1-3]),(?:[1-3])\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        """Extract the action token from a boxed answer.

        The first `\\boxed{{...}}` (double-brace) occurrence wins, exactly as
        before. If there is none, the first single-brace `\\boxed{...}`
        occurrence is used instead, so a conventionally boxed answer is not
        rejected as malformed. When neither form is present the stripped raw
        text is returned and left for the validator to judge.
        """
        # Double braces must be tried first: the single-brace pattern would
        # otherwise match a double-brace answer and capture a stray "{".
        for pattern in (self._BOXED_DOUBLE_RE, self._BOXED_SINGLE_RE):
            match = pattern.search(action)
            if match:
                return match.group(1).strip()
        return action.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """Return a fresh 3x3 grid in which every cell is a single space."""
        size = self._BOARD_SIZE
        return [[self._EMPTY_CELL for _ in range(size)] for _ in range(size)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid.

        Cells of a row are joined with " | " and rows are separated by a
        dashed rule; blank cells are rendered as a single space.
        """
        lines = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(lines)

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True when no cell of the grid is still empty."""
        return all(cell != self._EMPTY_CELL for row in board for cell in row)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to the given player.

        Args:
            player_id: 0 for Commander Solis, 1 for Commander Nyx.
            game_state: the game-state dict created in `reset`; its
                "players" and "board" entries are read.
        """
        commander = self._ROLE_MAPPING[player_id]
        symbol = game_state["players"][commander]["symbol"]
        # NOTE: only the two f-prefixed fragments are formatted. The other
        # fragments are plain strings, so their braces (including the doubled
        # ones around the boxed example) reach the player verbatim.
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{<action>}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed; falls back to 42 when omitted

        Returns:
            self.state

        Raises:
            ValueError: if `num_players` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")
        if seed is None:
            seed = 42
        self.current_seed = seed
        # Seeds the process-wide `random` module. The game logic in this class
        # draws no random numbers itself; the call is kept so that any
        # randomness elsewhere stays reproducible for a given seed.
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        game_state = {
            "turn_count": 0,
            "current_player": "Commander Solis",
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Setup role mapping (a copy, so the state cannot mutate the class constant)
        role_mapping = dict(self._ROLE_MAPPING)
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract the 1-based (x, y) coordinates from `[Deploy:x,y]`.

        Returns None when `action` is not a string or does not match the
        deploy grammar (both coordinates must be in 1..3).
        """
        if not isinstance(action, str):
            return None
        match = self._DEPLOY_COORDS_RE.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        Args:
            player_id: id of the acting player (0 or 1).
            content: the action token, e.g. "[Deploy:2,3]" or "[Scan]".

        Returns:
            A reason string if the action is invalid (nothing is mutated in
            that case), otherwise None after the action has been applied.
        """
        gs = self.state.game_state
        player_name = self._ROLE_MAPPING[player_id]
        player_entry = gs["players"][player_name]
        symbol = player_entry["symbol"]

        if self.scan_pattern.match(content):
            # A scan places nothing but still consumes the player's turn.
            player_entry["actions_taken"].append(content)
            gs["observation_log"].append(f"{player_name} scanned the grid")
            gs["last_action"] = content
            gs["turn_count"] += 1
            board_str = self._board_to_str(gs["board"])
            self.state.add_observation(
                f"{player_name} scanned the orbital grid:\n{board_str}",
                ta.ObservationType.GAME_MESSAGE,
            )
            return None

        if not self.deploy_pattern.match(content):
            return "Malformed action syntax"

        coords = self._parse_deploy_coords(content)
        if coords is None:
            # Only reachable if `deploy_pattern` was replaced with a looser
            # grammar than the coordinate parser accepts.
            return "Coordinates out of range"

        x, y = coords
        row, col = x - 1, y - 1  # the grid is stored 0-based, row-major
        if gs["board"][row][col] != self._EMPTY_CELL:
            return "Target cell occupied"

        # Apply deploy
        gs["board"][row][col] = symbol
        player_entry["actions_taken"].append(content)
        gs["observation_log"].append(f"{player_name} deployed to {x},{y}")
        gs["last_action"] = content
        gs["turn_count"] += 1
        self.state.add_observation(
            f"{player_name} deployed satellite to orbit {x},{y}.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None.

        All three rows, three columns and both diagonals are inspected; a
        line wins when it consists of one player symbol repeated three times.
        """
        size = self._BOARD_SIZE
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(size)] for c in range(size))
        lines.append([board[i][i] for i in range(size)])
        lines.append([board[i][size - 1 - i] for i in range(size)])
        for line in lines:
            first = line[0]
            if first in self._SYMBOL_TO_PLAYER_ID and all(cell == first for cell in line):
                return first
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed answer is extracted, validated and
        applied, and then the terminal conditions are evaluated in this order:
        win, full board, turn limit.

        Args:
            action: The full action text submitted by the current player.

        Returns:
            The result of `self.state.step()`, annotated as a (done, info)
            tuple.
        """
        player_id = self.state.current_player_id
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        answer = self._extract_answer_content(action)
        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self.state.step()

        gs = self.state.game_state
        board = gs["board"]

        # Check for win after valid action
        winner_symbol = self._check_win_condition(board)
        if winner_symbol:
            winner_id = self._SYMBOL_TO_PLAYER_ID[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self._ROLE_MAPPING[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
            return self.state.step()

        # Check draw. Fullness is read from the board itself rather than from
        # the turn counter, because scans advance the counter without filling
        # a cell.
        if self._is_board_full(board):
            gs["is_terminal"] = True
            self.state.set_draw(reason="All orbital nodes filled. The system remains in equilibrium.")
            return self.state.step()

        # Turn budget exhausted (possible when scans were used) with free
        # cells remaining: still a draw, but reported with an accurate reason.
        if self.max_turns is not None and gs["turn_count"] >= self.max_turns:
            gs["is_terminal"] = True
            self.state.set_draw(
                reason="Turn limit reached before the orbital grid was filled. The system remains in equilibrium."
            )
            return self.state.step()

        return self.state.step()
```