Files
test-v0/env.py
2001-01-01 00:00:00 +00:00

221 lines
8.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders alternate turns on a 3x3 grid. On each turn a commander
    either deploys a satellite (``[Deploy:x,y]``) or scans (``[Scan]``). The
    first commander with three satellites in a row, column, or diagonal wins.
    """

    # Player id -> commander name; shared by prompt, validation, and step().
    _ROLE_MAPPING = {0: "Commander Solis", 1: "Commander Nyx"}
    # Board symbol -> player id, used to resolve the winner of a line.
    _SYMBOL_TO_PLAYER_ID = {"S": 0, "N": 1}
    # Marker stored in unoccupied board cells.
    _EMPTY_CELL = " "
    # Answer wrappers. The prompt shows the literal double-brace form, so it
    # is tried first; the single-brace form is accepted as a fallback.
    _DOUBLE_BOXED_PATTERN = re.compile(r"\\boxed\{\{(.*?)\}\}", re.DOTALL)
    _SINGLE_BOXED_PATTERN = re.compile(r"\\boxed\{(.*?)\}", re.DOTALL)
    # Deploy-shaped action with arbitrary integers; lets validation tell
    # "out of range" apart from "malformed".
    _LOOSE_DEPLOY_PATTERN = re.compile(r"^\[Deploy:(-?\d+),(-?\d+)\]$")

    def __init__(self, max_turns: int = 9):
        """Store the turn limit and compile the action patterns.

        Args:
            max_turns: forwarded to ``ta.TwoPlayerState`` in ``reset``.
        """
        self.max_turns = max_turns
        # Capture groups let _parse_deploy_coords reuse this pattern.
        self.deploy_pattern = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        """Return the stripped text inside the boxed answer wrapper.

        The double-brace form is searched first across the whole text, then
        the single-brace form. If neither is present, the stripped raw text
        is returned so validation can reject it.
        """
        for pattern in (self._DOUBLE_BOXED_PATTERN, self._SINGLE_BOXED_PATTERN):
            match = pattern.search(action)
            if match:
                return match.group(1).strip()
        return action.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """Return a fresh 3x3 grid with every cell set to a single space."""
        return [[self._EMPTY_CELL for _ in range(3)] for _ in range(3)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid.

        Cells in a row are joined with " | " and rows are separated by a
        dashed line. Whitespace-only cells are rendered as one space.
        """
        lines = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(lines)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to the given player.

        Args:
            player_id: 0 or 1, mapped to a commander name.
            game_state: state dict; reads ``players`` and ``board``.
        """
        commander = self._ROLE_MAPPING[player_id]
        symbol = game_state["players"][commander]["symbol"]
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{<action>}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed; defaults to 42 when None

        Returns: self.state

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")
        if seed is None:
            seed = 42
        self.current_seed = seed
        # Seeds the module-level RNG; nothing in this class draws from it.
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        game_state = {
            "turn_count": 0,
            "current_player": "Commander Solis",
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Pass a copy so the state object never aliases the class constant.
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._ROLE_MAPPING),
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract ``(x, y)`` from ``[Deploy:x,y]``.

        Returns None when ``action`` is not a string or does not match the
        strict deploy pattern (both coordinates in 1..3).
        """
        if not isinstance(action, str):
            return None
        match = self.deploy_pattern.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _record_action(self, player_name: str, content: str, log_entry: str) -> None:
        """Append a valid action to the history and advance the turn counter."""
        gs = self.state.game_state
        gs["players"][player_name]["actions_taken"].append(content)
        gs["observation_log"].append(log_entry)
        gs["last_action"] = content
        gs["turn_count"] += 1

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        Returns reason string if invalid, otherwise None if success.
        Possible reasons: "Malformed action syntax", "Coordinates out of
        range", "Target cell occupied".
        """
        gs = self.state.game_state
        player_name = self._ROLE_MAPPING[player_id]

        if self.scan_pattern.match(content):
            self._record_action(player_name, content, f"{player_name} scanned the grid")
            board_str = self._board_to_str(gs["board"])
            self.state.add_observation(
                f"{player_name} scanned the orbital grid:\n{board_str}",
                ta.ObservationType.GAME_MESSAGE,
            )
            return None

        coords = self._parse_deploy_coords(content)
        if coords is None:
            # Deploy-shaped input with integers outside 1..3 gets the more
            # specific reason; everything else is malformed.
            if self._LOOSE_DEPLOY_PATTERN.match(content):
                return "Coordinates out of range"
            return "Malformed action syntax"

        x, y = coords
        if gs["board"][x - 1][y - 1] != self._EMPTY_CELL:
            return "Target cell occupied"

        # Apply deploy: x selects the row, y the column (both 1-based).
        gs["board"][x - 1][y - 1] = gs["players"][player_name]["symbol"]
        self._record_action(player_name, content, f"{player_name} deployed to {x},{y}")
        self.state.add_observation(
            f"{player_name} deployed satellite to orbit {x},{y}.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None.

        Lines are examined in the order rows, columns, main diagonal,
        anti-diagonal; the first completed line decides the result.
        """
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(3)] for c in range(3))
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])
        for line in lines:
            first = line[0]
            if first in self._SYMBOL_TO_PLAYER_ID and all(cell == first for cell in line):
                return first
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Args:
            action: The full action text submitted by the current player.

        Returns:
            (done, info) tuple.
        """
        player_id = self.state.current_player_id
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        answer = self._extract_answer_content(action)
        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self.state.step()

        # Check for win after valid action
        gs = self.state.game_state
        board = gs["board"]
        winner_symbol = self._check_win_condition(board)
        if winner_symbol:
            winner_id = self._SYMBOL_TO_PLAYER_ID[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self._ROLE_MAPPING[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
            return self.state.step()

        # Check draw. turn_count counts scans as well as deploys, so nine
        # valid actions can elapse with empty cells; report the real cause.
        if gs["turn_count"] >= 9:
            gs["is_terminal"] = True
            board_full = all(cell != self._EMPTY_CELL for row in board for cell in row)
            if board_full:
                draw_reason = "All orbital nodes filled. The system remains in equilibrium."
            else:
                draw_reason = "Turn limit reached before any commander aligned three satellites."
            self.state.set_draw(reason=draw_reason)
            return self.state.step()

        return self.state.step()
```