221 lines
8.9 KiB
Python
221 lines
8.9 KiB
Python
```python
|
||
import re
|
||
import random
|
||
from typing import Any, Dict, Optional, Tuple, List
|
||
|
||
import textarena as ta
|
||
|
||
|
||
class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders ("Commander Solis" playing 'S' and "Commander Nyx" playing
    'N') take turns on a 3x3 grid.  On each turn a commander either deploys a
    satellite with ``[Deploy:x,y]`` (x is the 1-based row, y the 1-based
    column) or passes with ``[Scan]``.  The first commander to align three
    satellites in a row, column or diagonal wins; the game is drawn when the
    grid is full or when ``max_turns`` accepted actions have been played.
    """

    # Player id -> commander name.  Shared by every method so the mapping is
    # declared exactly once.
    ROLE_MAPPING: Dict[int, str] = {0: "Commander Solis", 1: "Commander Nyx"}

    # Board symbol -> player id, used to translate a winning line to a winner.
    SYMBOL_TO_PLAYER_ID: Dict[str, int] = {"S": 0, "N": 1}

    # Marker stored in an unoccupied cell.
    EMPTY_CELL = " "

    # Matches the double-brace form shown in the player prompt
    # (\boxed{{...}}) as well as the conventional single-brace form
    # (\boxed{...}).  The double-brace alternative is listed first so it wins
    # whenever both could match at the same position.
    _BOXED_PATTERN = re.compile(
        r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL
    )

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: number of accepted actions (deploys and scans) after
                which an undecided game is declared a draw.
        """
        self.max_turns = max_turns
        # Capturing groups let the same compiled pattern both validate the
        # action and yield its coordinates.
        self.deploy_pattern = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")
        self.scan_pattern = re.compile(r"^\[Scan\]$")
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        """Return the payload of the last boxed span in ``action``.

        Both ``\\boxed{{...}}`` (as displayed in the prompt) and
        ``\\boxed{...}`` are accepted.  The prompt requires the response to
        *end* with the boxed action, so when several boxed spans are present
        the last one is used.  If no boxed span is found the stripped raw
        text is returned.
        """
        last_match = None
        for last_match in self._BOXED_PATTERN.finditer(action):
            pass
        if last_match is None:
            return action.strip()
        # Exactly one of the two groups participates in any given match.
        payload = last_match.group(1)
        if payload is None:
            payload = last_match.group(2)
        return payload.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """3x3 grid initialized with spaces."""
        return [[self.EMPTY_CELL for _ in range(3)] for _ in range(3)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid.

        Cells of a row are joined with " | " and rows are separated by a
        dashed line; blank cells are rendered as a single space.
        """
        rendered_rows = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(rendered_rows)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to ``player_id``.

        Args:
            player_id: 0 for Commander Solis, 1 for Commander Nyx.
            game_state: dict holding at least ``"players"`` and ``"board"``.

        Returns:
            The prompt text, including the current rendering of the board.
        """
        commander = self.ROLE_MAPPING[player_id]
        symbol = game_state["players"][commander]["symbol"]
        # Only the first and the board lines are f-strings; the remaining
        # literals are plain strings, so their braces are emitted verbatim.
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{<action>}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed; 42 is used when omitted

        Returns: self.state

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")
        if seed is None:
            seed = 42
        self.current_seed = seed
        # NOTE: this seeds the process-wide ``random`` generator.
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        game_state = {
            "turn_count": 0,
            "current_player": self.ROLE_MAPPING[0],
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Setup role mapping (a copy, so the state cannot mutate the class constant)
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self.ROLE_MAPPING),
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)

        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract coordinates from [Deploy:x,y].

        Returns:
            ``(x, y)`` with both values in 1..3, or ``None`` when ``action``
            is not a string or is not a well-formed deploy action.
        """
        if not isinstance(action, str):
            return None
        match = self.deploy_pattern.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _record_action(self, gs: Dict[str, Any], player_id: int, content: str, log_entry: str) -> None:
        """Book-keep one accepted action in the game-state dict."""
        player_name = self.ROLE_MAPPING[player_id]
        gs["players"][player_name]["actions_taken"].append(content)
        gs["observation_log"].append(log_entry)
        gs["last_action"] = content
        gs["turn_count"] += 1
        # Keep the stored turn marker in sync: the other commander acts next.
        gs["current_player"] = self.ROLE_MAPPING[1 - player_id]

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        Args:
            player_id: id of the acting player (0 or 1).
            content: the action text with the boxed wrapper already removed.

        Returns reason string if invalid, otherwise None if success.
        """
        gs = self.state.game_state
        player_name = self.ROLE_MAPPING[player_id]

        if self.scan_pattern.match(content):
            # A scan changes no cell; it only consumes the turn and re-shows the grid.
            self._record_action(gs, player_id, content, f"{player_name} scanned the grid")
            board_str = self._board_to_str(gs["board"])
            self.state.add_observation(
                f"{player_name} scanned the orbital grid:\n{board_str}",
                ta.ObservationType.GAME_MESSAGE,
            )
            return None

        coords = self._parse_deploy_coords(content)
        if coords is None:
            return "Malformed action syntax"

        # The pattern only admits digits 1-3, so the indices below are in range.
        x, y = coords
        if gs["board"][x - 1][y - 1] != self.EMPTY_CELL:
            return "Target cell occupied"

        # Apply deploy
        gs["board"][x - 1][y - 1] = gs["players"][player_name]["symbol"]
        self._record_action(gs, player_id, content, f"{player_name} deployed to {x},{y}")
        self.state.add_observation(
            f"{player_name} deployed satellite to orbit {x},{y}.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None."""
        lines = []
        lines.extend(board)  # rows
        lines.extend([[board[r][c] for r in range(3)] for c in range(3)])  # columns
        lines.append([board[i][i] for i in range(3)])  # main diagonal
        lines.append([board[i][2 - i] for i in range(3)])  # anti-diagonal
        for line in lines:
            if line == ["S", "S", "S"]:
                return "S"
            if line == ["N", "N", "N"]:
                return "N"
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, its boxed payload is extracted and
        validated, and on success the board is checked for a win and then
        for a draw (full grid or turn limit reached).

        Args:
            action: The full action text submitted by the current player.

        Returns:
            (done, info) tuple.
        """
        player_id = self.state.current_player_id
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        answer = self._extract_answer_content(action)

        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self.state.step()

        # Check for win after valid action
        gs = self.state.game_state
        board = gs["board"]
        winner_symbol = self._check_win_condition(board)
        if winner_symbol:
            winner_id = self.SYMBOL_TO_PLAYER_ID[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self.ROLE_MAPPING[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
            return self.state.step()

        # Check draw.  Scans consume turns without filling a cell, so "grid
        # full" and "turn limit reached" are distinct conditions and are
        # reported with distinct reasons.
        board_full = all(cell != self.EMPTY_CELL for row in board for cell in row)
        if board_full:
            gs["is_terminal"] = True
            self.state.set_draw(reason="All orbital nodes filled. The system remains in equilibrium.")
        elif gs["turn_count"] >= self.max_turns:
            gs["is_terminal"] = True
            self.state.set_draw(reason="Turn limit reached before any commander aligned three satellites.")

        return self.state.step()
|
||
``` |