Add env.py from Openverse builder
This commit is contained in:
221
env.py
Normal file
221
env.py
Normal file
@@ -0,0 +1,221 @@
|
||||
```python
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, Optional, Tuple, List
|
||||
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class OrbitalAlignEnv(ta.Env):
    """
    Orbital Align (Deterministic Turn-Based Strategy Inspired by Tic-Tac-Toe)
    Implementation for Stage 1 Design.

    Two commanders alternate turns on a 3x3 grid. On a turn a commander either
    deploys a satellite with ``[Deploy:x,y]`` (``x`` is the row, ``y`` the
    column, both 1-based) or spends the turn with ``[Scan]``. The first
    commander to own a full row, column or diagonal wins. The game is drawn
    when the grid is full, or when ``max_turns`` valid actions have been taken
    without a winner (scans count as actions).
    """

    # Seat index -> commander name; shared by the prompt, validation and step logic.
    _COMMANDERS: Dict[int, str] = {0: "Commander Solis", 1: "Commander Nyx"}
    # Board symbol -> seat index, used to translate a winning line into a player id.
    _SYMBOL_TO_PLAYER_ID: Dict[str, int] = {"S": 0, "N": 1}
    # Marker stored in an unoccupied cell.
    _EMPTY_CELL = " "
    _GRID_SIZE = 3
    # Seed used when reset() is called without one, keeping runs reproducible.
    _DEFAULT_SEED = 42

    _DEPLOY_PATTERN = re.compile(r"^\[Deploy:([1-3]),([1-3])\]$")
    _SCAN_PATTERN = re.compile(r"^\[Scan\]$")
    # Accepts both \boxed{...} and the doubled-brace \boxed{{...}} shown in the prompt.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?(.*?)\}?\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: number of valid actions (deploys and scans) after which
                an undecided game is declared a draw.
        """
        self.max_turns = max_turns
        self.deploy_pattern = self._DEPLOY_PATTERN
        self.scan_pattern = self._SCAN_PATTERN
        self.current_seed: Optional[int] = None

    def _extract_answer_content(self, action: str) -> str:
        """Extract the content of the last \\boxed{...} in *action*.

        Both the single-brace and the doubled-brace form are accepted. The last
        box is used because the prompt asks for the response to *end* with the
        boxed action. If no box is present the stripped raw text is returned.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _generate_empty_board(self) -> List[List[str]]:
        """Return a fresh 3x3 grid in which every cell holds the empty marker."""
        size = self._GRID_SIZE
        return [[self._EMPTY_CELL for _ in range(size)] for _ in range(size)]

    def _board_to_str(self, board: List[List[str]]) -> str:
        """Return a text representation of the orbital grid."""
        rendered_rows = [
            " | ".join(cell if cell.strip() else " " for cell in row)
            for row in board
        ]
        return "\n---------\n".join(rendered_rows)

    def _board_is_full(self, board: List[List[str]]) -> bool:
        """Return True when no cell of *board* holds the empty marker."""
        return all(cell != self._EMPTY_CELL for row in board for cell in row)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to the commander in seat *player_id*."""
        commander = self._COMMANDERS[player_id]
        symbol = game_state["players"][commander]["symbol"]
        prompt = (
            f"You are {commander}, commanding orbital fleet marked as '{symbol}' satellites.\n"
            "Your objective is to align three of your satellites (horizontally, vertically, or diagonally) "
            "across the 3×3 orbital grid surrounding a dying star before your rival does.\n\n"
            "**Current Orbital Grid:**\n"
            f"{self._board_to_str(game_state['board'])}\n\n"
            "**Available Actions:**\n"
            "- `[Deploy:x,y]` → Place a satellite on an orbital coordinate (x,y) with x,y ∈ {1,2,3}\n"
            "- `[Scan]` → Skip placement to review the orbital grid state.\n\n"
            "**Format:** Each response must end with `\\boxed{{<action>}}`\n"
            "Example valid:\n"
            "I will secure the top-right orbit next.\n"
            "\\boxed{{[Deploy:1,3]}}\n\n"
            "Example invalid:\n"
            "Let's attack next time.\n"
            "[Deploy:1,3]\n"
            "(missing boxed syntax)\n\n"
            "Remember:\n"
            "- You cannot deploy on an occupied orbit.\n"
            "- The game ends when a commander aligns three satellites or all orbits are filled.\n"
        )
        return prompt

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to the initial state of Orbital Align.

        Note that this seeds Python's global ``random`` module.

        Args:
            num_players: must be 2 for two-player game
            seed: optional deterministic seed; a fixed default is used when omitted

        Returns: self.state

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Orbital Align requires exactly 2 players.")
        if seed is None:
            seed = self._DEFAULT_SEED
        self.current_seed = seed
        random.seed(seed)

        # Initialize base state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        game_state = {
            "turn_count": 0,
            "current_player": self._COMMANDERS[0],
            "board": self._generate_empty_board(),
            "players": {
                "Commander Solis": {"symbol": "S", "actions_taken": []},
                "Commander Nyx": {"symbol": "N", "actions_taken": []},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [],
            "seed": seed,
        }

        # Hand the state a copy so the class-level mapping cannot be mutated through it.
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._COMMANDERS),
        )

        # Add initial onboarding messages
        onboard_msg = (
            "Welcome to *Orbital Align*! Two rival commanders compete to align three satellites in a 3×3 orbital grid.\n"
            "Commander Solis (S) deploys first, followed by Commander Nyx (N)."
        )
        self.state.add_observation(onboard_msg, ta.ObservationType.GAME_MESSAGE)

        return self.state

    def _parse_deploy_coords(self, action: str) -> Optional[Tuple[int, int]]:
        """Extract 1-based ``(x, y)`` from ``[Deploy:x,y]``; return None if *action* does not match."""
        match = self._DEPLOY_PATTERN.match(action)
        if match is None:
            return None
        return int(match.group(1)), int(match.group(2))

    def _validate_and_apply_action(self, player_id: int, content: str) -> Optional[str]:
        """Validate an extracted action string and apply to game state if valid.

        A valid action (deploy or scan) is recorded in the player's action
        history and the observation log, becomes ``last_action`` and increments
        ``turn_count``.

        Returns reason string if invalid, otherwise None if success.
        """
        gs = self.state.game_state
        player_name = self._COMMANDERS[player_id]

        is_deploy = bool(self.deploy_pattern.match(content))
        is_scan = not is_deploy and bool(self.scan_pattern.match(content))
        if not (is_deploy or is_scan):
            return "Malformed action syntax"

        if is_deploy:
            coords = self._parse_deploy_coords(content)
            if coords is None:
                return "Coordinates out of range"
            x, y = coords
            row, col = x - 1, y - 1  # the board is indexed 0-based, row first
            if gs["board"][row][col] != self._EMPTY_CELL:
                return "Target cell occupied"
            gs["board"][row][col] = gs["players"][player_name]["symbol"]
            log_entry = f"{player_name} deployed to {x},{y}"
            announcement = f"{player_name} deployed satellite to orbit {x},{y}."
        else:
            log_entry = f"{player_name} scanned the grid"
            board_str = self._board_to_str(gs["board"])
            announcement = f"{player_name} scanned the orbital grid:\n{board_str}"

        # Bookkeeping shared by both action kinds.
        gs["players"][player_name]["actions_taken"].append(content)
        gs["observation_log"].append(log_entry)
        gs["last_action"] = content
        gs["turn_count"] += 1
        self.state.add_observation(announcement, ta.ObservationType.GAME_MESSAGE)
        return None

    def _check_win_condition(self, board: List[List[str]]) -> Optional[str]:
        """Check for winner. Return symbol 'S' or 'N' if found, else None."""
        size = self._GRID_SIZE
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(size)] for c in range(size))
        lines.append([board[i][i] for i in range(size)])
        lines.append([board[i][size - 1 - i] for i in range(size)])
        for line in lines:
            for symbol in ("S", "N"):
                if line == [symbol] * size:
                    return symbol
        return None

    def _advance(self) -> Tuple[bool, ta.Info]:
        """Step the underlying state and refresh ``game_state['current_player']``.

        The entry is re-read from the state afterwards so it reflects whichever
        seat the state reports as current; an unknown seat id leaves it unchanged.
        """
        result = self.state.step()
        gs = self.state.game_state
        gs["current_player"] = self._COMMANDERS.get(
            self.state.current_player_id, gs["current_player"]
        )
        return result

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Args:
            action: The full action text submitted by the current player.

        Returns:
            (done, info) tuple.
        """
        player_id = self.state.current_player_id
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        answer = self._extract_answer_content(action)

        reason = self._validate_and_apply_action(player_id, answer)
        if reason:
            self.state.set_invalid_move(reason=reason)
            return self._advance()

        # Check for win after valid action
        gs = self.state.game_state
        board = gs["board"]
        winner_symbol = self._check_win_condition(board)
        if winner_symbol:
            winner_id = self._SYMBOL_TO_PLAYER_ID[winner_symbol]
            gs["is_terminal"] = True
            gs["winner"] = self._COMMANDERS[winner_id]
            self.state.set_winner(
                player_id=winner_id,
                reason=f"{gs['winner']} aligned three satellites and won the match.",
            )
            return self._advance()

        # Check draw. Scans consume turns without filling the grid, so a full
        # board and an exhausted turn budget are reported as different reasons.
        if self._board_is_full(board):
            gs["is_terminal"] = True
            self.state.set_draw(reason="All orbital nodes filled. The system remains in equilibrium.")
        elif self.max_turns is not None and gs["turn_count"] >= self.max_turns:
            gs["is_terminal"] = True
            self.state.set_draw(reason="Turn limit reached before any commander aligned three satellites.")

        return self._advance()
|
||||
```
|
||||
Reference in New Issue
Block a user