217 lines
9.0 KiB
Python
217 lines
9.0 KiB
Python
|
|
```python
|
|||
|
|
import re
|
|||
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|||
|
|
|
|||
|
|
import textarena as ta
|
|||
|
|
|
|||
|
|
|
|||
|
|
class StellarTriadEnv(ta.Env):
    r"""
    Stellar Triad — a turn-based, two-player abstract alignment game themed around cosmic architecture.

    Two players ("ArchitectA" with symbol "A", "ArchitectB" with symbol "B")
    alternately claim cells of a 3x3 matrix by sending a command of the form
    ``\boxed{{[Channel:X-Y]}}`` (X = column, Y = row, both 1-3). The first
    player with three symbols in a row, column or diagonal wins; a full
    matrix without such a line is a draw ("Stellar Collapse").

    Implementation strictly follows the Stage 1 design document.
    """

    def __init__(self):
        """Initialize the action regex, the turn limit and the (still empty) state slot."""
        super().__init__()
        # The two capture groups are the column (X) and row (Y) digits, so a
        # successful match also yields the coordinates without re-parsing.
        self.action_pattern = re.compile(r'^\[Channel:([1-3])-([1-3])\]$')
        self.max_turns = 9  # Maximum number of turns (one per matrix cell)
        self.state: Optional[ta.TwoPlayerState] = None

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content within \boxed{{ ... }} (or \boxed{ ... }).

        The double-brace form is tried first; the single-brace form is a
        fallback. The first occurrence of whichever form matches is used.

        Args:
            action: Raw response text from the agent.

        Returns:
            The stripped text inside the box, or an empty string if no box is found.
        """
        for boxed_regex in (r'\\boxed\{\{(.*?)\}\}', r'\\boxed\{(.*?)\}'):
            found = re.search(boxed_regex, action, re.DOTALL)
            if found:
                return found.group(1).strip()
        return ""

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2).
            seed: Optional seed for deterministic starting player order.
                An even seed (or no seed) lets ArchitectA start, an odd seed ArchitectB.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Stellar Triad requires exactly 2 players.")

        # The state object receives the seed exactly as passed in (possibly None).
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Determine starting player based on seed parity (even seed => ArchitectA starts)
        if seed is None:
            seed = 0
        starting_player = 0 if seed % 2 == 0 else 1

        role_mapping = {0: "ArchitectA", 1: "ArchitectB"}

        game_state = {
            "matrix_state": [[None for _ in range(3)] for _ in range(3)],
            "player_symbols": {"ArchitectA": "A", "ArchitectB": "B"},
            "turn_count": 0,
            "active_player": role_mapping[starting_player],
            "last_action": None,
            "move_history": [],
            "game_result": None,
            "winner": None,
            "draw": False,
            "seed": seed,
        }

        # Initialize the underlying textarena state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        # Manually set starting player based on seed
        self.state.manually_set_current_player_id(starting_player)

        # Add initial observations
        self.state.add_observation("Welcome to Stellar Triad!", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation("A 3x3 orbital matrix awaits your channeling commands.", ta.ObservationType.GAME_MESSAGE)
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        r"""
        Perform one environment step for the active player.

        The raw action is logged first, then the boxed command is extracted,
        validated against ``[Channel:X-Y]`` and applied to the matrix. A
        missing box, a malformed token or an occupied target cell is reported
        through ``state.set_invalid_move``; the matrix is left untouched.

        Args:
            action: The response text from the agent containing a \boxed{{[Channel:X-Y]}} token.

        Returns:
            (done, info) as returned by ``self.state.step()``.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step().")

        player_id = self.state.current_player_id
        player_role = "ArchitectA" if player_id == 0 else "ArchitectB"

        # Log raw action as a player action observation
        self.state.add_observation(message=action, observation_type=ta.ObservationType.PLAYER_ACTION,
                                   from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)
        if not extracted:
            self.state.set_invalid_move("Action missing or not boxed")
            return self.state.step()

        # Validate action format; the pattern only admits digits 1-3, so a
        # match guarantees in-range coordinates and no extra range check is needed.
        token = self.action_pattern.match(extracted)
        if token is None:
            self.state.set_invalid_move("Malformed token: does not match [Channel:X-Y] pattern")
            return self.state.step()

        x = int(token.group(1)) - 1  # columns 1–3; convert to 0–2
        y = int(token.group(2)) - 1  # rows 1–3; convert to 0–2

        # Validate occupancy (the matrix is indexed row-first)
        game_state = self.state.game_state
        matrix_state = game_state["matrix_state"]
        if matrix_state[y][x] is not None:
            self.state.set_invalid_move("Target cell occupied")
            return self.state.step()

        # Apply move; matrix_state is the list stored in game_state, so the
        # in-place write is already visible there.
        symbol = game_state["player_symbols"][player_role]
        matrix_state[y][x] = symbol

        # Update state-based info
        game_state["last_action"] = extracted
        game_state["turn_count"] += 1
        game_state["move_history"].append(f"{player_role}:{extracted}")

        # Check for win
        if self._check_alignment(matrix_state, symbol):
            game_state["game_result"] = f"{player_role}_won"
            game_state["winner"] = player_role
            # Only set the optional flag when the state object already has it,
            # instead of creating a stray attribute on the state.
            if hasattr(self.state, "state_done"):
                self.state.state_done = True
            self.state.set_winner(player_id, reason=f"{player_role} achieved Stellar Alignment.")
            return self.state.step()

        # Check for draw: turn_count only counts valid placements, so reaching
        # max_turns (one per cell) means the matrix is full.
        if game_state["turn_count"] >= self.max_turns:
            game_state["draw"] = True
            game_state["game_result"] = "Stellar_Collapse"
            self.state.set_draw(reason="Orbital grid full without alignment (Stellar Collapse).")
            return self.state.step()

        # Switch active player
        game_state["active_player"] = "ArchitectA" if player_role == "ArchitectB" else "ArchitectB"

        return self.state.step()

    def _check_alignment(self, board: List[List[Optional[str]]], symbol: str) -> bool:
        """
        Check if the given symbol has achieved a Stellar Alignment (three-in-a-line).

        Args:
            board: 3x3 matrix of cell values, indexed ``board[row][column]``.
            symbol: The symbol to look for.

        Returns:
            True if any row, column or diagonal consists solely of ``symbol``.
        """
        candidate_lines = [list(row) for row in board[:3]]
        candidate_lines.extend([board[row][col] for row in range(3)] for col in range(3))
        candidate_lines.append([board[i][i] for i in range(3)])
        candidate_lines.append([board[i][2 - i] for i in range(3)])
        return any(all(cell == symbol for cell in line) for line in candidate_lines)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates the textual game prompt for each Architect player.

        Args:
            player_id: 0 for ArchitectA, any other value for ArchitectB.
            game_state: Game state dict; reads "player_symbols",
                "active_player" and "matrix_state".

        Returns:
            The prompt text: role, symbol, current matrix, rules and command format.
        """
        role = "ArchitectA" if player_id == 0 else "ArchitectB"
        symbol = game_state["player_symbols"][role]
        active_marker = " (You start.)" if game_state["active_player"] == role else ""
        matrix_str = self._format_matrix(game_state["matrix_state"])

        # Only the lines that interpolate values are f-strings; the plain
        # literals intentionally contain doubled braces, which are emitted verbatim.
        prompt = (
            f"You are {role}, a cosmic architect channeling energy Nodes around a dying star.\n"
            f"Your symbol: '{symbol}'.{active_marker}\n\n"
            "Below is the current 3×3 orbital matrix. Empty slots are shown as '.' :\n"
            f"{matrix_str}\n\n"
            "Your goal: Achieve a *Stellar Alignment* — three of your Nodes in any straight line (horizontal, vertical, or diagonal).\n"
            "If the matrix fills without alignment, the star collapses and both architects fail.\n\n"
            "Each turn, choose one unoccupied cell to channel your energy Node into.\n"
            "Use the exact format: [Channel:X-Y] (columns and rows from 1 to 3).\n"
            "Example: [Channel:2-3] → channel into column 2, row 3.\n"
            "Invalid formats include [Deploy:2-3] or [Channel:4-1].\n\n"
            "Place your chosen command within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will project energy into the lower middle conduit.\n"
            "\\boxed{{[Channel:2-3]}}\n\n"
            "Example invalid response:\n"
            "I channel energy south-east.\n"
            "\\boxed{{Channel:SE}}\n"
        )
        return prompt

    def _format_matrix(self, matrix: List[List[Optional[str]]]) -> str:
        """
        Return textual representation of the matrix.

        Each row becomes one line of space-separated cells; empty (None)
        cells are rendered as '.'.
        """
        return "\n".join(
            " ".join("." if cell is None else cell for cell in row)
            for row in matrix
        )
|
|||
|
|
```
|