Files
tic/env.py

217 lines
9.0 KiB
Python
Raw Permalink Normal View History

2001-01-01 00:00:00 +00:00
```python
import re
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class StellarTriadEnv(ta.Env):
    """
    Stellar Triad: a turn-based, two-player abstract alignment game themed
    around cosmic architecture.

    Two roles, "ArchitectA" (symbol "A", player id 0) and "ArchitectB"
    (symbol "B", player id 1), alternate placing Nodes on a 3x3 matrix.
    The first to line up three of their symbols in a row, column or diagonal
    wins; a full matrix without such a line is a draw.

    Implementation strictly follows the Stage 1 design document.
    """

    def __init__(self) -> None:
        """Initialize regex pattern and constants."""
        super().__init__()
        # Accepts exactly "[Channel:X-Y]" with X and Y each in 1..3.
        self.action_pattern = re.compile(r'^\[Channel:(?:[1-3])-(?:[1-3])\]$')
        self.max_turns = 9  # Maximum number of turns (one per matrix cell)
        # Populated by reset(); step() must not be called before that.
        self.state: Optional[ta.TwoPlayerState] = None

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content within \boxed{{ ... }} (or \boxed{ ... }).

        The double-brace form is tried first, then the single-brace form.
        The first occurrence in ``action`` is used.

        Args:
            action: Raw response text from the agent.

        Returns:
            The stripped text found inside the box, or an empty string if no
            boxed token is present.
        """
        # Order matters: the double-brace form must be tried before the
        # single-brace form, otherwise the inner braces would be captured.
        for pattern in (r'\\boxed\{\{(.*?)\}\}', r'\\boxed\{(.*?)\}'):
            match = re.search(pattern, action, re.DOTALL)
            if match:
                return match.group(1).strip()
        return ""

    def reset(self, num_players: int, seed: Optional[int] = None) -> None:
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2).
            seed: Optional seed for deterministic starting player order.
                An even seed (or no seed, which is treated as 0) makes
                ArchitectA start; an odd seed makes ArchitectB start.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Stellar Triad requires exactly 2 players.")

        # Initialize state (the seed is forwarded as given, possibly None).
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Determine starting player based on seed parity (even seed => ArchitectA starts)
        if seed is None:
            seed = 0
        starting_player = 0 if seed % 2 == 0 else 1

        matrix_state = [[None for _ in range(3)] for _ in range(3)]
        player_symbols = {"ArchitectA": "A", "ArchitectB": "B"}
        active_player = "ArchitectA" if starting_player == 0 else "ArchitectB"

        game_state = {
            "matrix_state": matrix_state,
            "player_symbols": player_symbols,
            "turn_count": 0,
            "active_player": active_player,
            "last_action": None,
            "move_history": [],
            "game_result": None,
            "winner": None,
            "draw": False,
            "seed": seed,
        }
        role_mapping = {0: "ArchitectA", 1: "ArchitectB"}

        # Initialize the underlying textarena state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Manually set starting player based on seed
        self.state.manually_set_current_player_id(starting_player)

        # Add initial observations
        self.state.add_observation("Welcome to Stellar Triad!", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(
            "A 3x3 orbital matrix awaits your channeling commands.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        r"""
        Perform one environment step for the active player.

        The raw action is logged, the boxed token is extracted and validated
        (format, coordinate range, cell occupancy), the move is applied, and
        the board is checked for a win or a draw.

        Args:
            action: The response text from the agent containing a
                \boxed{{[Channel:X-Y]}} token, where X is the column and Y
                is the row (both 1-based).

        Returns:
            (done, info) as returned by the underlying state's ``step()``.
        """
        player_id = self.state.current_player_id
        player_role = "ArchitectA" if player_id == 0 else "ArchitectB"

        # Log raw action as a player action observation
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        extracted = self._extract_answer_content(action)
        if not extracted:
            self.state.set_invalid_move("Action missing or not boxed")
            return self.state.step()

        # Validate action format
        if not self.action_pattern.match(extracted):
            self.state.set_invalid_move("Malformed token: does not match [Channel:X-Y] pattern")
            return self.state.step()

        # Parse coordinates
        try:
            coords = extracted.strip("[]").split(":")[1].split("-")
            x = int(coords[0]) - 1  # columns 1-3; convert to 0-2
            y = int(coords[1]) - 1  # rows 1-3; convert to 0-2
        except (IndexError, ValueError):
            # Defensive only: the regex above already guarantees this shape.
            self.state.set_invalid_move("Malformed token: cannot parse coordinates")
            return self.state.step()

        # Validate range
        if not (0 <= x <= 2 and 0 <= y <= 2):
            self.state.set_invalid_move("Coordinates out of range")
            return self.state.step()

        # Validate occupancy (the matrix is indexed [row][column])
        matrix_state = self.state.game_state["matrix_state"]
        if matrix_state[y][x] is not None:
            self.state.set_invalid_move("Target cell occupied")
            return self.state.step()

        # Apply move
        symbol = self.state.game_state["player_symbols"][player_role]
        matrix_state[y][x] = symbol

        # Update state-based info
        self.state.game_state["matrix_state"] = matrix_state
        self.state.game_state["last_action"] = extracted
        self.state.game_state["turn_count"] += 1
        self.state.game_state["move_history"].append(f"{player_role}:{extracted}")

        # Check for win or draw
        if self._check_alignment(matrix_state, symbol):
            self.state.game_state["game_result"] = f"{player_role}_won"
            self.state.game_state["winner"] = player_role
            # Only touch `state_done` when the state object actually has it,
            # so no stray attribute is created on states that lack it.
            if hasattr(self.state, "state_done"):
                self.state.state_done = True
            # The role is derived from player_id, so the mover is the winner.
            self.state.set_winner(player_id, reason=f"{player_role} achieved Stellar Alignment.")
            return self.state.step()

        if self.state.game_state["turn_count"] >= self.max_turns:
            self.state.game_state["draw"] = True
            self.state.game_state["game_result"] = "Stellar_Collapse"
            self.state.set_draw(reason="Orbital grid full without alignment (Stellar Collapse).")
            return self.state.step()

        # Switch active player
        self.state.game_state["active_player"] = (
            "ArchitectA" if player_role == "ArchitectB" else "ArchitectB"
        )
        return self.state.step()

    def _check_alignment(self, board: List[List[Optional[str]]], symbol: str) -> bool:
        """
        Check if the given symbol has achieved a Stellar Alignment (three-in-a-line).

        Args:
            board: 3x3 matrix of cell values, indexed [row][column].
            symbol: The player symbol to look for.

        Returns:
            True if any row, column or diagonal consists entirely of
            ``symbol``; False otherwise.
        """
        # Rows and columns
        for i in range(3):
            if all(cell == symbol for cell in board[i]):
                return True
            if all(board[row][i] == symbol for row in range(3)):
                return True
        # Diagonals
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates the textual game prompt for each Architect player.

        Args:
            player_id: 0 for ArchitectA, any other value for ArchitectB.
            game_state: Game state dict; reads "player_symbols",
                "active_player" and "matrix_state".

        Returns:
            The prompt text describing the role, the current matrix, the
            rules and the required action format.
        """
        role = "ArchitectA" if player_id == 0 else "ArchitectB"
        symbol = game_state["player_symbols"][role]
        active_marker = " (You start.)" if game_state["active_player"] == role else ""
        matrix_str = self._format_matrix(game_state["matrix_state"])
        # NOTE: the \\boxed lines below are plain (non-f) literals, so their
        # doubled braces appear verbatim in the prompt. The answer extractor
        # accepts both the double-brace and single-brace forms.
        prompt = (
            f"You are {role}, a cosmic architect channeling energy Nodes around a dying star.\n"
            f"Your symbol: '{symbol}'.{active_marker}\n\n"
            "Below is the current 3×3 orbital matrix. Empty slots are shown as '.' :\n"
            f"{matrix_str}\n\n"
            "Your goal: Achieve a *Stellar Alignment* — three of your Nodes in any straight line (horizontal, vertical, or diagonal).\n"
            "If the matrix fills without alignment, the star collapses and both architects fail.\n\n"
            "Each turn, choose one unoccupied cell to channel your energy Node into.\n"
            "Use the exact format: [Channel:X-Y] (columns and rows from 1 to 3).\n"
            "Example: [Channel:2-3] → channel into column 2, row 3.\n"
            "Invalid formats include [Deploy:2-3] or [Channel:4-1].\n\n"
            "Place your chosen command within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will project energy into the lower middle conduit.\n"
            "\\boxed{{[Channel:2-3]}}\n\n"
            "Example invalid response:\n"
            "I channel energy south-east.\n"
            "\\boxed{{Channel:SE}}\n"
        )
        return prompt

    def _format_matrix(self, matrix: List[List[Optional[str]]]) -> str:
        """
        Return textual representation of the matrix.

        Each row becomes one line of space-separated cells; empty (None)
        cells are rendered as '.'.
        """
        return "\n".join(
            " ".join(cell if cell is not None else "." for cell in row)
            for row in matrix
        )
```