Files
alrightalrightalright-v0/env.py

236 lines
8.8 KiB
Python
Raw Normal View History

2001-01-01 00:00:00 +00:00
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel Environment.

    A deterministic, turn-based two-player game played on a 3x3 "runeboard".
    Scribe Solar (glyph ``S``) and Scribe Lunar (glyph ``L``) alternately etch
    one glyph per turn. The first Scribe to align three of their glyphs in a
    row, column or diagonal wins; a full board with no alignment is a draw.
    """

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to ``ta.TwoPlayerState`` on reset.
        """
        self.max_turns = max_turns
        # Capture any non-negative integers (not just 1-3) so that a
        # well-formed action with out-of-range coordinates reaches the
        # dedicated bounds check in step() instead of being reported as a
        # formatting error.
        self.action_pattern = re.compile(r"^\[Etch:\s*([0-9]+),\s*([0-9]+)\]$")
        self.player_roles = {0: "Solar", 1: "Lunar"}
        self.player_symbols = {"Solar": "S", "Lunar": "L"}

    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the first ``\boxed{...}`` in a player's response.

        A doubled-brace form (``\boxed{{...}}``) is tried first, then the
        single-brace form. If neither is present, the whole response
        (stripped of surrounding whitespace) is returned.
        """
        # Doubled braces are tried first so that they are not captured as
        # part of the content by the single-brace pattern.
        match = re.search(r'\\boxed\{\{(.*?)\}\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        match = re.search(r'\\boxed\{(.*?)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _empty_runeboard(self) -> List[List[str]]:
        """Create an empty 3x3 runeboard; ``"_"`` marks an empty cell."""
        return [["_"] * 3 for _ in range(3)]

    def _render_runeboard(self, runeboard: List[List[str]]) -> str:
        """Return the runeboard as text: one line per row, cells space-separated."""
        return "\n".join(" ".join(row) for row in runeboard)

    def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
        """Return True if ``symbol`` fills a complete row, column or diagonal."""
        # rows
        for r in range(3):
            if all(runeboard[r][c] == symbol for c in range(3)):
                return True
        # cols
        for c in range(3):
            if all(runeboard[r][c] == symbol for r in range(3)):
                return True
        # diagonals
        if all(runeboard[i][i] == symbol for i in range(3)):
            return True
        if all(runeboard[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    # -------------------------------------------------------------------------
    # Game Lifecycle
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Number of players in the game (must be 2).
            seed: Optional seed for deterministic behavior. When omitted, a
                random value in [0, 10000] is recorded in the game state.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")
        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        rng_seed = seed if seed is not None else random.randint(0, 10000)
        game_state: Dict[str, Any] = {
            "runeboard": self._empty_runeboard(),
            "current_player": "Solar",
            "turn_count": 0,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observations": {"Solar": [], "Lunar": []},
            "player_symbols": self.player_symbols,
            "seed": rng_seed,
        }
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.player_roles,
        )
        # Initial observations broadcast to all players: intro, then the board.
        init_message = (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col]."
        )
        self.state.add_observation(
            init_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )
        board_str = self._render_runeboard(game_state["runeboard"])
        self.state.add_observation(
            board_str, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the prompt shown to a player.

        Args:
            player_id: Index into ``self.player_roles`` (0 = Solar, 1 = Lunar).
            game_state: Game-state dict; ``"runeboard"`` and
                ``"player_symbols"`` are read.

        Returns:
            Prompt text containing the role, rules summary, current board,
            the list of empty cells and the expected action format.
        """
        player_role = self.player_roles[player_id]
        player_symbol = game_state["player_symbols"][player_role]
        board = game_state["runeboard"]
        runeboard_str = self._render_runeboard(board)
        # Coordinates are presented 1-based, matching the action format.
        empties = [
            f"[Etch: {r + 1}, {c + 1}]"
            for r in range(3)
            for c in range(3)
            if board[r][c] == "_"
        ]
        empties_str = ", ".join(empties)
        # In an f-string "{{" renders a single "{"; the examples therefore
        # render as \boxed{[Etch: 1, 3]}, consistent with the \boxed{}
        # instruction line above them.
        prompt = (
            f"You are a Scribe competing to master the Runeboard through glyph alignment.\n"
            f"Role: Scribe {player_role} ({player_symbol})\n\n"
            f"Rules Summary:\n"
            f"- Each player alternately etches one glyph per turn.\n"
            f"- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
            f"- If all nine cells are filled without alignment, it's a draw.\n\n"
            f"Current Runeboard:\n{runeboard_str}\n\n"
            f"Empty Cells where you can etch:\n{empties_str}\n\n"
            f"Action Format:\n"
            f"Use [Etch: row, column] with row and column in 1-3.\n"
            f"Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            f"Example valid response:\n"
            f"I will etch at the top right corner.\n"
            f"\\boxed{{[Etch: 1, 3]}}\n\n"
            f"Example invalid response:\n"
            f"\\boxed{{[Mark: 1, 3]}}  # Reason: 'Mark' is not a valid action.\n"
        )
        return prompt

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is recorded, the boxed answer is extracted and
        validated (format, bounds, cell availability), and a valid move is
        applied to the board before win and draw conditions are checked.
        Any validation failure is reported through ``set_invalid_move`` and
        leaves the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            (done, info) as returned by ``self.state.step()``.
        """
        current_id = self.state.current_player_id
        current_role = self.player_roles[current_id]
        opponent_role = self.player_roles[1 - current_id]
        game_state = self.state.game_state
        board = game_state["runeboard"]

        # Record player's raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        # Extract content inside boxed
        action_content = self._extract_answer_content(action)

        # Validate
        if game_state["is_terminal"]:
            self.state.set_invalid_move("Game already ended.")
            return self.state.step()

        match = self.action_pattern.match(action_content)
        if not match:
            self.state.set_invalid_move(
                "Invalid format: must be [Etch: row, column] with row,col in 1-3."
            )
            return self.state.step()

        try:
            # Convert the 1-based player coordinates to 0-based indices.
            row, col = int(match.group(1)) - 1, int(match.group(2)) - 1
        except ValueError:
            # int() can reject absurdly long digit strings (integer string
            # conversion length limit in recent Python versions); such a
            # value is out of bounds in any case.
            self.state.set_invalid_move(
                "Out of bounds: coordinates must be between 1 and 3."
            )
            return self.state.step()

        if row not in range(3) or col not in range(3):
            self.state.set_invalid_move(
                "Out of bounds: coordinates must be between 1 and 3."
            )
            return self.state.step()

        if board[row][col] != "_":
            self.state.set_invalid_move("Cell already occupied.")
            return self.state.step()

        # Apply action
        symbol = self.player_symbols[current_role]
        board[row][col] = symbol
        game_state["last_action"] = action_content
        game_state["turn_count"] += 1

        # Announce move
        move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
        self.state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)

        # Show updated board
        board_render = self._render_runeboard(board)
        self.state.add_observation(
            board_render, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        # Check win condition
        if self._check_winner(board, symbol):
            game_state["winner"] = current_role
            game_state["is_terminal"] = True
            self.state.set_winner(
                player_id=current_id, reason=f"{current_role} formed a line of glyphs."
            )
            return self.state.step()

        # Check draw condition: turn_count only advances on valid placements,
        # so nine turns means every cell of the 3x3 board is filled.
        if game_state["turn_count"] >= 9:
            game_state["is_terminal"] = True
            self.state.set_draw("Runeboard is full with no alignment. Draw.")
            return self.state.step()

        # Switch player
        game_state["current_player"] = opponent_role
        return self.state.step()
```