Files
alrightalrightalright-v0/env.py
2001-01-01 00:00:00 +00:00

236 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel Environment.

    Implements the deterministic, turn-based game designed in Stage 1: two
    Scribes ("Solar" with glyph ``S`` and "Lunar" with glyph ``L``) take turns
    etching one glyph into an empty cell of a 3x3 runeboard. Aligning three
    identical glyphs in a row, column, or diagonal wins; nine accepted moves
    without an alignment is a draw.
    """

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit passed to ``ta.TwoPlayerState`` in ``reset``.
        """
        self.max_turns = max_turns
        # Any run of ASCII digits is accepted syntactically so that a
        # well-formed but out-of-range move (e.g. "[Etch: 4, 1]") reaches the
        # dedicated "Out of bounds" check in step() instead of being reported
        # as a format error.
        self.action_pattern = re.compile(r"^\[Etch:\s*([0-9]+),\s*([0-9]+)\]$")
        self.player_roles = {0: "Solar", 1: "Lunar"}
        self.player_symbols = {"Solar": "S", "Lunar": "L"}

    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""Return the stripped text inside ``\boxed{...}`` of a response.

        The double-brace form ``\boxed{{...}}`` is searched for first, then
        the single-brace form ``\boxed{...}``; in each case the first
        occurrence in ``action`` is used. When neither form is present the
        whole response, stripped of surrounding whitespace, is returned.
        """
        for boxed_pattern in (r'\\boxed\{\{(.*?)\}\}', r'\\boxed\{(.*?)\}'):
            match = re.search(boxed_pattern, action, re.DOTALL)
            if match:
                return match.group(1).strip()
        return action.strip()

    def _empty_runeboard(self) -> List[List[str]]:
        """Create an empty 3x3 runeboard; "_" marks an empty cell."""
        return [["_"] * 3 for _ in range(3)]

    def _render_runeboard(self, runeboard: List[List[str]]) -> str:
        """Render the runeboard as one line per row, cells separated by spaces."""
        return "\n".join(" ".join(row) for row in runeboard)

    def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
        """Return True if ``symbol`` fills a full row, column, or diagonal."""
        size = 3
        rows = [[runeboard[r][c] for c in range(size)] for r in range(size)]
        cols = [[runeboard[r][c] for r in range(size)] for c in range(size)]
        diagonals = [
            [runeboard[i][i] for i in range(size)],
            [runeboard[i][size - 1 - i] for i in range(size)],
        ]
        return any(
            all(cell == symbol for cell in line)
            for line in rows + cols + diagonals
        )

    # -------------------------------------------------------------------------
    # Game Lifecycle
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Number of players in the game (must be 2).
            seed: Optional seed; when omitted a random value in [0, 10000] is
                recorded in the game state instead.

        Returns:
            The freshly initialised ``self.state`` object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        rng_seed = seed if seed is not None else random.randint(0, 10000)
        game_state: Dict[str, Any] = {
            "runeboard": self._empty_runeboard(),
            "current_player": "Solar",
            "turn_count": 0,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observations": {"Solar": [], "Lunar": []},
            "player_symbols": self.player_symbols,
            "seed": rng_seed,
        }
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.player_roles,
        )

        # Initial observations broadcast to all players: a message, then the board.
        init_message = (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col]."
        )
        self.state.add_observation(
            init_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )
        board_str = self._render_runeboard(game_state["runeboard"])
        self.state.add_observation(
            board_str, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the prompt for ``player_id``: role, rules, board, open cells, format.

        Args:
            player_id: Key into ``self.player_roles`` (0 or 1).
            game_state: Game-state dict; its "runeboard" and "player_symbols"
                entries are read.

        Returns:
            The full prompt text.
        """
        player_role = self.player_roles[player_id]
        player_symbol = game_state["player_symbols"][player_role]
        runeboard = game_state["runeboard"]
        runeboard_str = self._render_runeboard(runeboard)
        # Every still-empty cell, spelled exactly as the action the player may send.
        empties = [
            f"[Etch: {r+1}, {c+1}]"
            for r in range(3)
            for c in range(3)
            if runeboard[r][c] == "_"
        ]
        empties_str = ", ".join(empties)
        # Only the segments with placeholders are f-strings, so the literal
        # braces of \boxed{} need no escaping and render as single braces,
        # matching the instruction line.
        prompt = (
            "You are a Scribe competing to master the Runeboard through glyph alignment.\n"
            f"Role: Scribe {player_role} ({player_symbol})\n\n"
            "Rules Summary:\n"
            "- Each player alternately etches one glyph per turn.\n"
            "- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
            "- If all nine cells are filled without alignment, it's a draw.\n\n"
            f"Current Runeboard:\n{runeboard_str}\n\n"
            f"Empty Cells where you can etch:\n{empties_str}\n\n"
            "Action Format:\n"
            "Use [Etch: row, column] with row and column in 1-3.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will etch at the top right corner.\n"
            "\\boxed{[Etch: 1, 3]}\n\n"
            "Example invalid response:\n"
            "\\boxed{[Mark: 1, 3]} # Reason: 'Mark' is not a valid action.\n"
        )
        return prompt

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is recorded as an observation, then validated (game not
        over, well-formed, in bounds, target cell empty). A valid move is
        applied and announced, after which win and draw conditions are checked.

        Args:
            action: The action text submitted by the current player.

        Returns:
            (done, info) as returned by ``self.state.step()``.
        """
        current_id = self.state.current_player_id
        current_role = self.player_roles[current_id]
        opponent_role = self.player_roles[1 - current_id]
        game_state = self.state.game_state
        board = game_state["runeboard"]

        # Record player's raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        # Extract content inside boxed
        action_content = self._extract_answer_content(action)

        # Validate
        if game_state["is_terminal"]:
            self.state.set_invalid_move("Game already ended.")
            return self.state.step()

        match = self.action_pattern.match(action_content)
        if not match:
            self.state.set_invalid_move(
                "Invalid format: must be [Etch: row, column] with row,col in 1-3."
            )
            return self.state.step()

        try:
            row = int(match.group(1)) - 1
            col = int(match.group(2)) - 1
        except ValueError:
            # int() rejects extremely long digit strings on recent Python
            # versions; such a coordinate is out of bounds anyway.
            self.state.set_invalid_move(
                "Out of bounds: coordinates must be between 1 and 3."
            )
            return self.state.step()

        if row not in range(3) or col not in range(3):
            self.state.set_invalid_move(
                "Out of bounds: coordinates must be between 1 and 3."
            )
            return self.state.step()

        if board[row][col] != "_":
            self.state.set_invalid_move("Cell already occupied.")
            return self.state.step()

        # Apply action
        symbol = self.player_symbols[current_role]
        board[row][col] = symbol
        game_state["last_action"] = action_content
        game_state["turn_count"] += 1

        # Announce move
        move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
        self.state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)

        # Show updated board
        board_render = self._render_runeboard(board)
        self.state.add_observation(
            board_render, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        # Check win condition
        if self._check_winner(board, symbol):
            game_state["winner"] = current_role
            game_state["is_terminal"] = True
            self.state.set_winner(
                player_id=current_id, reason=f"{current_role} formed a line of glyphs."
            )
            return self.state.step()

        # Check draw condition: turn_count only grows on accepted moves, so
        # reaching 9 means every cell of the 3x3 board is filled.
        if game_state["turn_count"] >= 9:
            game_state["is_terminal"] = True
            self.state.set_draw("Runeboard is full with no alignment. Draw.")
            return self.state.step()

        # Switch player
        game_state["current_player"] = opponent_role
        return self.state.step()
```