Files
grid-game/env.py
2001-01-01 00:00:00 +00:00

240 lines
8.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple
import textarena as ta
class RunicGridEnv(ta.Env):
    """
    TextArena environment for the Runic Grid game.

    Two rival mystics, the Solar Scribe (☼) and the Lunar Scribe (☽), compete
    to inscribe runes on a 3x3 grid until one achieves an aligned triad.

    Moves are written ``[Inscribe:x,y]`` where ``x`` selects the row and ``y``
    the column of the board (the cell ``board[x][y]``), both in ``0..2``.
    """

    # Each scribe needs a distinct rune: win detection compares cells against
    # the mover's symbol, so identical symbols would let a player win with the
    # opponent's tiles.
    SOLAR_SYMBOL = "☼"
    LUNAR_SYMBOL = "☽"

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit handed to the framework state on reset.
        """
        self.max_turns = max_turns
        # The groups capture the row and column digits so ``step`` can read
        # them straight from the match instead of re-splitting the text.
        self.grammar_pattern = re.compile(r'^\[Inscribe:([0-2]),([0-2])\]$')

    # -------------------------------------------------------------------------
    # Helper Functions
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content from inside a \\boxed{} structure in a player's response.

        When the response contains several \\boxed{} groups, the last one is
        used, matching the prompt's instruction to put the final answer at the
        end of the response.

        Args:
            action: The full text of the player's action, possibly containing \\boxed{}.

        Returns:
            The stripped inner content of the last \\boxed{} group, or the
            stripped full text when no \\boxed{} group is present.
        """
        boxed = re.findall(r'\\boxed\{([^}]*)\}', action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_winner(self, board: list[list[Optional[str]]], symbol: str) -> bool:
        """Return True if the given symbol has a 3-in-a-line."""
        # Rows and columns
        for i in range(3):
            if all(board[i][c] == symbol for c in range(3)):
                return True
            if all(board[r][i] == symbol for r in range(3)):
                return True
        # Diagonals
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_full(self, board: list[list[Optional[str]]]) -> bool:
        """Return True when no cell of the board is still ``None``."""
        return all(cell is not None for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Prompt Generator
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the player prompt message describing current game status.

        Args:
            player_id: 0 for the Solar Scribe; any other value is treated as
                the Lunar Scribe.
            game_state: Game state dict holding the ``"players"`` and
                ``"board"`` entries created in ``reset``.

        Returns:
            The prompt text: role, rune symbol, goal, rendered board and the
            action format with examples.
        """
        player_name = "Solar Scribe" if player_id == 0 else "Lunar Scribe"
        symbol = game_state["players"][player_name]["symbol"]
        board = game_state["board"]
        # Empty cells are rendered as "." so every row shows three columns.
        board_str = "\n".join(
            " ".join(cell if cell is not None else "." for cell in row) for row in board
        )
        prompt = (
            f"You are the **{player_name}**, a mystic engraving runes upon the sacred Runic Tablet.\n"
            f"Your rune symbol: {symbol}\n\n"
            "Goal:\n"
            "- Form a continuous line of three of your runes horizontally, vertically, or diagonally before your opponent.\n\n"
            f"Current Runic Tablet:\n{board_str}\n\n"
            "Action Format:\n"
            # Plain (non-f) string: braces must be single here, otherwise the
            # player is shown "{{...}}", which the parser rejects.
            "- To inscribe, use syntax: \\boxed{[Inscribe:x,y]}\n"
            "- Coordinates (x,y) range from 0 to 2.\n\n"
            "Example Coordinate Map:\n"
            "(0,0) (0,1) (0,2)\n"
            "(1,0) (1,1) (1,2)\n"
            "(2,0) (2,1) (2,2)\n\n"
            "Example valid response:\n"
            "I shall inscribe my rune on the center tile for strength.\n"
            "\\boxed{[Inscribe:1,1]}\n\n"
            "Example invalid response:\n"
            "I think I will go middle-right.\n"
            "\\boxed{[Move:1,2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt

    # -------------------------------------------------------------------------
    # Reset
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to starting conditions for Runic Grid.

        Args:
            num_players: Must be 2 for this game.
            seed: Optional seed forwarded to the framework state.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Runic Grid only supports 2 players.")
        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        board: list[list[Optional[str]]] = [[None for _ in range(3)] for _ in range(3)]
        players = {
            "Solar Scribe": {"symbol": self.SOLAR_SYMBOL, "actions": []},
            "Lunar Scribe": {"symbol": self.LUNAR_SYMBOL, "actions": []},
        }
        # The Solar Scribe is always recorded as the opening player.
        current_player = "Solar Scribe"
        game_state = {
            "turn_count": 0,
            "current_player": current_player,
            "board": board,
            "players": players,
            "winner": None,
            "outcome": "ongoing",
            "observations": [],
        }
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
        # Add initial game messages
        self.state.add_observation(
            "Welcome to Runic Grid: The duel of Solar and Lunar Scribes.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            f"The {current_player} begins and will place the first rune.",
            ta.ObservationType.GAME_MESSAGE,
        )

    # -------------------------------------------------------------------------
    # Step
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Execute a single player step in the Runic Grid game.

        The action is logged, parsed and validated; an invalid action is
        reported through ``set_invalid_move`` and leaves the board untouched.
        A valid action places the mover's rune and then checks for a win or a
        full-board draw.

        Args:
            action: The player's proposed move text, including reasoning and \\boxed{} token.

        Returns:
            (done, info): the pair returned by the framework state's ``step()``.
        """
        player_id = self.state.current_player_id
        player_name = "Solar Scribe" if player_id == 0 else "Lunar Scribe"
        opponent_name = "Lunar Scribe" if player_id == 0 else "Solar Scribe"
        game_state = self.state.game_state
        symbol = game_state["players"][player_name]["symbol"]

        # Log the raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        # Parse and validate
        content = self._extract_answer_content(action)
        if not content:
            self.state.set_invalid_move(reason="Malformed boxed syntax")
            return self.state.step()
        match = self.grammar_pattern.match(content)
        if not match:
            self.state.set_invalid_move(reason="Action does not match grammar [Inscribe:x,y]")
            return self.state.step()
        # The pattern only admits single digits 0-2, so both coordinates are
        # already valid board indices; no further range check is needed.
        x, y = int(match.group(1)), int(match.group(2))

        board = game_state["board"]
        if board[x][y] is not None:
            self.state.set_invalid_move(reason="Tile already inscribed")
            return self.state.step()

        # Apply move
        board[x][y] = symbol
        game_state["turn_count"] += 1
        game_state["players"][player_name]["actions"].append(content)
        game_state["observations"].append(
            {"player": player_name, "action": content}
        )
        game_state["board"] = board
        self.state.add_observation(
            f"{player_name} inscribed {symbol} at ({x},{y}).",
            ta.ObservationType.GAME_MESSAGE,
        )

        # Check terminal conditions (a win takes precedence over a full board)
        if self._check_winner(board, symbol):
            game_state["winner"] = player_name
            game_state["outcome"] = "win"
            self.state.set_winner(player_id=player_id, reason=f"{player_name} achieved a triad alignment.")
            return self.state.step()
        if self._is_full(board):
            game_state["outcome"] = "draw"
            self.state.set_draw(reason="Runic Tablet fully inscribed with no alignment.")
            return self.state.step()

        # Otherwise continue
        game_state["current_player"] = opponent_name
        done, info = self.state.step()
        return done, info

    # -------------------------------------------------------------------------
    # Close
    # -------------------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the framework state's ``rewards`` and ``game_info``."""
        return self.state.rewards, self.state.game_info
```