Files
asdf-1762668074593/env.py

268 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column, or
    diagonal wins. A full grid with no alignment ends in a draw.

    Player 1 always bears "X" and Player 2 always bears "O"; which of them
    moves first is decided by the parity of the seed passed to ``reset``.
    """

    # Accepts exactly "[Inscribe:r,c]" where r and c are 1-based and in 1..3.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    def __init__(self):
        """Initialize reusable attributes; the state is built in ``reset``."""
        super().__init__()
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Helper: Extract content inside <answer> tags
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Return the stripped text of the first <answer>...</answer> pair.

        Tag matching is case-insensitive and may span several lines. When no
        tag pair is present, the whole action string (stripped) is returned.
        """
        match = re.search(r"<answer>(.*?)</answer>", action, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return action.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to a fresh, empty-board game.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed. When omitted, a random one in [0, 99999] is
                drawn. Even seeds let Player 1 start, odd seeds Player 2.

        Returns:
            The freshly initialised game_state dictionary.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")
        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Determine which player starts (based on seed parity)
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = f"Player {starting_player_id + 1}"
        opening_message = f"{starting_player_name} begins the glyph duel."

        # Build game_state dictionary (board cells start as empty strings)
        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": [["" for _ in range(3)] for _ in range(3)],
            "players": {
                "Player 1": {"symbol": "X", "moves_made": 0},
                "Player 2": {"symbol": "O", "moves_made": 0},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [opening_message],
        }

        # Initialize internal environment state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
        )

        # step() takes the mover from self.state.current_player_id and rejects
        # them if game_state["current_player"] disagrees. Nothing else tells
        # the state manager about the seed-derived starter, so align it here;
        # otherwise the first mover could be refused with "Not your turn.".
        # NOTE(review): assumes current_player_id is a plain assignable attribute.
        self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=opening_message,
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(game_state["board"]),
            observation_type=ta.ObservationType.GAME_BOARD,
        )
        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """
        Format the 3×3 board for display.

        Empty cells are shown as ".". Each row is prefixed with its 1-based
        index, and the header carries the 1-based column indices, spaced so
        that every label sits directly above its column of cells.
        """
        # Rows look like "1 . | . | ." (cells at offsets 2, 6, 10), so the
        # header needs a two-space lead and three spaces between labels.
        header = "  " + "   ".join(str(col + 1) for col in range(3))
        rows = [
            f"{i + 1} " + " | ".join(cell if cell else "." for cell in row)
            for i, row in enumerate(board)
        ]
        return f"{header}\n" + "\n".join(rows)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Return True if ``symbol`` fills any full row, column, or diagonal."""
        # Rows and columns
        for i in range(3):
            if all(board[i][j] == symbol for j in range(3)):
                return True
            if all(board[j][i] == symbol for j in range(3)):
                return True
        # Main diagonal, then anti-diagonal
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the <answer> payload is validated against
        ``VALID_ACTION_PATTERN``, and the move is applied if it is this
        player's turn and the target cell is empty. A win or a full board
        marks the game as finished; otherwise the turn passes to the opponent.

        Args:
            action: The action text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns, annotated as a tuple
            (done, info).
        """
        player_id = self.state.current_player_id
        player_name = f"Player {player_id + 1}"

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Extract meaningful token content and validate its format
        answer_content = self._extract_answer_content(action)
        parsed = self.VALID_ACTION_PATTERN.match(answer_content)
        if parsed is None:
            self.state.set_invalid_move(reason="Invalid action format. Must match [Inscribe:x,y].")
            return self.state.step()

        # The two capture groups are the 1-based row and column.
        row, col = (int(value) - 1 for value in parsed.groups())

        # Access current game_state
        g = self.state.game_state
        board = g["board"]

        # Turn ownership is checked first so an out-of-turn player is told so,
        # rather than being told about the state of the cell they aimed at.
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        # Check if cell already occupied
        if board[row][col] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Apply move
        symbol = g["players"][player_name]["symbol"]
        board[row][col] = symbol
        g["players"][player_name]["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(f"{player_name} placed at ({row+1},{col+1})")

        # Narrate the move and show the updated board. Both come from the game
        # (from_id=-1), like the other game messages; the player's own words
        # were already logged above as a PLAYER_ACTION.
        self.state.add_observation(
            from_id=-1,
            message=f"{player_name} inscribed glyph '{symbol}' at ({row+1},{col+1})",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Check win condition
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(player_id=player_id, reason=f"{player_name} aligned three glyphs and won the duel.")
            return self.state.step()

        # Check draw condition
        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
            return self.state.step()

        # Switch turns
        next_player_id = 1 - player_id
        g["current_player"] = f"Player {next_player_id + 1}"

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for one player.

        Args:
            player_id: The integer ID of the player (0 or 1).
            game_state: The shared game state; its "players", "board" and
                "current_player" entries are read.

        Returns:
            A string describing the player's glyph, the current board, whose
            turn it is, and the exact action and response format expected.
        """
        player_name = f"Player {player_id + 1}"
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]
        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            " - x, y ∈ {1, 2, 3}\n"
            " - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            " - Put private reasoning inside <think></think>.\n"
            " - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer> <-- Invalid keyword\n"
        )
        return prompt
```