268 lines
10 KiB
Python
268 lines
10 KiB
Python
|
|
```python
|
|||
|
|
import re
|
|||
|
|
import random
|
|||
|
|
from typing import Any, Dict, Optional, Tuple, List
|
|||
|
|
|
|||
|
|
import textarena as ta
|
|||
|
|
|
|||
|
|
|
|||
|
|
class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column, or diagonal wins.

    Player 1 (id 0) always plays "X" and Player 2 (id 1) always plays "O".
    Which of them moves first is decided by the parity of the seed passed to
    ``reset`` (even -> Player 1, odd -> Player 2).
    """

    # Exact action grammar; both coordinates are 1-based and captured as groups.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    # Non-greedy so that only the first <answer>...</answer> pair is used.
    _ANSWER_PATTERN = re.compile(r"<answer>(.*?)</answer>", re.DOTALL | re.IGNORECASE)

    # Side length of the square grid.
    _SIZE = 3

    def __init__(self):
        """Initialize reusable attributes; the real state is built in ``reset``."""
        super().__init__()
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Small helpers
    # -------------------------------------------------------------------------
    @staticmethod
    def _player_name(player_id: int) -> str:
        """Map a 0-based player id to the display name used as a game_state key."""
        return f"Player {player_id + 1}"

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract content from <answer></answer> tags.

        The tag match is case-insensitive and may span several lines. If no
        tag pair is present, the whole action string is used instead. The
        result is always stripped of surrounding whitespace.
        """
        found = self._ANSWER_PATTERN.search(action)
        payload = found.group(1) if found else action
        return payload.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to a fresh, empty board.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed. When omitted, a random one in [0, 99999] is
                drawn. The seed's parity selects the starting player.

        Returns:
            The freshly initialised ``game_state`` dictionary.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Determine which player starts (based on seed parity)
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = self._player_name(starting_player_id)
        opening_message = f"{starting_player_name} begins the glyph duel."

        # Empty cells are represented by the empty string.
        board = [["" for _ in range(self._SIZE)] for _ in range(self._SIZE)]

        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": board,
            "players": {
                "Player 1": {"symbol": "X", "moves_made": 0},
                "Player 2": {"symbol": "O", "moves_made": 0},
            },
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [opening_message],
        }

        # Initialize internal environment state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        # Keep the state manager's notion of "whose turn" in sync with the
        # seed-selected starter. step() takes the acting player from
        # self.state.current_player_id, so without this an odd seed makes
        # game_state["current_player"] and the state manager disagree and
        # every move is rejected as "Not your turn.".
        # NOTE(review): assumes current_player_id is a plain writable
        # attribute on the textarena state object - confirm against the
        # installed textarena version.
        self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=opening_message,
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """
        Format the 3×3 board for display.

        Each row is prefixed with its 1-based index, cells are separated by
        " | ", and empty cells are drawn as ".". The header's column numbers
        sit directly above the cells they label.
        """
        # Row text is "<n> c | c | c": cells fall at columns 2, 6 and 10.
        header = "  1   2   3"
        lines = [header]
        for row_number, row in enumerate(board, start=1):
            lines.append(f"{row_number} " + " | ".join(cell or "." for cell in row))
        return "\n".join(lines)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Check if the given symbol has three in a row (row, column, diagonal)."""
        span = range(self._SIZE)
        candidate_lines = [[board[r][c] for c in span] for r in span]      # rows
        candidate_lines += [[board[r][c] for r in span] for c in span]     # columns
        candidate_lines.append([board[i][i] for i in span])                # main diagonal
        candidate_lines.append([board[i][self._SIZE - 1 - i] for i in span])  # anti-diagonal
        return any(all(cell == symbol for cell in line) for line in candidate_lines)

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the <answer> payload is validated against
        the ``[Inscribe:x,y]`` grammar (x = row, y = column, both 1-based),
        and the move is applied if the target cell is free and it is the
        player's turn. Afterwards the board is checked for a win, then for a
        draw; otherwise the turn passes to the other player.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details

        Raises:
            RuntimeError: If called before ``reset``.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step().")

        player_id = self.state.current_player_id
        player_name = self._player_name(player_id)

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Validate action format
        answer_content = self._extract_answer_content(action)
        parsed = self.VALID_ACTION_PATTERN.match(answer_content)
        if parsed is None:
            self.state.set_invalid_move(reason="Invalid action format. Must match [Inscribe:x,y].")
            return self.state.step()

        # The capture groups are the 1-based row and column; convert to indices.
        row = int(parsed.group(1)) - 1
        col = int(parsed.group(2)) - 1

        g = self.state.game_state
        board = g["board"]

        # Check if cell already occupied
        if board[row][col] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Safety net: the state manager and game_state must agree on the mover.
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        # Apply move
        player_record = g["players"][player_name]
        symbol = player_record["symbol"]
        board[row][col] = symbol
        player_record["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(f"{player_name} placed at ({row + 1},{col + 1})")

        # Add observation for move and board update
        self.state.add_observation(
            from_id=player_id,
            message=f"{player_name} inscribed glyph '{symbol}' at ({row + 1},{col + 1})",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Check win condition (must precede the draw check: the ninth move can win)
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(player_id=player_id, reason=f"{player_name} aligned three glyphs and won the duel.")
            return self.state.step()

        # Check draw condition
        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
            return self.state.step()

        # Switch turns
        g["current_player"] = self._player_name(1 - player_id)

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Produce role-appropriate instructions for a player at episode start.

        Args:
            player_id: The integer ID of the player.
            game_state: The shared game state.

        Returns:
            A string prompt describing the current state, rules, and expected format.
        """
        player_name = self._player_name(player_id)
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]

        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            " - x, y ∈ {1, 2, 3}\n"
            " - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            " - Put private reasoning inside <think></think>.\n"
            " - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer> <-- Invalid keyword\n"
        )

        return prompt
|
|||
|
|
```
|