Add env.py from Openverse builder

This commit is contained in:
2001-01-01 00:00:00 +00:00
parent b839c13777
commit 24ad4e6214

268
env.py Normal file
View File

@@ -0,0 +1,268 @@
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column, or
    diagonal wins; a full grid without such a line ends in a draw.

    Actions are strings of the form ``[Inscribe:row,col]`` with 1-based
    coordinates, optionally wrapped in ``<answer></answer>`` tags.
    """

    # Exact action grammar; the two groups capture the 1-based row and column.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    def __init__(self):
        """Initialize reusable attributes; the state itself is built in reset()."""
        super().__init__()
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Small helpers
    # -------------------------------------------------------------------------
    @staticmethod
    def _player_name(player_id: int) -> str:
        """Map a 0-based player id to the "Player N" key used in game_state."""
        return f"Player {player_id + 1}"

    def _extract_answer_content(self, action: str) -> str:
        """
        Return the stripped text of the first <answer>...</answer> pair.

        Tag matching is case-insensitive and may span several lines. When no
        tag pair is present, the whole action string (stripped) is returned.
        """
        match = re.search(r"<answer>(.*?)</answer>", action, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return action.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to a fresh, empty-board episode.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed. When omitted, a random one in [0, 99999] is
                drawn. An even seed lets Player 1 start, an odd seed Player 2.

        Returns:
            The freshly initialised game_state dictionary.

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")
        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Determine which player starts (based on seed parity)
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = self._player_name(starting_player_id)

        # 3x3 board; the empty string marks a free cell
        board = [["" for _ in range(3)] for _ in range(3)]

        players = {
            "Player 1": {"symbol": "X", "moves_made": 0},
            "Player 2": {"symbol": "O", "moves_made": 0},
        }

        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": board,
            "players": players,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [f"{starting_player_name} begins the glyph duel."],
        }

        # Initialize internal environment state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        # step() identifies the mover through self.state.current_player_id, so
        # the state's turn pointer must agree with game_state["current_player"].
        # Without this, a seed that selects Player 2 leaves the two out of sync
        # and every move is rejected with "Not your turn.".
        # NOTE(review): assumes current_player_id is a plain writable attribute
        # of ta.TwoPlayerState that state.reset() leaves at player 0 - confirm.
        self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=f"{starting_player_name} begins the glyph duel.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )
        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """
        Format the 3×3 board for display.

        Empty cells are drawn as ".". Each row is prefixed with its 1-based
        index, and the header carries the 1-based column indices.
        """
        # Column labels sit directly above the cells of "N c | c | c".
        header = "  1   2   3"
        rows = []
        for i, row in enumerate(board):
            cells = [cell if cell else "." for cell in row]
            rows.append(f"{i+1} " + " | ".join(cells))
        return f"{header}\n" + "\n".join(rows)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Return True if `symbol` fills a complete row, column, or diagonal."""
        # Rows and columns
        for i in range(3):
            if all(board[i][j] == symbol for j in range(3)):
                return True
            if all(board[j][i] == symbol for j in range(3)):
                return True
        # Main diagonal, then anti-diagonal
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the answer content is extracted and
        validated, and a legal move is applied to the board. A winning line
        or a full board ends the episode; otherwise the turn passes on.

        Args:
            action: The action text submitted by the current player.

        Returns:
            The (done, info) tuple produced by self.state.step().
        """
        player_id = self.state.current_player_id
        player_name = self._player_name(player_id)

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Validate the format and keep the match so the coordinates come
        # straight from its capture groups.
        answer_content = self._extract_answer_content(action)
        parsed = self.VALID_ACTION_PATTERN.match(answer_content)
        if parsed is None:
            self.state.set_invalid_move(reason="Invalid action format. Must match [Inscribe:x,y].")
            return self.state.step()

        # Convert the 1-based (row, column) pair into 0-based board indices
        x, y = (int(value) - 1 for value in parsed.groups())

        g = self.state.game_state
        board = g["board"]

        # Turn order is checked before the board is inspected
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        # Check if cell already occupied
        if board[x][y] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Apply move
        symbol = g["players"][player_name]["symbol"]
        board[x][y] = symbol
        g["players"][player_name]["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(f"{player_name} placed at ({x+1},{y+1})")

        # Add observation for move and board update
        self.state.add_observation(
            from_id=player_id,
            message=f"{player_name} inscribed glyph '{symbol}' at ({x+1},{y+1})",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Check win condition
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(player_id=player_id, reason=f"{player_name} aligned three glyphs and won the duel.")
            return self.state.step()

        # Check draw condition
        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
            return self.state.step()

        # Switch turns in the game_state mirror of the turn pointer
        next_player_id = 1 - player_id
        g["current_player"] = self._player_name(next_player_id)

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for one player.

        Args:
            player_id: The integer ID of the player (0 or 1).
            game_state: The shared game state; its "players", "board" and
                "current_player" entries are read.

        Returns:
            A string describing the player's glyph, the current board, whose
            turn it is, and the exact action and formatting rules.
        """
        player_name = self._player_name(player_id)
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]
        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            " - x, y ∈ {1, 2, 3}\n"
            " - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            " - Put private reasoning inside <think></think>.\n"
            " - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer> <-- Invalid keyword\n"
        )
        return prompt
```