Add env.py from Openverse builder
This commit is contained in:
236
env.py
Normal file
236
env.py
Normal file
@@ -0,0 +1,236 @@
|
|||||||
|
```python
|
||||||
|
import re
|
||||||
|
import random
|
||||||
|
from typing import Any, Dict, Optional, Tuple, List
|
||||||
|
|
||||||
|
import textarena as ta
|
||||||
|
|
||||||
|
|
||||||
|
class GlyphGridDuelEnv(ta.Env):
|
||||||
|
"""
|
||||||
|
GlyphGrid Duel Environment
|
||||||
|
Implements the deterministic, turn-based game designed in Stage 1.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, max_turns: int = 9):
|
||||||
|
self.max_turns = max_turns
|
||||||
|
self.action_pattern = re.compile(r"^\[Etch:\s*([1-3]),\s*([1-3])\]$")
|
||||||
|
self.player_roles = {0: "Solar", 1: "Lunar"}
|
||||||
|
self.player_symbols = {"Solar": "S", "Lunar": "L"}
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Helper Methods
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
def _extract_answer_content(self, action: str) -> str:
|
||||||
|
"""Extract content between \boxed{{...}} from a player's response."""
|
||||||
|
# Double braces escaped pattern
|
||||||
|
match = re.search(r'\\boxed\{\{(.*?)\}\}', action, re.DOTALL)
|
||||||
|
if match:
|
||||||
|
return match.group(1).strip()
|
||||||
|
# fallback single brace just in case
|
||||||
|
match = re.search(r'\\boxed\{(.*?)\}', action, re.DOTALL)
|
||||||
|
if match:
|
||||||
|
return match.group(1).strip()
|
||||||
|
return action.strip()
|
||||||
|
|
||||||
|
def _empty_runeboard(self) -> List[List[str]]:
|
||||||
|
"""Create an empty 3x3 runeboard."""
|
||||||
|
return [["_"] * 3 for _ in range(3)]
|
||||||
|
|
||||||
|
def _render_runeboard(self, runeboard: List[List[str]]) -> str:
|
||||||
|
"""Return a string representation of the current runeboard."""
|
||||||
|
board_lines = []
|
||||||
|
for row in runeboard:
|
||||||
|
board_lines.append(" ".join(row))
|
||||||
|
return "\n".join(board_lines)
|
||||||
|
|
||||||
|
def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
|
||||||
|
"""Return True if the provided symbol has aligned three glyphs."""
|
||||||
|
# rows
|
||||||
|
for r in range(3):
|
||||||
|
if all(runeboard[r][c] == symbol for c in range(3)):
|
||||||
|
return True
|
||||||
|
# cols
|
||||||
|
for c in range(3):
|
||||||
|
if all(runeboard[r][c] == symbol for r in range(3)):
|
||||||
|
return True
|
||||||
|
# diagonals
|
||||||
|
if all(runeboard[i][i] == symbol for i in range(3)):
|
||||||
|
return True
|
||||||
|
if all(runeboard[i][2 - i] == symbol for i in range(3)):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Game Lifecycle
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
def reset(self, num_players: int, seed: Optional[int] = None):
    """
    Start a new match with an empty runeboard and Solar to move.

    Args:
        num_players: Number of players; must be exactly 2.
        seed: Optional seed passed to the state object. When omitted, a
            random integer in [0, 10000] is stored under "seed" in the
            game state instead.

    Returns:
        The newly created state object (``self.state``).

    Raises:
        ValueError: If ``num_players`` is not 2.
    """
    if num_players != 2:
        raise ValueError("GlyphGrid Duel requires exactly 2 players.")

    self.state = ta.TwoPlayerState(
        num_players=num_players,
        seed=seed,
        max_turns=self.max_turns,
    )

    # Keep a concrete seed value in the game state even if none was given.
    rng_seed = random.randint(0, 10000) if seed is None else seed

    game_state: Dict[str, Any] = dict(
        runeboard=self._empty_runeboard(),
        current_player="Solar",
        turn_count=0,
        winner=None,
        is_terminal=False,
        last_action=None,
        observations={"Solar": [], "Lunar": []},
        player_symbols=self.player_symbols,
        seed=rng_seed,
    )

    self.state.reset(
        game_state=game_state,
        player_prompt_function=self._generate_player_prompt,
        role_mapping=self.player_roles,
    )

    # Opening announcements, addressed with from_id=-1 / to_id=-1:
    # first the instructions, then the rendered (empty) board.
    opening = (
        (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col].",
            ta.ObservationType.GAME_MESSAGE,
        ),
        (
            self._render_runeboard(game_state["runeboard"]),
            ta.ObservationType.GAME_BOARD,
        ),
    )
    for text, observation_type in opening:
        self.state.add_observation(text, observation_type, from_id=-1, to_id=-1)

    return self.state
|
||||||
|
|
||||||
|
def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
|
||||||
|
"""Generate turn prompt for each player."""
|
||||||
|
player_role = self.player_roles[player_id]
|
||||||
|
player_symbol = game_state["player_symbols"][player_role]
|
||||||
|
runeboard_str = self._render_runeboard(game_state["runeboard"])
|
||||||
|
empties = [
|
||||||
|
f"[Etch: {r+1}, {c+1}]"
|
||||||
|
for r in range(3)
|
||||||
|
for c in range(3)
|
||||||
|
if game_state["runeboard"][r][c] == "_"
|
||||||
|
]
|
||||||
|
empties_str = ", ".join(empties)
|
||||||
|
|
||||||
|
prompt = (
|
||||||
|
f"You are a Scribe competing to master the Runeboard through glyph alignment.\n"
|
||||||
|
f"Role: Scribe {player_role} ({player_symbol})\n\n"
|
||||||
|
f"Rules Summary:\n"
|
||||||
|
f"- Each player alternately etches one glyph per turn.\n"
|
||||||
|
f"- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
|
||||||
|
f"- If all nine cells are filled without alignment, it’s a draw.\n\n"
|
||||||
|
f"Current Runeboard:\n{runeboard_str}\n\n"
|
||||||
|
f"Empty Cells where you can etch:\n{empties_str}\n\n"
|
||||||
|
f"Action Format:\n"
|
||||||
|
f"Use [Etch: row, column] with row and column in 1–3.\n"
|
||||||
|
f"Put your final answer within \\boxed{{}} at the end of your response.\n\n"
|
||||||
|
f"Example valid response:\n"
|
||||||
|
f"I will etch at the top right corner.\n"
|
||||||
|
f"\\boxed{{{{[Etch: 1, 3]}}}}\n\n"
|
||||||
|
f"Example invalid response:\n"
|
||||||
|
f"\\boxed{{{{[Mark: 1, 3]}}}} # Reason: 'Mark' is not a valid action.\n"
|
||||||
|
)
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
def step(self, action: str) -> Tuple[bool, ta.Info]:
    """
    Perform a single environment step for the current player.

    The raw action is logged as an observation, the answer is extracted and
    validated, and, if legal, the glyph is placed. The method then
    checks for a win, then for a full board (draw), and otherwise records
    the opponent as the next player in the game state.

    An illegal submission (game already over, malformed action,
    out-of-range coordinates, or occupied cell) is reported through
    ``set_invalid_move`` and leaves the board unchanged.

    Args:
        action: The action text submitted by the current player.

    Returns:
        The result of ``self.state.step()``, annotated as ``(done, info)``.
    """
    state = self.state
    game_state = state.game_state
    current_id = state.current_player_id
    current_role = self.player_roles[current_id]
    board = game_state["runeboard"]

    # Record the player's raw submission before any validation.
    state.add_observation(
        action,
        ta.ObservationType.PLAYER_ACTION,
        from_id=current_id,
        to_id=-1,
    )

    # Extract content inside boxed
    action_content = self._extract_answer_content(action)

    if game_state["is_terminal"]:
        state.set_invalid_move("Game already ended.")
        return state.step()

    match = self.action_pattern.match(action_content)
    if not match:
        state.set_invalid_move(
            "Invalid format: must be [Etch: row, column] with row,col in 1–3."
        )
        return state.step()

    # Both groups always exist and hold digits, so int() cannot fail here.
    row, col = int(match.group(1)) - 1, int(match.group(2)) - 1

    # Defensive guard: with the current action pattern ([1-3]) this cannot
    # trigger, but it keeps indexing safe should the pattern be relaxed.
    if not (0 <= row < 3 and 0 <= col < 3):
        state.set_invalid_move(
            "Out of bounds: coordinates must be between 1 and 3."
        )
        return state.step()

    if board[row][col] != "_":
        state.set_invalid_move("Cell already occupied.")
        return state.step()

    # Apply action
    symbol = self.player_symbols[current_role]
    board[row][col] = symbol
    game_state["last_action"] = action_content
    game_state["turn_count"] += 1

    # Announce move
    move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
    state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)

    # Show updated board
    state.add_observation(
        self._render_runeboard(board),
        ta.ObservationType.GAME_BOARD,
        from_id=-1,
        to_id=-1,
    )

    # Check win condition
    if self._check_winner(board, symbol):
        game_state["winner"] = current_role
        game_state["is_terminal"] = True
        state.set_winner(
            player_id=current_id, reason=f"{current_role} formed a line of glyphs."
        )
        return state.step()

    # Check draw condition: derive "board full" from the board itself
    # instead of comparing the move counter against a hard-coded 9.
    if all(cell != "_" for cells in board for cell in cells):
        game_state["is_terminal"] = True
        state.set_draw("Runeboard is full with no alignment. Draw.")
        return state.step()

    # Switch player
    game_state["current_player"] = self.player_roles[1 - current_id]
    return state.step()
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user