Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit 36a94230d3

240
env.py Normal file
View File

@@ -0,0 +1,240 @@
```python
import re
import random
from typing import Any, Dict, Optional, Tuple
import textarena as ta
class RunicGridEnv(ta.Env):
    """
    TextArena Environment for the Runic Grid game.

    Two rival mystics, the Solar Scribe (☼) and the Lunar Scribe (☽), compete
    to inscribe runes on a 3×3 grid until one achieves an aligned triad.

    Player id 0 is treated as the Solar Scribe; any other id is treated as the
    Lunar Scribe. Moves are submitted as ``[Inscribe:x,y]`` where ``x`` indexes
    the board row and ``y`` the board column (both 0-2).
    """

    # Rune glyphs per player, written as escapes so they survive re-encoding:
    # U+263C is the sun glyph and U+263D the moon glyph named in the class docstring.
    _PLAYER_SYMBOLS = {
        "Solar Scribe": "\u263c",
        "Lunar Scribe": "\u263d",
    }

    # Matches one \boxed{...} group and captures its inner text.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}")

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the framework state on reset.
        """
        super().__init__()
        self.max_turns = max_turns
        # Capture groups hold the row (x) and column (y) so step() can read the
        # coordinates directly from the match instead of re-splitting the text.
        self.grammar_pattern = re.compile(r'^\[Inscribe:([0-2]),([0-2])\]$')

    # -------------------------------------------------------------------------
    # Helper Functions
    # -------------------------------------------------------------------------
    @staticmethod
    def _player_name(player_id: int) -> str:
        """Map a player id to its display name (0 -> Solar, otherwise Lunar)."""
        return "Solar Scribe" if player_id == 0 else "Lunar Scribe"

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content from inside a \\boxed{} structure in a player's response.

        The prompt asks players to put the final answer at the end of the
        response, so when several \\boxed{} groups are present the last one is
        used; an earlier group mentioned while reasoning must not shadow it.

        Args:
            action: The full text of the player's action, possibly containing \\boxed{}.

        Returns:
            The stripped inner content of the last \\boxed{} group, or the
            stripped action itself when no \\boxed{} group is present.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_winner(self, board: list[list[Optional[str]]], symbol: str) -> bool:
        """Return True if the given symbol has a 3-in-a-line."""
        # Rows and columns
        for i in range(3):
            if all(board[i][c] == symbol for c in range(3)):
                return True
            if all(board[r][i] == symbol for r in range(3)):
                return True
        # Diagonals
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_full(self, board: list[list[Optional[str]]]) -> bool:
        """Return True when no cell of the board is still empty (None)."""
        return all(cell is not None for row in board for cell in row)

    def _reject(self, reason: str) -> Tuple[bool, ta.Info]:
        """Flag the current action as invalid and advance the framework state."""
        self.state.set_invalid_move(reason=reason)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generator
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the player prompt message describing current game status.

        Args:
            player_id: Id of the player the prompt is for.
            game_state: The game-state dict built in reset(); its "players"
                and "board" entries are read.

        Returns:
            The prompt text, including the rendered board ("." for empty
            cells) and the required action format.
        """
        player_name = self._player_name(player_id)
        symbol = game_state["players"][player_name]["symbol"]
        board = game_state["board"]
        board_str = "\n".join(
            " ".join(cell if cell is not None else "." for cell in row) for row in board
        )
        # Only the f-prefixed pieces are formatted; the plain literals are
        # emitted verbatim, so braces in them must NOT be doubled.
        prompt = (
            f"You are the **{player_name}**, a mystic engraving runes upon the sacred Runic Tablet.\n"
            f"Your rune symbol: {symbol}\n\n"
            "Goal:\n"
            "- Form a continuous line of three of your runes horizontally, vertically, or diagonally before your opponent.\n\n"
            f"Current Runic Tablet:\n{board_str}\n\n"
            "Action Format:\n"
            "- To inscribe, use syntax: \\boxed{[Inscribe:x,y]}\n"
            "- Coordinates (x,y) range from 0 to 2.\n\n"
            "Example Coordinate Map:\n"
            "(0,0) (0,1) (0,2)\n"
            "(1,0) (1,1) (1,2)\n"
            "(2,0) (2,1) (2,2)\n\n"
            "Example valid response:\n"
            "I shall inscribe my rune on the center tile for strength.\n"
            "\\boxed{[Inscribe:1,1]}\n\n"
            "Example invalid response:\n"
            "I think I will go middle-right.\n"
            "\\boxed{[Move:1,2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt

    # -------------------------------------------------------------------------
    # Reset
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to starting conditions for Runic Grid.

        Args:
            num_players: Must be 2 for this game.
            seed: Optional seed, forwarded to the framework state.

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("Runic Grid only supports 2 players.")
        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        board = [[None for _ in range(3)] for _ in range(3)]
        # Each player needs a distinct, non-empty rune: the symbol is what gets
        # written to the board and what the win check compares against.
        players = {
            name: {"symbol": symbol, "actions": []}
            for name, symbol in self._PLAYER_SYMBOLS.items()
        }
        # Determine who starts (Solar Scribe always first, deterministic)
        current_player = "Solar Scribe"
        game_state = {
            "turn_count": 0,
            "current_player": current_player,
            "board": board,
            "players": players,
            "winner": None,
            "outcome": "ongoing",
            "observations": [],
        }
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
        # Add initial game messages
        self.state.add_observation(
            "Welcome to Runic Grid: The duel of Solar and Lunar Scribes.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            f"The {current_player} begins and will place the first rune.",
            ta.ObservationType.GAME_MESSAGE,
        )

    # -------------------------------------------------------------------------
    # Step
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Execute a single player step in the Runic Grid game.

        The raw action is logged, the final \\boxed{} content is validated
        against the [Inscribe:x,y] grammar, and the rune is placed at
        board[x][y] if that tile is empty. A completed line ends the game with
        a win for the mover; a full board without a line ends it in a draw.

        Args:
            action: The player's proposed move text, including reasoning and \\boxed{} token.

        Returns:
            (done, info): The pair returned by the framework state's step().
        """
        player_id = self.state.current_player_id
        player_name = self._player_name(player_id)
        opponent_name = "Lunar Scribe" if player_id == 0 else "Solar Scribe"
        game_state = self.state.game_state
        symbol = game_state["players"][player_name]["symbol"]

        # Log the raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        # Parse and validate
        content = self._extract_answer_content(action)
        if not content:
            return self._reject("Malformed boxed syntax")
        match = self.grammar_pattern.match(content)
        if match is None:
            return self._reject("Action does not match grammar [Inscribe:x,y]")
        # The grammar only admits digits 0-2, so the captured coordinates are
        # always in bounds and always convertible to int.
        x, y = int(match.group(1)), int(match.group(2))

        board = game_state["board"]
        if board[x][y] is not None:
            return self._reject("Tile already inscribed")

        # Apply move (board is the same list object held by game_state)
        board[x][y] = symbol
        game_state["turn_count"] += 1
        game_state["players"][player_name]["actions"].append(content)
        game_state["observations"].append(
            {"player": player_name, "action": content}
        )
        self.state.add_observation(
            f"{player_name} inscribed {symbol} at ({x},{y}).",
            ta.ObservationType.GAME_MESSAGE,
        )

        # Check terminal conditions
        if self._check_winner(board, symbol):
            game_state["winner"] = player_name
            game_state["outcome"] = "win"
            self.state.set_winner(player_id=player_id, reason=f"{player_name} achieved a triad alignment.")
            return self.state.step()
        if self._is_full(board):
            game_state["outcome"] = "draw"
            self.state.set_draw(reason="Runic Tablet fully inscribed with no alignment.")
            return self.state.step()

        # Otherwise continue
        game_state["current_player"] = opponent_name
        done, info = self.state.step()
        return done, info

    # -------------------------------------------------------------------------
    # Close
    # -------------------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the framework state's rewards and game_info."""
        return self.state.rewards, self.state.game_info
```