Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit 1e40154fa0

233
env.py Normal file
View File

@@ -0,0 +1,233 @@
```python
import re
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy beacons
    on a 3x3 grid aiming to align three of their own beacons in a row, column, or diagonal.

    Cells are labelled by row letter (A-C) and column digit (1-3), e.g. "B2".
    Player 0 is "Navigator Alpha" (key "A", colour "Blue"); player 1 is
    "Navigator Beta" (key "B", colour "Crimson").
    """

    # Matches \boxed{{...}} (double braces) or \boxed{...} (single braces).
    # The double-brace alternative is listed first so that a double-braced
    # answer is not mis-parsed as single-braced content with a leading "{".
    _BOXED_PATTERN = re.compile(r"\\boxed\{(?:\{([^}]*)\}\}|([^}]*)\})")

    # The 8 winning alignments: 3 rows, 3 columns, 2 diagonals.
    _WIN_LINES: Tuple[Tuple[str, str, str], ...] = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the state object on reset.
        """
        super().__init__()
        self.max_turns = max_turns
        # Compile regex patterns once
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ...
        self.all_cells: List[str] = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the last \boxed{...} in the action text.

        Both \boxed{{...}} and \boxed{...} are accepted. The last occurrence
        is used because players are told to put their final answer at the end
        of the response; earlier boxed text may be part of their reasoning.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The trimmed boxed content, or the whole trimmed action when no
            boxed expression is present.
        """
        last_match = None
        for last_match in self._BOXED_PATTERN.finditer(action):
            pass
        if last_match is None:
            return action.strip()
        double_braced, single_braced = last_match.groups()
        content = double_braced if double_braced is not None else single_braced
        return content.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """Return True if `color` occupies all three cells of any winning line."""
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WIN_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their own label (e.g. "A1"); occupied cells show
        "B" for a Blue beacon and "C" for any other colour (Crimson).
        """
        rows = []
        for r in "ABC":
            row_cells = []
            for c in "123":
                val = board[f"{r}{c}"]
                if val is None:
                    row_cells.append(f"{r}{c}")
                else:
                    row_cells.append("B" if val == "Blue" else "C")
            rows.append(" | ".join(row_cells))
        return "\n".join(rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Map a numeric player id to its display name."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True if `cell` is one of the nine grid labels."""
        return cell in self.all_cells

    # ------------------------ Core Env API ------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed; forwarded to the state object and stored in
                the game state.

        Returns:
            The freshly initialised state object (self.state).

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }
        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}
        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )
        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)
        board_msg = self._generate_board_str(empty_board)
        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)
        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed, and validated. A malformed action or a
        move onto an occupied cell is reported via set_invalid_move and leaves
        the board untouched. A valid move places the player's beacon, records
        it in the move history, and then checks for a win or a full-board draw.

        Args:
            action: The action text submitted by the current player.

        Returns:
            The result of self.state.step(), a tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        state = self.state
        game_state = state.game_state
        player_id = state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        player_color = game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
            return state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"
        # Defensive check: place_pattern already restricts cells to A-C / 1-3,
        # so this only fires if the pattern and all_cells ever diverge.
        if not self._cell_valid(cell_id):
            state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return state.step()

        board = game_state["board"]
        if board[cell_id] is not None:
            state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return state.step()

        # 4. Execute valid action: place beacon (mutates the stored board in place)
        board[cell_id] = player_color
        game_state["move_history"].append({"player": player_key, "action": extracted})
        game_state["turn_index"] += 1
        board_str = self._generate_board_str(board)

        if self._check_victory(board, player_color):
            # 5. Victory for the player who just moved
            game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
        elif all(v is not None for v in board.values()):
            # 6. Draw: grid filled, no winner
            game_state["is_draw"] = True
            state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
        else:
            # 7. Game continues: hand the turn to the other navigator
            game_state["active_player"] = "B" if player_key == "A" else "A"

        # 8. Publish the updated board (after any terminal outcome is set)
        state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
        return state.step()

    # -------------------- Player Prompt Generation --------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for one player.

        Passed to the state object as player_prompt_function during reset.

        Args:
            player_id: Numeric id of the player the prompt is for (0 or 1).
            game_state: Game-state dict; reads "player_symbols",
                "active_player" and "board".

        Returns:
            The prompt text: role, goal, current board, open cells, and the
            required answer format.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = "Navigator Alpha" if game_state["active_player"] == "A" else "Navigator Beta"
        board_repr = self._generate_board_str(game_state["board"])
        open_cells = [cell for cell, val in game_state["board"].items() if val is None]
        available = ", ".join(open_cells) if open_cells else "None"
        # NOTE: only the literals that interpolate values are f-strings. The
        # \boxed examples are plain strings, so their braces are written
        # singly; doubling them would show "{{" to the player.
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {available}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt
```