Initial commit from Openverse UI
This commit is contained in:
332
env.py
Normal file
332
env.py
Normal file
@@ -0,0 +1,332 @@
|
||||
```python
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class EchoMazeEnv(ta.Env):
    """
    EchoMaze: The Labyrinth Duel

    Deterministic, two-player, turn-based maze exploration game.

    Player 0 is "Sun" and player 1 is "Moon". On each turn the current player
    submits exactly one action (optionally wrapped in ``\\boxed{...}``):
    a move in one of four directions, ``[Scan]``, ``[Mark]`` or ``[Rest]``.
    Every action except ``[Rest]`` costs 1 Focus; ``[Rest]`` restores 1 Focus.
    The first player to step onto the Exit Glyph wins. When ``max_turns``
    valid actions have been played, the player with the smaller Manhattan
    distance to the exit wins (equal distance is a draw).
    """

    # Player id -> role name, also handed to the framework as role_mapping.
    _ROLES = {0: "Sun", 1: "Moon"}
    # Focus each player starts with.
    _INITIAL_FOCUS = 5
    # Probability that an interior cell is generated as a wall.
    _WALL_PROBABILITY = 0.25
    # Direction name -> (row delta, column delta). Rows grow southwards.
    _DIRECTION_DELTAS = {
        "North": (-1, 0),
        "South": (1, 0),
        "East": (0, 1),
        "West": (0, -1),
    }
    # Captures the content of the first \boxed{...} group in a response.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}", re.DOTALL)

    def __init__(self, max_turns: int = 60, maze_size: int = 9):
        """
        Initialize environment config (not game-state).

        Args:
            max_turns: Number of valid actions (both players combined) after
                which the game is decided by distance to the exit.
            maze_size: Side length of the square maze, including the outer
                wall ring.

        Raises:
            ValueError: If ``maze_size`` leaves no interior cell for the exit.
        """
        if maze_size < 3:
            # The outer ring is always wall; without an interior cell the
            # exit placement in _generate_maze would fail with an opaque
            # error from randint.
            raise ValueError("maze_size must be at least 3.")
        self.max_turns = max_turns
        self.maze_size = maze_size
        self.valid_actions = [
            "[Scan]",
            "[Mark]",
            "[Rest]",
            "[Move: North]",
            "[Move: South]",
            "[Move: East]",
            "[Move: West]",
        ]

    # ----------------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2
            seed: random seed for determinism

        Returns:
            The freshly initialised framework state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("EchoMaze requires exactly 2 players (Sun and Moon).")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        # Kept for backward compatibility with callers relying on the global
        # RNG being seeded by reset(); maze generation uses its own RNG.
        random.seed(seed)

        # Generate base maze using seed for deterministic layout
        maze_layout, exit_location, sun_start, moon_start = self._generate_maze(seed)

        game_state: Dict[str, Any] = {
            "maze_seed": seed,
            "turn_count": 0,
            "max_turns": self.max_turns,
            "maze_layout": maze_layout,
            "exit_location": exit_location,
            "players": {
                "Sun": self._initial_player_state(sun_start),
                "Moon": self._initial_player_state(moon_start),
            },
            "public_transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_move_reason": None,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._ROLES),
        )

        # Announce. The exit coordinates are deliberately NOT included: these
        # observations are sent without a specific recipient, and the exit is
        # meant to stay hidden from the players.
        self.state.add_observation("Welcome to EchoMaze: The Labyrinth Duel!", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(
            "The Exit Glyph is hidden somewhere in the labyrinth.",
            ta.ObservationType.GAME_MESSAGE,
        )
        return self.state

    # ----------------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, parsed and validated. An invalid action
        (unknown syntax, no Focus left, blocked move) is reported through
        ``state.set_invalid_move`` and does not consume a turn, change the
        transcript or trigger the win/turn-limit checks. A valid action is
        applied, recorded, and followed by the terminal-condition check.

        Args:
            action: Raw response text of the current player.

        Returns:
            Whatever ``self.state.step()`` returns for this turn.
        """
        player_id = self.state.current_player_id
        player_name = self._role_name(player_id)

        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        current_state = self.state.game_state

        # If game already terminal
        if current_state["winner"] or current_state["is_terminal"]:
            return self.state.step()

        extracted_action = self._extract_answer_content(action)
        player_data = current_state["players"][player_name]

        # Forget the reason of any earlier rejected attempt; it is set again
        # below (or inside _process_move) if this attempt is rejected too.
        current_state["invalid_move_reason"] = None

        # --- Validation ---
        if extracted_action not in self.valid_actions:
            return self._reject(current_state, "Unrecognized action syntax.")

        if player_data["focus"] <= 0 and extracted_action != "[Rest]":
            return self._reject(current_state, "Insufficient focus to perform action.")

        # --- Execute effect ---
        if extracted_action.startswith("[Move:"):
            direction = extracted_action.split(":")[1].strip(" ]")
            result_message = self._process_move(player_name, direction, current_state)
            if current_state["invalid_move_reason"] is not None:
                # Blocked move: _process_move already flagged it as invalid.
                # Report what happened but do not count it as a played turn.
                self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
                return self._advance(current_state)
        elif extracted_action == "[Scan]":
            result_message = self._process_scan(player_name, current_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Mark]":
            result_message = self._process_mark(player_name, current_state)
            player_data["focus"] -= 1
        else:  # "[Rest]" is the only remaining valid action
            result_message = self._process_rest(player_name, current_state)

        player_data["last_action"] = extracted_action
        current_state["public_transcript"].append(f"{player_name}: {extracted_action}")
        current_state["turn_count"] += 1

        # --- Check Terminal Conditions after action ---
        self._resolve_outcome(current_state)

        # Log observation message
        self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
        return self._advance(current_state)

    def _reject(self, game_state: Dict[str, Any], reason: str) -> Tuple[bool, ta.Info]:
        """Report an invalid action to the framework and record its reason."""
        self.state.set_invalid_move(reason)
        game_state["invalid_move_reason"] = reason
        return self._advance(game_state)

    def _advance(self, game_state: Dict[str, Any]) -> Tuple[bool, ta.Info]:
        """Advance the framework state and mirror its 'done' flag into game_state."""
        done, info = self.state.step()
        if done:
            # Only mark the game terminal once the framework says so. Setting
            # it on every invalid action would make step() ignore all further
            # actions even though the framework has not ended the game.
            game_state["is_terminal"] = True
        return done, info

    def _resolve_outcome(self, game_state: Dict[str, Any]) -> None:
        """Declare a winner or draw if a player is on the exit or turns ran out."""
        exit_loc = game_state["exit_location"]
        sun_pos = game_state["players"]["Sun"]["position"]
        moon_pos = game_state["players"]["Moon"]["position"]
        sun_at_exit = sun_pos == exit_loc
        moon_at_exit = moon_pos == exit_loc

        if sun_at_exit and moon_at_exit:
            self.state.set_draw("Both players reached the Exit Glyph simultaneously.")
            game_state["winner"] = "Draw"
        elif sun_at_exit:
            self.state.set_winner(0, "Sun reached the Exit Glyph.")
            game_state["winner"] = "Sun"
        elif moon_at_exit:
            self.state.set_winner(1, "Moon reached the Exit Glyph.")
            game_state["winner"] = "Moon"
        elif game_state["turn_count"] >= self.max_turns:
            sun_dist = self._manhattan_distance(sun_pos, exit_loc)
            moon_dist = self._manhattan_distance(moon_pos, exit_loc)
            if sun_dist < moon_dist:
                self.state.set_winner(0, "Sun is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Sun"
            elif moon_dist < sun_dist:
                self.state.set_winner(1, "Moon is closer to the Exit Glyph after max turns.")
                game_state["winner"] = "Moon"
            else:
                self.state.set_draw("Equal distance to Exit Glyph after max turns.")
                game_state["winner"] = "Draw"
        else:
            return
        game_state["is_terminal"] = True

    # ----------------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------------
    @staticmethod
    def _role_name(player_id: int) -> str:
        """Map a player id to its role name (0 -> Sun, anything else -> Moon)."""
        return "Sun" if player_id == 0 else "Moon"

    @classmethod
    def _initial_player_state(cls, start: List[int]) -> Dict[str, Any]:
        """Build the per-player entry of game_state for a player starting at *start*."""
        return {
            "position": start,
            "markers": [],
            "focus": cls._INITIAL_FOCUS,
            "observations": [f"Turn 1: Started at {tuple(start)}."],
            "last_action": None,
        }

    def _generate_maze(self, seed: Optional[int]):
        """
        Produces deterministic maze layout with walls (#), open cells (.), Exit (E).
        Ensures reproducibility.

        The outer ring is always wall. Each interior cell is a wall with
        probability 0.25. Sun starts on the first open cell scanning from the
        top-left, Moon on the first open cell scanning from the bottom-right;
        both start cells are marked in the layout ("S" / "M").

        NOTE(review): nothing guarantees that the exit is reachable from
        either start cell; unreachable exits are decided by the turn limit.

        Args:
            seed: Seed for the layout; the same seed yields the same maze.

        Returns:
            Tuple ``(maze, [exit_row, exit_col], sun_start, moon_start)``.
        """
        size = self.maze_size
        # A private RNG yields the same sequence as seeding the module-level
        # one, without clobbering global random state for other code.
        rng = random.Random(seed)
        maze = [["#"] * size for _ in range(size)]

        # Create random open cells
        for i in range(1, size - 1):
            for j in range(1, size - 1):
                maze[i][j] = "." if rng.random() > self._WALL_PROBABILITY else "#"

        # Place exit
        exit_x, exit_y = rng.randint(1, size - 2), rng.randint(1, size - 2)
        maze[exit_x][exit_y] = "E"

        # Mark Sun's start before looking for Moon's, so that a maze with a
        # single open cell cannot hand the same cell to both players.
        sun_start = self._find_open_cell(maze, from_top=True)
        maze[sun_start[0]][sun_start[1]] = "S"
        moon_start = self._find_open_cell(maze, from_top=False)
        maze[moon_start[0]][moon_start[1]] = "M"

        return maze, [exit_x, exit_y], sun_start, moon_start

    def _find_open_cell(self, maze: List[List[str]], from_top: bool = True) -> List[int]:
        """
        Return the first open (".") cell of a square maze as ``[row, col]``.

        Scans rows and columns ascending when ``from_top`` is true, descending
        otherwise. Falls back to the interior corner ([1, 1] or
        [size - 2, size - 2]) when no open cell exists.
        """
        size = len(maze)
        order = range(size) if from_top else range(size - 1, -1, -1)
        for i in order:
            for j in order:
                if maze[i][j] == ".":
                    return [i, j]
        # Fallback if none open
        return [1, 1] if from_top else [size - 2, size - 2]

    def _extract_answer_content(self, action: str) -> str:
        """Extract content from \\boxed{}; return the stripped text if there is none."""
        match = self._BOXED_PATTERN.search(action)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _manhattan_distance(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan distance between two ``[row, col]`` cells."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _process_move(self, player: str, direction: str, game_state: Dict[str, Any]) -> str:
        """
        Move *player* one cell in *direction* if the target cell is free.

        A successful move updates the position and costs 1 Focus. A move
        outside the grid or into a wall is reported via
        ``state.set_invalid_move`` and stored in
        ``game_state["invalid_move_reason"]``; position and Focus stay
        unchanged in that case.

        Returns:
            A message describing what happened.
        """
        player_data = game_state["players"][player]
        x, y = player_data["position"]
        dx, dy = self._DIRECTION_DELTAS.get(direction, (0, 0))
        new_x, new_y = x + dx, y + dy
        maze = game_state["maze_layout"]

        if not (0 <= new_x < len(maze) and 0 <= new_y < len(maze[0])):
            reason = "Cannot move outside bounds."
            self.state.set_invalid_move(reason)
            game_state["invalid_move_reason"] = reason
            return f"{player} attempted to move outside bounds."

        if maze[new_x][new_y] == "#":
            # Same text for the framework and for game_state (they used to differ).
            reason = "Cannot move through wall."
            self.state.set_invalid_move(reason)
            game_state["invalid_move_reason"] = reason
            return f"{player} tried to move into a wall."

        player_data["position"] = [new_x, new_y]
        player_data["focus"] -= 1
        return f"{player} moved {direction} to {(new_x, new_y)}."

    def _process_scan(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Describe the four cells adjacent to *player*.

        Each direction is reported as "Wall", "Open" (any non-wall cell) or
        "Out of bounds". The Focus cost is charged by the caller.
        """
        row, col = game_state["players"][player]["position"]
        maze = game_state["maze_layout"]
        neighbours = {
            "North": (row - 1, col),
            "South": (row + 1, col),
            "East": (row, col + 1),
            "West": (row, col - 1),
        }
        findings = {}
        for dir_name, (x, y) in neighbours.items():
            if 0 <= x < len(maze) and 0 <= y < len(maze[0]):
                findings[dir_name] = "Wall" if maze[x][y] == "#" else "Open"
            else:
                findings[dir_name] = "Out of bounds"
        obs_msg = ", ".join(f"{k}: {v}" for k, v in findings.items())
        return f"{player} scanned surroundings. {obs_msg}"

    def _process_mark(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Record *player*'s current cell in their marker list (no duplicates).

        The Focus cost is charged by the caller.
        """
        pos = game_state["players"][player]["position"]
        markers = game_state["players"][player]["markers"]
        if pos not in markers:
            # Store a copy so later position updates cannot alias the marker.
            markers.append(pos.copy())
        return f"{player} marked the cell at {tuple(pos)}."

    def _process_rest(self, player: str, game_state: Dict[str, Any]) -> str:
        """Restore 1 Focus to *player* and report the new total."""
        player_data = game_state["players"][player]
        player_data["focus"] += 1
        return f"{player} rested and recovered 1 Focus (now {player_data['focus']})."

    # ----------------------------------------------------------------------
    # Prompt
    # ----------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates player prompt at start of game.

        Args:
            player_id: 0 for Sun, otherwise Moon.
            game_state: Current game state; position and Focus are read from it.

        Returns:
            The instruction text shown to the player.
        """
        player_name = self._role_name(player_id)
        pos = tuple(game_state["players"][player_name]["position"])
        focus = game_state["players"][player_name]["focus"]
        intro = (
            f"You are **Player {player_name}**, an explorer within the mystic underground labyrinth of EchoMaze.\n"
            f"Your current position is {pos} with Focus = {focus}.\n"
            "Your objective is to reach the Exit Glyph before your rival.\n"
            "Actions must be exactly one of:\n"
            " - [Move: North], [Move: South], [Move: East], [Move: West]\n"
            " - [Scan] — Reveal walls around you.\n"
            " - [Mark] — Leave a marker in this cell.\n"
            " - [Rest] — Skip turn, regain 1 Focus.\n"
            # The rule below is enforced in step(); players were never told.
            "Every action except [Rest] costs 1 Focus; with 0 Focus you must [Rest].\n\n"
            "Only one action per turn. Place it inside \\boxed{} like so:\n"
            "Example valid response:\n"
            "I decide to move north.\n"
            "\\boxed{[Move: North]}\n\n"
            "Example invalid response:\n"
            "I will move upward.\n"
            "\\boxed{[Move: Up]} <-- invalid action\n"
        )
        return intro

    # ----------------------------------------------------------------------
    # Framework helpers
    # ----------------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """
        Return observation for current player.

        Delegates to the base environment instead of returning the raw
        game_state dict, which contains the maze layout and exit location
        and does not match the annotated return type.
        NOTE(review): relies on ta.Env providing get_observation() — confirm
        against the installed TextArena version.
        """
        return super().get_observation()

    def close(self) -> Tuple[Dict, Dict]:
        """Return final info: the framework rewards and the final game_state."""
        return self.state.rewards, self.state.game_state
|
||||
```
|
||||
Reference in New Issue
Block a user