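# Page header captured from the repository web viewer (not part of the module):
# Files: testtest7/env.py
#
# 251 lines
# 11 KiB
# Python
# Raw / Normal View / History
#
# 2025-11-21 09:44:15 +00:00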
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class MazeConquerorsEnv(ta.Env):
    """Turn-based two-player deterministic environment for Maze Conquerors.

    Two players ("ExplorerA" starting at the top-left corner and "ExplorerB"
    starting at the bottom-right corner) act on a square grid generated from
    a seed. Tiles are "." (open), "#" (wall), "R" (rune), "S" (start) and
    "G" (goal). On its turn a player submits one of:

    * ``[Move:up|down|left|right]`` - step one tile unless blocked,
    * ``[Scan:1]`` .. ``[Scan:3]`` - add the tiles within that radius to the
      player's visible set,
    * ``[Claim]`` - collect the rune on the current tile,
    * ``[Wait]`` - do nothing.

    The game ends when the turn limit is reached or no rune is left; the
    player with more runes wins, ties are broken by Manhattan distance to the
    maze centre.
    """

    # (row, column) offset applied to a position for each move direction.
    _DIRECTION_DELTAS = {
        "up": (-1, 0),
        "down": (1, 0),
        "left": (0, -1),
        "right": (0, 1),
    }

    def __init__(self, maze_size: int = 7, turn_limit: int = 30):
        """Store the configuration and precompile the action grammar.

        Args:
            maze_size: Side length of the square maze.
            turn_limit: Number of global turns after which the game ends.
        """
        self.maze_size = maze_size
        self.turn_limit = turn_limit
        self.state: Optional[ta.TwoPlayerState] = None
        # Precompile regexes for action grammar
        self.move_pattern = re.compile(r'^\[Move:(up|down|left|right)\]$')
        self.scan_pattern = re.compile(r'^\[Scan:[1-3]\]$')
        self.claim_pattern = re.compile(r'^\[Claim\]$')
        self.wait_pattern = re.compile(r'^\[Wait\]$')

    # ------------------------------------------------------------------ #
    # Helper: extract boxed content
    # ------------------------------------------------------------------ #
    def _extract_answer_content(self, action: str) -> str:
        """Return the text inside the first ``\\boxed{...}`` of *action*.

        The double-brace form ``\\boxed{{...}}`` is tried first, then the
        single-brace form. If neither is present the whole action, stripped
        of surrounding whitespace, is returned.
        """
        match = re.search(r'\\boxed\{\{([^}]*)\}\}', action)
        if not match:
            match = re.search(r'\\boxed\{([^}]*)\}', action)
        return match.group(1).strip() if match else action.strip()

    # ------------------------------------------------------------------ #
    # Maze generation
    # ------------------------------------------------------------------ #
    def _generate_maze(self, seed: int) -> List[List[str]]:
        """Build the maze grid for *seed*.

        Each tile independently becomes a wall with probability 0.15, a rune
        with probability 0.10, and stays open otherwise. The top-left tile is
        then marked "S" and the bottom-right tile "G", overriding whatever
        was rolled there.

        Returns:
            A ``maze_size`` x ``maze_size`` list of single-character strings.
        """
        # A private generator yields the same sequence as seeding the module
        # RNG would, without resetting the process-wide random state.
        rng = random.Random(seed)
        size = self.maze_size
        grid = [["." for _ in range(size)] for _ in range(size)]
        # place walls and runes
        for i in range(size):
            for j in range(size):
                roll = rng.random()
                if roll < 0.15:
                    grid[i][j] = "#"
                elif roll < 0.25:
                    grid[i][j] = "R"
        # mark start and goal positions
        grid[0][0] = "S"
        grid[size - 1][size - 1] = "G"
        return grid

    def _tiles_within(self, pos: Tuple[int, int], radius: int) -> List[List[int]]:
        """Return in-bounds ``[x, y]`` tiles in the square of *radius* around *pos*."""
        px, py = pos
        size = self.maze_size
        return [
            [x, y]
            for x in range(px - radius, px + radius + 1)
            for y in range(py - radius, py + radius + 1)
            if 0 <= x < size and 0 <= y < size
        ]

    def _initial_visible_tiles(self, pos: Tuple[int, int]) -> List[List[int]]:
        """Return the in-bounds tiles of the 3x3 neighbourhood centred on *pos*."""
        return self._tiles_within(pos, 1)

    # ------------------------------------------------------------------ #
    # Reset
    # ------------------------------------------------------------------ #
    def reset(self, num_players: int, seed: Optional[int] = None):
        """Reset the environment to an initial state.

        Args:
            num_players: Must be exactly 2.
            seed: Seed for maze generation; a random one is drawn when None.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If *num_players* is not 2.
        """
        if num_players != 2:
            raise ValueError("Maze Conquerors requires exactly two players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.turn_limit)
        if seed is None:
            seed = random.randint(0, 9999999)
        maze_layout = self._generate_maze(seed)
        last = self.maze_size - 1
        game_state = {
            "global_turn": 0,
            "turn_limit": self.turn_limit,
            "maze_dimensions": [self.maze_size, self.maze_size],
            "seed": seed,
            "maze_layout": maze_layout,
            "players": {
                "ExplorerA": {
                    "position": [0, 0],
                    "runes_collected": 0,
                    "moves_remaining": 5,
                    "visible_tiles": self._initial_visible_tiles((0, 0)),
                    "last_action": None,
                    "is_trapped": False,
                },
                "ExplorerB": {
                    "position": [last, last],
                    "runes_collected": 0,
                    "moves_remaining": 5,
                    "visible_tiles": self._initial_visible_tiles((last, last)),
                    "last_action": None,
                    "is_trapped": False,
                },
            },
            "observation_log": [],
            "game_status": "active",
            "winner": None,
        }
        roles = {0: "ExplorerA", 1: "ExplorerB"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=roles,
        )
        self.state.add_observation("Maze Conquerors initialized!", ta.ObservationType.GAME_MESSAGE)
        return self.state

    # ------------------------------------------------------------------ #
    # Step
    # ------------------------------------------------------------------ #
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Perform a single environment step for the current player.

        The raw action is recorded, its boxed content is extracted and
        validated against the action grammar, and the matching effect is
        applied to the game state.

        Args:
            action: The player's full response text.

        Returns:
            A ``(done, info)`` pair.
        """
        player_id = self.state.current_player_id
        role = "ExplorerA" if player_id == 0 else "ExplorerB"
        gs = self.state.game_state
        player = gs["players"][role]
        maze = gs["maze_layout"]

        # Record raw action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id)
        content = self._extract_answer_content(action)
        player["last_action"] = content

        # Validate action syntax; malformed actions neither get logged nor
        # advance the global turn.
        grammar = (self.move_pattern, self.scan_pattern, self.claim_pattern, self.wait_pattern)
        if not any(pattern.match(content) for pattern in grammar):
            self.state.set_invalid_move(reason="Invalid format: action not recognized.")
            return self.state.step()

        action_result = None
        if content.startswith("[Move:"):
            direction = content[len("[Move:"):-1]
            dx, dy = self._DIRECTION_DELTAS[direction]
            newx, newy = player["position"][0] + dx, player["position"][1] + dy
            if not (0 <= newx < self.maze_size and 0 <= newy < self.maze_size):
                self.state.set_invalid_move("Invalid move: outside maze bounds.")
            elif maze[newx][newy] == "#":
                self.state.set_invalid_move("Invalid move: path blocked.")
            else:
                player["position"] = [newx, newy]
                # Moving replaces (does not extend) the visible set.
                player["visible_tiles"] = self._initial_visible_tiles((newx, newy))
                action_result = "moved successfully"
        elif content.startswith("[Scan:"):
            radius = int(content[len("[Scan:"):-1])
            px, py = player["position"]
            # Union of old and newly revealed tiles, kept as [x, y] lists in
            # sorted order so the structure matches every other code path
            # and the prompt rendering stays stable.
            merged = {tuple(tile) for tile in player["visible_tiles"]}
            merged.update(tuple(tile) for tile in self._tiles_within((px, py), radius))
            player["visible_tiles"] = [list(tile) for tile in sorted(merged)]
            action_result = "revealed tiles"
        elif content == "[Claim]":
            px, py = player["position"]
            if maze[px][py] == "R":
                player["runes_collected"] += 1
                maze[px][py] = "."
                action_result = "claimed rune"
            else:
                self.state.set_invalid_move("Invalid claim: no rune present.")
        elif content == "[Wait]":
            action_result = "waited"

        gs["observation_log"].append(
            {"turn": gs["global_turn"], "player": role, "action": content, "result": action_result or "invalid"}
        )
        # Advance global turn
        # NOTE(review): well-formed but illegal actions (blocked move, empty
        # claim) also reach this point and consume a global turn, unlike
        # malformed actions above - confirm this asymmetry is intended.
        gs["global_turn"] += 1

        # Terminal condition check
        if self._check_terminal_conditions():
            # NOTE(review): returns an empty info dict instead of going
            # through self.state.step() - confirm callers do not need the
            # state's step info on the final turn.
            return True, {}
        return self.state.step()

    # ------------------------------------------------------------------ #
    # Terminal conditions
    # ------------------------------------------------------------------ #
    def _check_terminal_conditions(self) -> bool:
        """Settle the game if it is over.

        The game is over when the global turn has reached the turn limit or
        when no "R" tile remains in the maze. In either case the winner is
        determined before returning.

        Returns:
            True if the game ended, False otherwise.
        """
        gs = self.state.game_state
        if gs["global_turn"] >= gs["turn_limit"]:
            self._determine_winner(reason="Turn limit reached.")
            return True
        # Check if all runes collected
        if not any("R" in row for row in gs["maze_layout"]):
            self._determine_winner(reason="All runes collected.")
            return True
        return False

    # ------------------------------------------------------------------ #
    # Winner determination
    # ------------------------------------------------------------------ #
    def _determine_winner(self, reason: str):
        """Declare the winner (or a draw) on the state.

        The player with more runes wins. On equal rune counts the player
        whose position has the smaller Manhattan distance to the maze centre
        ``(maze_size // 2, maze_size // 2)`` wins; equal distances are a draw.

        Args:
            reason: Text passed on to the state as the end-of-game reason.
        """
        gs = self.state.game_state
        a, b = gs["players"]["ExplorerA"], gs["players"]["ExplorerB"]
        if a["runes_collected"] > b["runes_collected"]:
            self.state.set_winner(player_id=0, reason=reason)
        elif a["runes_collected"] < b["runes_collected"]:
            self.state.set_winner(player_id=1, reason=reason)
        else:
            core = (self.maze_size // 2, self.maze_size // 2)
            dist_a = abs(a["position"][0] - core[0]) + abs(a["position"][1] - core[1])
            dist_b = abs(b["position"][0] - core[0]) + abs(b["position"][1] - core[1])
            if dist_a < dist_b:
                self.state.set_winner(player_id=0, reason=reason)
            elif dist_b < dist_a:
                self.state.set_winner(player_id=1, reason=reason)
            else:
                self.state.set_draw(reason=reason)

    # ------------------------------------------------------------------ #
    # Prompt
    # ------------------------------------------------------------------ #
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt for *player_id* from *game_state*.

        The prompt contains the player's role, the turn counter, the rune
        count, the coordinates of the visible tiles and the action grammar.
        """
        role = "ExplorerA" if player_id == 0 else "ExplorerB"
        player = game_state["players"][role]
        status = (
            f"You are {role}, traversing an ancient shifting labyrinth to gather mystical runes.\n"
            f"Turn {game_state['global_turn']} of {game_state['turn_limit']}.\n"
            f"You have collected {player['runes_collected']} runes.\n"
        )
        surroundings = "Your visible tiles: " + str(player["visible_tiles"]) + "\n"
        # The scan actions are spelled out one by one so that the prompt
        # lists exactly the forms accepted by scan_pattern (radius 1 to 3).
        grammar = (
            "Allowed actions:\n"
            "[Move:up], [Move:down], [Move:left], [Move:right]\n"
            "[Scan:1], [Scan:2], [Scan:3], [Claim], [Wait]\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n"
            "Example valid response:\n"
            "I will explore the passage ahead.\n"
            "\\boxed{{[Move:right]}}\n"
        )
        return status + surroundings + grammar

    # ------------------------------------------------------------------ #
    # Boilerplate
    # ------------------------------------------------------------------ #
    def get_observation(self) -> Tuple[int, List]:
        """Return the current player id together with an empty list.

        NOTE(review): the second element is always ``[]``, so nothing
        recorded via ``add_observation`` is delivered through this method -
        confirm this override is intended.
        """
        return self.state.current_player_id, []

    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's rewards and game info."""
        return self.state.rewards, self.state.game_info