commit 9d4755a5fc43b52d930cba7d37d1c4606873ec09 Author: bobbycxy Date: Fri Nov 21 09:44:15 2025 +0000 Initial commit from Openverse UI diff --git a/README.md b/README.md new file mode 100644 index 0000000..5da4da3 --- /dev/null +++ b/README.md @@ -0,0 +1,212 @@ +# Game Design Document: **"Maze Conquerors"** + +--- + +## 1. Concept Paragraph + +**Maze Conquerors** is a deterministic, turn-based strategy game where two rival explorers traverse a shifting labyrinth to collect mystical runes scattered across the grid. Each turn, a player can **move**, **scan**, or **claim** spaces within the maze to gain strategic advantage. The maze itself is a fixed grid seeded at initialization to ensure reproducibility. The players cannot see the whole maze initially—they progressively reveal it through scanning or movement. The objective is to collect more runes than the opponent before the turn limit is reached or both players become trapped. + +This design is **entirely unrelated** to any negotiation or commerce environment; it deals solely with spatial planning, exploration, and deterministic turn resolution. + +--- + +## 2. Roles and Win Condition + +- **Roles:** + - Two players: **Explorer A** and **Explorer B**, each beginning at opposite edges of the maze. + +- **Player Objective:** + - Navigate the grid to collect as many **Runes** as possible. + - Avoid getting trapped in dead ends or blocked paths. + +- **Win Condition:** + - The game ends when the turn limit is reached **OR** both players have no valid moves left. + - The winner is the player with **more collected runes**. + - If both have the same number of runes, the winner is the one **closer to the Maze Core** (measured as Manhattan distance). + - If still tied, the game is declared a **draw**. + +--- + +## 3. Turn Structure and Determinism + +- **Turn Order:** + - Explorer A acts first on odd-numbered turns; Explorer B on even-numbered turns. 
+ +- **Turn Limit:** + - Default: 30 turns (modifiable via environment configuration). + +- **Determinism:** + - All maze generation and rune placements are produced using a seeded random generator during `reset(seed)` to guarantee reproducibility of identical sequences. + +--- + +## 4. Action Grammar (Machine-Parseable) + +The content within `\boxed{{}}` defines the action. Each action must strictly follow one of the grammars below. + +### **Allowed Action Tokens** + +| Action Type | Grammar Format | Example Valid | Example Invalid | Invalidity Reason | +|--------------|----------------|----------------|------------------|-------------------| +| **[Move:]** | `^\[Move:(up\|down\|left\|right)\]$` | `[Move:up]` | `[Move:upper]` | "upper" not a valid direction | +| **[Scan:]** | `^\[Scan:[1-3]\]$` | `[Scan:2]` | `[Scan:5]` | radius out of allowed range | +| **[Claim]** | `^\[Claim\]$` | `[Claim]` | `[Claim:rune]` | extra parameters not permitted | +| **[Wait]** | `^\[Wait\]$` | `[Wait]` | `[Pause]` | Invalid token name | + +--- + +## 5. 
Game State Schema + +```json +{ + "global_turn": 7, + "turn_limit": 30, + "maze_dimensions": [7, 7], + "seed": 12345, + "maze_layout": [ + ["S", ".", ".", "#", ".", ".", "R"], + [".", "#", ".", ".", ".", "#", "."], + [".", ".", "R", ".", ".", ".", "."], + ["#", ".", ".", ".", "#", ".", "#"], + [".", "R", ".", ".", ".", "R", "."], + [".", ".", ".", "#", ".", ".", "."], + ["R", ".", "#", ".", ".", ".", "G"] + ], + "players": { + "ExplorerA": { + "position": [0, 0], + "runes_collected": 2, + "moves_remaining": 5, + "visible_tiles": [[0,0],[0,1],[1,0]], + "last_action": "[Move:down]", + "is_trapped": false + }, + "ExplorerB": { + "position": [6, 6], + "runes_collected": 3, + "moves_remaining": 5, + "visible_tiles": [[6,6],[6,5],[5,6]], + "last_action": "[Scan:2]", + "is_trapped": false + } + }, + "observation_log": [ + {"turn":1, "player":"ExplorerA", "action":"[Move:right]", "result":"moved successfully"}, + {"turn":1, "player":"ExplorerB", "action":"[Scan:2]", "result":"revealed tiles"} + ], + "game_status": "active", + "winner": null +} +``` + +--- + +## 6. Initialization Rules + +- Maze generated using `maze_dimensions` and `seed`. +- Starting positions: Explorer A at top-left corner `(0,0)`, Explorer B at bottom-right corner `(n-1,n-1)`. +- `runes` are distributed pseudo-randomly on open tiles based on `seed`. +- Each player begins with a 3x3 visible region centered on their starting tile. +- Initial observation: description of immediate surroundings and actions available. + +--- + +## 7. Validation and Error Handling + +- If extracted action text fails regex match for any valid token → `Invalid format`. +- If movement direction leads to a wall or outside the grid → `Invalid move: path blocked`. +- If `Scan` exceeds remaining moves or repeats within two consecutive turns → `Invalid scan usage`. +- If `Claim` executed on a cell without rune → `Invalid claim: no rune present`. 
+- If `Wait` occurs while trapped → allowed but auto-check for terminal trap condition afterward. + +Each invalid action triggers `set_invalid_move(player, reason)` and ends that player’s turn with no state change. + +--- + +## 8. Terminal Conditions and Scoring + +**Terminal checks performed at end of each full round:** + +1. All players trapped (no legal moves remain) → immediate end. +2. `global_turn` >= `turn_limit` → end game. +3. All runes collected → end game. + +**Scoring Rules:** +- Primary: `runes_collected` +- Secondary (tie-break): distance to center (`maze_core` `(n//2, n//2)`) +- Tertiary: earliest to reach current score (fewer turns used wins) +- If all equal → draw. + +--- + +## 9. Player Prompt Specification + +Each turn, `_generate_player_prompt` summarizes environment and expectations. + +**Prompt Outline:** + +- Brief identity blurb and status summary: + - "You are Explorer A, traversing an ancient labyrinth to gather runes before your rival." +- Details of known surroundings (visible tiles, walls, runes, opponent position if visible). +- State remaining turns, runes collected, and possible legal actions. +- Grammar requirement and response format. + +**Example Section within Prompt:** + +``` +Your visible surroundings: +◎ = your position +# = wall +R = rune +. = empty tile + +Turn 7 of 30. You have collected 2 runes. + +Allowed actions: +[Move:up], [Move:down], [Move:left], [Move:right] +[Scan:1–3] +[Claim] +[Wait] + +Put your final answer within \boxed{{}} at the end of your response. + +Example valid response: +I need to get closer to the center and gather more runes. +\boxed{{[Move:right]}} + +Example invalid response: +\boxed{{Go right}} ← not matching action grammar +``` + +**Required Helper:** +Function `_extract_answer_content(self, action: str) -> str` isolates content inside the `\boxed{{}}` for validation and interpretation. + +--- + +## 10. 
API Mapping Plan + +**`reset(seed: int) -> (game_state, observations)`** +- Clears previous game data. +- Generates maze using deterministic seed. +- Initializes players, turns, and logs. +- Returns initial `game_state` and per-player observations. + +**`step(action_dict) -> (game_state, observations, rewards, done, info)`** +- Receives both players’ boxed actions. +- Uses `_extract_answer_content` to parse action text. +- Validates actions, applies successful ones deterministically. +- Updates `maze_layout`, player positions, visibility, and `observation_log`. +- Checks terminal conditions, computes rewards based on runes and outcomes. +- Returns updated game state and next observations. + +**`_generate_player_prompt(player)`** +- Builds text described above based on current `game_state`. +- Outlines visible region, remaining turns, and valid command grammar. + +--- + +## 11. Copy-Check Against the Example + +All concepts in this document—**maze navigation, runes, explorers, grid movement, scanning, and claiming**—are **unique** and unrelated to any negotiation or offer-making scenarios. +Resource names (`runes`, `maze_layout`, `visible_tiles`, `explorers`) and objectives (spatial exploration, rune collection) are fully original and distinct from any trade or dialog games. +The `game_state` schema, prompt text, and mechanics exclusively belong to this **Maze Conquerors** design. 
\ No newline at end of file
diff --git a/env.py b/env.py
new file mode 100644
index 0000000..1a0d6ec
--- /dev/null
+++ b/env.py
@@ -0,0 +1,342 @@
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+import textarena as ta
+
+
+class MazeConquerorsEnv(ta.Env):
+    """Turn-based two-player deterministic environment for Maze Conquerors.
+
+    Two explorers start in opposite corners of a square grid and alternate
+    single actions (move, scan, claim, wait) until the turn limit is reached,
+    every rune has been claimed, or neither explorer can move.
+    """
+
+    # Row/column offset applied to a position for each movement direction.
+    _DIRECTIONS = {"up": (-1, 0), "down": (1, 0), "left": (0, -1), "right": (0, 1)}
+
+    def __init__(self, maze_size: int = 7, turn_limit: int = 30):
+        """Store the configuration and precompile the action grammar.
+
+        Args:
+            maze_size: Side length of the square maze.
+            turn_limit: Number of valid actions after which the game ends.
+        """
+        self.maze_size = maze_size
+        self.turn_limit = turn_limit
+        self.state: Optional[ta.TwoPlayerState] = None
+        # Precompile regexes for action grammar
+        self.move_pattern = re.compile(r'^\[Move:(up|down|left|right)\]$')
+        self.scan_pattern = re.compile(r'^\[Scan:[1-3]\]$')
+        self.claim_pattern = re.compile(r'^\[Claim\]$')
+        self.wait_pattern = re.compile(r'^\[Wait\]$')
+
+    # ------------------------------------------------------------------ #
+    # Helper: extract boxed content
+    # ------------------------------------------------------------------ #
+    def _extract_answer_content(self, action: str) -> str:
+        """Return the text inside the last boxed span of ``action``.
+
+        The double-brace form is tried first and the single-brace form is the
+        fallback. Without any boxed span the stripped raw text is returned.
+        """
+        for pattern in (r'\\boxed\{\{([^}]*)\}\}', r'\\boxed\{([^}]*)\}'):
+            found = re.findall(pattern, action)
+            if found:
+                # Players are told to put the final answer at the end, so an
+                # earlier boxed span quoted while reasoning must not win.
+                return found[-1].strip()
+        return action.strip()
+
+    # ------------------------------------------------------------------ #
+    # Maze generation
+    # ------------------------------------------------------------------ #
+    def _generate_maze(self, seed: int) -> List[List[str]]:
+        """Build the maze layout for ``seed``.
+
+        Every tile independently becomes a wall ("#") or a rune ("R") or
+        stays open ("."); the two corners are then overwritten with the "S"
+        and "G" markers.
+        """
+        # A private generator keeps layouts reproducible without reseeding
+        # the process-wide ``random`` module.
+        rng = random.Random(seed)
+        size = self.maze_size
+        grid = [["." for _ in range(size)] for _ in range(size)]
+        # place walls and runes
+        for i in range(size):
+            for j in range(size):
+                roll = rng.random()
+                if roll < 0.15:
+                    grid[i][j] = "#"
+                elif roll < 0.25:
+                    grid[i][j] = "R"
+        # mark start and goal positions
+        grid[0][0] = "S"
+        grid[size - 1][size - 1] = "G"
+        # A maze without runes would end on the first valid action, so put
+        # one on a seeded-random open tile when the rolls produced none.
+        if not any("R" in row for row in grid):
+            open_tiles = [(i, j) for i in range(size) for j in range(size) if grid[i][j] == "."]
+            if open_tiles:
+                i, j = rng.choice(open_tiles)
+                grid[i][j] = "R"
+        return grid
+
+    def _initial_visible_tiles(self, pos: Tuple[int, int]) -> List[List[int]]:
+        """Return the in-bounds ``[row, col]`` tiles of the 3x3 area around ``pos``."""
+        visible = []
+        for dx in (-1, 0, 1):
+            for dy in (-1, 0, 1):
+                x, y = pos[0] + dx, pos[1] + dy
+                if 0 <= x < self.maze_size and 0 <= y < self.maze_size:
+                    visible.append([x, y])
+        return visible
+
+    @staticmethod
+    def _merge_visible(known: List[List[int]], revealed: List[List[int]]) -> List[List[int]]:
+        """Union two tile lists into a sorted, duplicate-free list of ``[row, col]``."""
+        return [list(tile) for tile in sorted({tuple(t) for t in known + revealed})]
+
+    def _is_trapped(self, maze: List[List[str]], pos: List[int]) -> bool:
+        """Return True when no orthogonal neighbour of ``pos`` can be entered."""
+        for dx, dy in self._DIRECTIONS.values():
+            x, y = pos[0] + dx, pos[1] + dy
+            if 0 <= x < self.maze_size and 0 <= y < self.maze_size and maze[x][y] != "#":
+                return False
+        return True
+
+    def _render_visible_map(self, game_state: Dict[str, Any], role: str) -> str:
+        """Draw the maze as ``role`` knows it, one text row per maze row."""
+        player = game_state["players"][role]
+        seen = {tuple(tile) for tile in player["visible_tiles"]}
+        here = tuple(player["position"])
+        rows = []
+        for i, maze_row in enumerate(game_state["maze_layout"]):
+            cells = []
+            for j, tile in enumerate(maze_row):
+                if (i, j) == here:
+                    cells.append("◎")
+                elif (i, j) in seen:
+                    cells.append(tile)
+                else:
+                    cells.append("?")
+            rows.append(" ".join(cells))
+        return "\n".join(rows)
+
+    # ------------------------------------------------------------------ #
+    # Reset
+    # ------------------------------------------------------------------ #
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """Start a new game and return the freshly initialised state.
+
+        Args:
+            num_players: Must be 2.
+            seed: Seed for the maze layout; a random one is drawn when omitted.
+
+        Raises:
+            ValueError: If ``num_players`` is not 2.
+        """
+        if num_players != 2:
+            raise ValueError("Maze Conquerors requires exactly two players.")
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.turn_limit)
+        if seed is None:
+            seed = random.randint(0, 9999999)
+        maze_layout = self._generate_maze(seed)
+        last = self.maze_size - 1
+
+        def new_player(row: int, col: int) -> Dict[str, Any]:
+            return {
+                "position": [row, col],
+                "runes_collected": 0,
+                "moves_remaining": 5,
+                "visible_tiles": self._initial_visible_tiles((row, col)),
+                "last_action": None,
+                "is_trapped": self._is_trapped(maze_layout, [row, col]),
+            }
+
+        game_state = {
+            "global_turn": 0,
+            "turn_limit": self.turn_limit,
+            "maze_dimensions": [self.maze_size, self.maze_size],
+            "seed": seed,
+            "maze_layout": maze_layout,
+            "players": {
+                "ExplorerA": new_player(0, 0),
+                "ExplorerB": new_player(last, last),
+            },
+            "observation_log": [],
+            "game_status": "active",
+            "winner": None,
+        }
+
+        roles = {0: "ExplorerA", 1: "ExplorerB"}
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=roles)
+        self.state.add_observation("Maze Conquerors initialized!", ta.ObservationType.GAME_MESSAGE)
+        return self.state
+
+    # ------------------------------------------------------------------ #
+    # Step
+    # ------------------------------------------------------------------ #
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """Apply the current player's action and advance the game.
+
+        An invalid action (unknown format, blocked move, claim without a rune)
+        is reported through ``set_invalid_move`` and does not consume a turn.
+        """
+        player_id = self.state.current_player_id
+        role = "ExplorerA" if player_id == 0 else "ExplorerB"
+        gs = self.state.game_state
+        player = gs["players"][role]
+
+        # Record raw action
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id)
+        content = self._extract_answer_content(action)
+
+        action_result, error = self._apply_action(player, gs["maze_layout"], content)
+        gs["observation_log"].append(
+            {"turn": gs["global_turn"], "player": role, "action": content, "result": action_result or "invalid"}
+        )
+        if error is not None:
+            # Stop here: an invalid action must not advance the turn counter
+            # or trigger the end-of-game checks.
+            self.state.set_invalid_move(reason=error)
+            return self.state.step()
+
+        player["last_action"] = content
+        for explorer in gs["players"].values():
+            explorer["is_trapped"] = self._is_trapped(gs["maze_layout"], explorer["position"])
+        gs["global_turn"] += 1
+
+        # Tell only the acting player what happened and what they can now see.
+        self.state.add_observation(
+            f"Result: {action_result}.\n{self._render_visible_map(gs, role)}",
+            ta.ObservationType.GAME_MESSAGE,
+            to_id=player_id,
+        )
+
+        self._check_terminal_conditions()
+        return self.state.step()
+
+    def _apply_action(
+        self, player: Dict[str, Any], maze: List[List[str]], content: str
+    ) -> Tuple[Optional[str], Optional[str]]:
+        """Validate and execute ``content`` for ``player``.
+
+        Returns:
+            ``(result, None)`` on success, or ``(None, reason)`` for an invalid
+            action, in which case ``player`` and ``maze`` are left unchanged.
+        """
+        if self.move_pattern.match(content):
+            dx, dy = self._DIRECTIONS[content[6:-1]]
+            newx, newy = player["position"][0] + dx, player["position"][1] + dy
+            if not (0 <= newx < self.maze_size and 0 <= newy < self.maze_size):
+                return None, "Invalid move: outside maze bounds."
+            if maze[newx][newy] == "#":
+                return None, "Invalid move: path blocked."
+            player["position"] = [newx, newy]
+            # Keep what was already discovered and add the new 3x3 area.
+            player["visible_tiles"] = self._merge_visible(
+                player["visible_tiles"], self._initial_visible_tiles((newx, newy))
+            )
+            return "moved successfully", None
+        if self.scan_pattern.match(content):
+            radius = int(content[6:-1])
+            px, py = player["position"]
+            revealed = [
+                [i, j]
+                for i in range(max(0, px - radius), min(self.maze_size, px + radius + 1))
+                for j in range(max(0, py - radius), min(self.maze_size, py + radius + 1))
+            ]
+            player["visible_tiles"] = self._merge_visible(player["visible_tiles"], revealed)
+            return "revealed tiles", None
+        if self.claim_pattern.match(content):
+            px, py = player["position"]
+            if maze[px][py] != "R":
+                return None, "Invalid claim: no rune present."
+            player["runes_collected"] += 1
+            maze[px][py] = "."
+            return "claimed rune", None
+        if self.wait_pattern.match(content):
+            return "waited", None
+        return None, "Invalid format: action not recognized."
+
+    # ------------------------------------------------------------------ #
+    # Terminal conditions
+    # ------------------------------------------------------------------ #
+    def _check_terminal_conditions(self) -> bool:
+        """Finish the game if an end condition holds and report whether it did."""
+        gs = self.state.game_state
+        if all(explorer["is_trapped"] for explorer in gs["players"].values()):
+            self._determine_winner(reason="Both explorers are trapped.")
+            return True
+        if gs["global_turn"] >= gs["turn_limit"]:
+            self._determine_winner(reason="Turn limit reached.")
+            return True
+        # Check if all runes collected
+        if not any("R" in row for row in gs["maze_layout"]):
+            self._determine_winner(reason="All runes collected.")
+            return True
+        return False
+
+    # ------------------------------------------------------------------ #
+    # Winner determination
+    # ------------------------------------------------------------------ #
+    def _determine_winner(self, reason: str):
+        """Declare the result: most runes first, then closest to the maze core."""
+        gs = self.state.game_state
+        core = self.maze_size // 2
+
+        def rank(explorer: Dict[str, Any]) -> Tuple[int, int]:
+            row, col = explorer["position"]
+            # Smaller is better: more runes, then shorter Manhattan distance.
+            return -explorer["runes_collected"], abs(row - core) + abs(col - core)
+
+        rank_a, rank_b = rank(gs["players"]["ExplorerA"]), rank(gs["players"]["ExplorerB"])
+        gs["game_status"] = "finished"
+        if rank_a == rank_b:
+            self.state.set_draw(reason=reason)
+            return
+        winner_id = 0 if rank_a < rank_b else 1
+        gs["winner"] = "ExplorerA" if winner_id == 0 else "ExplorerB"
+        self.state.set_winner(player_id=winner_id, reason=reason)
+
+    # ------------------------------------------------------------------ #
+    # Prompt
+    # ------------------------------------------------------------------ #
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """Build the instructions shown to ``player_id`` from ``game_state``."""
+        role = "ExplorerA" if player_id == 0 else "ExplorerB"
+        player = game_state["players"][role]
+        status = (
+            f"You are {role}, traversing an ancient shifting labyrinth to gather mystical runes.\n"
+            f"Turn {game_state['global_turn']} of {game_state['turn_limit']}.\n"
+            f"You have collected {player['runes_collected']} runes.\n"
+        )
+        surroundings = (
+            "Your visible tiles: " + str(player["visible_tiles"]) + "\n"
+            + "Known map (◎ = you, # = wall, R = rune, . = empty, S/G = corner markers, ? = unexplored):\n"
+            + self._render_visible_map(game_state, role) + "\n"
+        )
+        grammar = (
+            "Allowed actions:\n"
+            "[Move:up], [Move:down], [Move:left], [Move:right]\n"
+            "[Scan:1–3], [Claim], [Wait]\n"
+            "Put your final answer within \\boxed{{}} at the end of your response.\n"
+            "Example valid response:\n"
+            "I will explore the passage ahead.\n"
+            "\\boxed{{[Move:right]}}\n"
+        )
+        return status + surroundings + grammar
+
+    # ------------------------------------------------------------------ #
+    # Boilerplate
+    # ------------------------------------------------------------------ #
+    def get_observation(self) -> Tuple[int, List]:
+        """Return the current player's id and their pending observations."""
+        return self.state.get_current_player_observation()
+
+    def close(self) -> Tuple[Dict, Dict]:
+        """Return the final rewards and game info recorded on the state."""
+        return self.state.rewards, self.state.game_info
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3559aa9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,14 @@
+# pyproject.toml
+
+[project]
+name = "game_20251121_094304"
+version = "0.1.0"
+description = "Game Design Document: **'Maze Conquerors'** environment 
generated for TextArena." +dependencies = [ + "textarena>=0.7.3" +] + +[openverse] +entry_point = "env:MazeConquerorsEnv" +tags = ["openverse", "generated"] +author = "Openverse"