From d6f8ed4bf86b0811042f725df68cf12e226bf8cf Mon Sep 17 00:00:00 2001
From: bobbycxy <bobbycxy1994@gmail.com>
Date: Fri, 21 Nov 2025 06:58:15 +0000
Subject: [PATCH] Initial commit from Openverse UI (via Openverse CLI)

---
 README.md      |   2 +
 env.py         | 278 +++++++++++++++++++++++++++++++++++++++++++++++++
 environment.md | 215 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 495 insertions(+)
 create mode 100644 README.md
 create mode 100644 env.py
 create mode 100644 environment.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d8c0d2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# testtest2
+Generated via Openverse
\ No newline at end of file
diff --git a/env.py b/env.py
new file mode 100644
index 0000000..f184066
--- /dev/null
+++ b/env.py
@@ -0,0 +1,278 @@
+"""TextArena environment for the two-player maze race "Labyrinth Command"."""
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class LabyrinthCommandEnv(ta.Env):
+    """
+    Deterministic, turn-based two-player tactical maze environment: "Labyrinth Command"
+    Two players (Explorer A and B) move through a deterministic maze to reach the Central Beacon.
+    """
+
+    def __init__(self, max_turns: int = 40, maze_width: int = 7, maze_height: int = 7):
+        self.max_turns = max_turns
+        self.maze_width = maze_width
+        self.maze_height = maze_height
+        self.move_pattern = re.compile(r"^\[Move:(North|South|East|West)\]$")
+        self.scan_pattern = re.compile(r"^\[Scan\]$")
+        self.wait_pattern = re.compile(r"^\[Wait\]$")
+
+    # -------------------------------------------------------------------------
+    # ========== Helper: Extract boxed command ==========
+    def _extract_answer_content(self, action: str) -> str:
+        """Extract content within \\boxed{{...}}."""
+        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return action.strip()
+
+    # -------------------------------------------------------------------------
+    # ========== Maze and visibility helpers ==========
+    def _generate_deterministic_maze(self, seed: int) -> List[List[str]]:
+        """Generate deterministic maze using random seeded layout of blocked cells."""
+        random.seed(seed)
+        maze = [["." for _ in range(self.maze_width)] for _ in range(self.maze_height)]
+        num_blocks = (self.maze_width * self.maze_height) // 10  # about 10% blocked
+        for _ in range(num_blocks):
+            x = random.randint(0, self.maze_width - 1)
+            y = random.randint(0, self.maze_height - 1)
+            if (x, y) != (0, 0) and (x, y) != (self.maze_width - 1, self.maze_height - 1):
+                maze[y][x] = "X"
+        return maze
+
+    def _compute_visible_map(self, maze: List[List[str]], pos: Tuple[int, int]) -> List[List[str]]:
+        """Compute a 3x3 visible map centered on pos."""
+        visible = []
+        for dy in range(-1, 2):
+            row = []
+            for dx in range(-1, 2):
+                nx, ny = pos[0] + dx, pos[1] + dy
+                if 0 <= nx < self.maze_width and 0 <= ny < self.maze_height:
+                    row.append(maze[ny][nx])
+                else:
+                    row.append("?")
+            visible.append(row)
+        return visible
+
+    def _distance(self, a: Tuple[int, int], b: Tuple[int, int]) -> int:
+        return abs(a[0] - b[0]) + abs(a[1] - b[1])
+
+    # -------------------------------------------------------------------------
+    # ========== Reset ==========
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players: must be 2.
+            seed: optional deterministic seed.
+        """
+        if num_players != 2:
+            raise ValueError("Labyrinth Command requires exactly 2 players.")
+
+        seed = seed if seed is not None else random.randint(1, 999999)
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+        maze = self._generate_deterministic_maze(seed)
+        beacon_pos = (self.maze_width // 2, self.maze_height // 2)
+        maze[beacon_pos[1]][beacon_pos[0]] = "B"
+
+        start_A = (0, 0)
+        start_B = (self.maze_width - 1, self.maze_height - 1)
+
+        player_states = {
+            "A": {
+                "position": start_A,
+                "visible_map": self._compute_visible_map(maze, start_A),
+                "visited_cells": [list(start_A)],
+                "last_action": None,
+            },
+            "B": {
+                "position": start_B,
+                "visible_map": self._compute_visible_map(maze, start_B),
+                "visited_cells": [list(start_B)],
+                "last_action": None,
+            },
+        }
+
+        cells_blocked = [[x, y] for y in range(self.maze_height) for x in range(self.maze_width) if maze[y][x] == "X"]
+
+        game_state = {
+            "seed": seed,
+            "turn_index": 0,
+            "max_turns": self.max_turns,
+            "maze_width": self.maze_width,
+            "maze_height": self.maze_height,
+            "beacon_position": list(beacon_pos),
+            "cells_blocked": cells_blocked,
+            "player_states": player_states,
+            "transcript": [],
+            "winner": None,
+            "terminated": False,
+        }
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
+        self.state.add_observation(message="Welcome to Labyrinth Command!", observation_type=ta.ObservationType.GAME_MESSAGE)
+        self.state.add_observation(message=f"Seed: {seed} ensures deterministic maze generation.", observation_type=ta.ObservationType.GAME_MESSAGE)
+        return self.state
+
+    # -------------------------------------------------------------------------
+    # ========== Step ==========
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+        """
+        # log the player action
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=self.state.current_player_id, to_id=-1)
+        player_id = self.state.current_player_id
+        player_label = "A" if player_id == 0 else "B"
+        opponent_label = "B" if player_label == "A" else "A"
+
+        if self.state.done:
+            self.state.set_invalid_move("Game already finished.")
+            return self.state.step()
+
+        answer = self._extract_answer_content(action)
+        gs = self.state.game_state
+        player_state = gs["player_states"][player_label]
+        opponent_state = gs["player_states"][opponent_label]
+        current_pos = tuple(player_state["position"])
+        beacon = tuple(gs["beacon_position"])
+
+        # Validate action syntax
+        if not (self.move_pattern.match(answer) or self.scan_pattern.match(answer) or self.wait_pattern.match(answer)):
+            self.state.set_invalid_move(reason="Invalid token format.")
+            return self.state.step()
+
+        new_pos = current_pos
+        maze_width, maze_height = gs["maze_width"], gs["maze_height"]
+        blocked = set(tuple(cell) for cell in gs["cells_blocked"])
+
+        # execute move if movement
+        if answer.startswith("[Move:"):
+            direction = answer[len("[Move:"):-1]
+            dx, dy = 0, 0
+            if direction == "North":
+                dy = -1
+            elif direction == "South":
+                dy = 1
+            elif direction == "West":
+                dx = -1
+            elif direction == "East":
+                dx = 1
+            nx, ny = current_pos[0] + dx, current_pos[1] + dy
+            if not (0 <= nx < maze_width and 0 <= ny < maze_height):
+                self.state.set_invalid_move("Move out of bounds")
+                return self.state.step()
+            if (nx, ny) in blocked:
+                self.state.set_invalid_move("Cell blocked")
+                return self.state.step()
+            new_pos = (nx, ny)
+            player_state["position"] = list(new_pos)
+            player_state["visited_cells"].append(list(new_pos))
+            player_state["visible_map"] = self._compute_visible_map(
+                [["X" if [x, y] in gs["cells_blocked"] else "." for x in range(maze_width)] for y in range(maze_height)],
+                new_pos,
+            )
+        elif answer == "[Scan]":
+            player_state["visible_map"] = self._compute_visible_map(
+                [["X" if [x, y] in gs["cells_blocked"] else "." for x in range(maze_width)] for y in range(maze_height)],
+                current_pos,
+            )
+        elif answer == "[Wait]":
+            pass  # do nothing
+
+        player_state["last_action"] = answer
+        gs["transcript"].append({"player": player_label, "action": answer})
+        gs["turn_index"] += 1
+
+        # ===== Check terminal conditions =====
+        reached_A = tuple(gs["player_states"]["A"]["position"]) == beacon
+        reached_B = tuple(gs["player_states"]["B"]["position"]) == beacon
+
+        if reached_A and reached_B:
+            self.state.set_draw(reason="Both players reached the Beacon simultaneously.")
+            gs["winner"] = "Draw"
+            gs["terminated"] = True
+            return self.state.step()
+        elif reached_A:
+            self.state.set_winner(player_id=0, reason="Explorer A reached the Beacon first.")
+            gs["winner"] = "A"
+            gs["terminated"] = True
+            return self.state.step()
+        elif reached_B:
+            self.state.set_winner(player_id=1, reason="Explorer B reached the Beacon first.")
+            gs["winner"] = "B"
+            gs["terminated"] = True
+            return self.state.step()
+
+        # Check turn limit
+        if self.state.check_turn_limit():
+            posA = tuple(gs["player_states"]["A"]["position"])
+            posB = tuple(gs["player_states"]["B"]["position"])
+            distA = self._distance(posA, beacon)
+            distB = self._distance(posB, beacon)
+            if distA < distB:
+                self.state.set_winner(player_id=0, reason="Explorer A is closer to Beacon at turn limit.")
+                gs["winner"] = "A"
+            elif distB < distA:
+                self.state.set_winner(player_id=1, reason="Explorer B is closer to Beacon at turn limit.")
+                gs["winner"] = "B"
+            else:
+                self.state.set_draw(reason="Both explorers equally distant at turn limit.")
+                gs["winner"] = "Draw"
+            gs["terminated"] = True
+
+        return self.state.step()
+
+    # -------------------------------------------------------------------------
+    # ========== Prompt ==========
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """Generate player prompt based on Stage 1 design."""
+        player_label = "A" if player_id == 0 else "B"
+        state = game_state["player_states"][player_label]
+        pos = state["position"]
+        visible_map = "\n".join([" ".join(row) for row in state["visible_map"]])
+        turn_index = game_state["turn_index"]
+        max_turns = game_state["max_turns"]
+        opponent_label = "B" if player_label == "A" else "A"
+        last_opp_action = (
+            game_state["player_states"][opponent_label]["last_action"] or "None yet"
+        )
+
+        prompt = f"""
+You are Explorer {player_label} navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
+Each turn you may issue one command from this action grammar:
+
+[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
+
+Remember:
+- Maze bounds are 0 ≤ x < {game_state['maze_width']}, 0 ≤ y < {game_state['maze_height']}.
+- Moving into blocked walls ('X') or out of bounds is invalid.
+- The beacon lies at the labyrinth’s center at {game_state['beacon_position']}.
+- You must wrap your command inside \\boxed{{}}.
+
+Current turn: {turn_index}/{max_turns}
+Your current position: {pos}
+Your visible 3×3 map:
+{visible_map}
+
+Your opponent’s last known action: {last_opp_action}
+
+Example valid response:
+I want to go north toward the Beacon.
+\\boxed{{[Move:North]}}
+
+Example invalid response:
+Let's go northeast! ← invalid direction keyword
+
+Now choose your next command carefully.
+Put your final answer within \\boxed{{}} at the end of your response.
+""".strip()
+        return prompt
+
\ No newline at end of file
diff --git a/environment.md b/environment.md
new file mode 100644
index 0000000..79526d7
--- /dev/null
+++ b/environment.md
@@ -0,0 +1,215 @@
+# **Game Design Document: “Labyrinth Command”**
+
+---
+
+## 1. Concept Paragraph
+
+**“Labyrinth Command”** is a deterministic, turn-based two-player tactical maze exploration game. Two rival explorers are trapped inside a grid-shaped labyrinth and must reach the **Central Beacon** at the maze’s heart before their opponent. Each turn, players issue one command from a fixed grammar of movement and interaction tokens (e.g., `[Move:North]`, `[Scan]`, `[Wait]`). The maze layout, beacon position, and obstacles are generated deterministically from a single seed, ensuring reproducibility. The game is **not** related to any economic, negotiation, or resource-trading example—its theme focuses purely on spatial logic and exploration within a confined environment.
+
+---
+
+## 2. Roles and Win Condition
+
+**Roles**
+- **Explorer A** and **Explorer B** are rival adventurers in identical labyrinth conditions.
+- Both start at distinct, opposite corners of the maze.
+
+**Objectives**
+- Reach the **Central Beacon Cell (B)** before the opponent.
+- A secondary scoring system tracks proximity to the Beacon at game end if neither player reaches it within the turn limit.
+
+**Win Rule**
+1. A player *wins immediately* if they enter the Beacon cell first.
+2. If both reach simultaneously on the same turn: **Draw**.
+3. If turn limit expires with no beacon reached: player closer (Manhattan distance) to the Beacon **wins**.
+4. If both are equally distant: **Draw**.
+
+---
+
+## 3. Turn Structure and Determinism
+
+- The game proceeds in **alternating turns**, starting with Explorer A.
+- Each turn = one player action followed by environment update and opponent observation.
+- **Turn limit:** 20 turns per player (40 total).
+- Maze generation and beacon placement use a **seed** value set at `reset`, guaranteeing fully deterministic structure and outcomes for identical seeds.
+- All elements of randomness (e.g., obstacle positions) derive from this same seed.
+
+---
+
+## 4. Action Grammar (Machine-Parseable)
+
+**Allowed Action Tokens (case-sensitive):**
+
+| Token Pattern | Meaning |
+|----------------|----------|
+| `[Move:Direction]` | Move one cell in a cardinal direction (`North`, `South`, `East`, `West`) if not blocked. |
+| `[Scan]` | Reveal contents of adjacent cells to update the player’s visible map. |
+| `[Wait]` | Skip the move, useful for strategic timing. |
+
+**Formal Patterns (Regex-style):**
+1. `^\[Move:(North|South|East|West)\]$`
+2. `^\[Scan\]$`
+3. `^\[Wait\]$`
+
+**Examples**
+
+| Action | Validity | Explanation |
+|--------|-----------|-------------|
+| `[Move:North]` | ✅ Valid | Matches move pattern |
+| `[Scan]` | ✅ Valid | Matches scan pattern |
+| `[Wait]` | ✅ Valid | Matches wait pattern |
+| `[Move:Northeast]` | ❌ Invalid | Direction not allowed |
+| `[move:North]` | ❌ Invalid | Case-sensitive mismatch |
+| `[Attack]` | ❌ Invalid | Unsupported token |
+
+---
+
+## 5. Game State Schema
+
+```json
+{
+  "seed": 18457,
+  "turn_index": 2,
+  "max_turns": 40,
+  "maze_width": 7,
+  "maze_height": 7,
+  "beacon_position": [3, 3],
+  "cells_blocked": [[0,1],[2,2],[4,5]],
+  "player_states": {
+    "A": {
+      "position": [1,0],
+      "visible_map": [["?", "?", "?"],[".", ".", "."],["X", ".", "."]],
+      "visited_cells": [[0,0],[1,0]],
+      "last_action": "[Move:East]"
+    },
+    "B": {
+      "position": [6,6],
+      "visible_map": [[".", ".", "?"],[".", ".", "?"],["?", "?", "?"]],
+      "visited_cells": [[6,6]],
+      "last_action": "[Scan]"
+    }
+  },
+  "transcript": [
+    {"player":"A", "action":"[Move:East]"},
+    {"player":"B", "action":"[Scan]"}
+  ],
+  "winner": null,
+  "terminated": false
+}
+```
+
+---
+
+## 6. Initialization Rules
+
+- Maze layout generated through seeded deterministic algorithm (`seed` provided or auto-generated).
+- Both players placed:
+  - Explorer A → top-left corner `[0,0]`
+  - Explorer B → bottom-right corner `[width-1,height-1]`
+- Beacon placed at center `(width//2, height//2)`.
+- `visible_map` initialized with limited visibility: only the 3×3 region around the player is shown, and cells outside the maze are marked `?`.
+- At `reset`, each player receives:
+  - Maze dimensions
+  - Starting coordinates
+  - Number of turns and win condition summary
+
+---
+
+## 7. Validation and Error Handling
+
+**Invalid Move Detection Rules**
+- Action not matching one of the defined regex patterns → `Invalid token format`
+- Action would move explorer outside maze bounds → `Move out of bounds`
+- Action would move explorer into blocked cell → `Cell blocked`
+- Any attempt made after terminal state → `Game already finished`
+
+System calls `set_invalid_move(reason)` for the current player upon detection.
+
+---
+
+## 8. Terminal Conditions and Scoring
+
+**Terminal Triggers**
+1. Player enters the Beacon cell → Win for that player.
+2. Both reach Beacon simultaneously → Draw.
+3. Turn limit reached → Compare distance to Beacon.
+   - Smaller Manhattan distance → Win.
+   - Equal → Draw.
+
+**Scoring Computation**
+- Winner gets `1`, loser `0`, draw `0.5`.
+- Stored in `winner` key as `"A"`, `"B"`, or `"Draw"`.
+
+---
+
+## 9. Player Prompt Specification
+
+**Prompt Content Outline**
+- Game title and theme summary
+- Player’s identity (Explorer A or B)
+- Current turn number and limits
+- Player’s current position, visible map grid, and last known opponent action
+- List of allowable command formats
+- Reminder to place final command inside `\boxed{}`
+- Examples of valid vs invalid formatting
+
+**Prompt Example**
+```
+You are Explorer A navigating the labyrinth. Your goal is to reach the Central Beacon before your rival. 
+You can issue ONE command per turn using the following grammar:
+
+[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
+
+Remember: 
+- Moving into blocked walls or out of bounds is invalid.
+- The beacon lies at the labyrinth’s center.
+- You must wrap your command inside \boxed{}.
+
+Example valid response:
+I want to go north to advance toward the beacon.
+\boxed{[Move:North]}
+
+Example invalid response:
+Let’s head northeast.        ← invalid direction keyword
+
+Now it is your turn. Choose your next command carefully.
+Put your final answer within \boxed{} at the end of your response.
+```
+
+**Helper:** `_extract_answer_content(self, action: str) -> str`  
+Extracts the content enclosed by `\boxed{...}` (the doubled-brace form `\boxed{{...}}` is also accepted) for validation and execution.
+
+---
+
+## 10. API Mapping Plan
+
+**reset()**
+- Generate deterministic maze grid based on seed.
+- Initialize all fields of `game_state` per schema.
+- Return initial observation for each player, including map visibility and rules summary.
+
+**step(player_action)**
+- Use `_extract_answer_content` to unwrap the boxed token.
+- Validate with grammar and state constraints.
+- If invalid → call `set_invalid_move`.
+- If valid → mutate player position/visibility, append to `transcript`.
+- Perform terminal condition checks after each move; update `winner` and `terminated` appropriately.
+- Return resulting state observation and game status.
+
+**_generate_player_prompt(player_id)**
+- Construct text prompt per section 9.
+- Include available moves, last opponent move, remaining turns, and map details.
+- Append "Put your final answer within \boxed{} at the end of your response."
+
+---
+
+## 11. Copy-Check Against the Example
+
+- The **Labyrinth Command** game has an *exploration and spatial logic* theme, **not** negotiation, trade, or economy-related.  
+- All entities—**maze**, **beacon**, **blocked cells**, and **explorers**—are original constructs.  
+- Action tokens `[Move:…]`, `[Scan]`, `[Wait]`, and state keys (`beacon_position`, `cells_blocked`, `visible_map`) are unique to this design.
+- No resource exchanges, offers, or bargaining are present.
+
+---
+
+**End of Design Document – “Labyrinth Command”**
\ No newline at end of file