From 24ad4e6214f13693a9ae5ceb72b1389fc9b0d8c5 Mon Sep 17 00:00:00 2001
From: admin <bobbycxy1994@gmail.com>
Date: Mon, 1 Jan 2001 00:00:00 +0000
Subject: [PATCH] Add env.py from Openverse builder

---
 env.py | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100644 env.py
diff --git a/env.py b/env.py
new file mode 100644
index 0000000..7f69be0
--- /dev/null
+++ b/env.py
@@ -0,0 +1,268 @@
+# GlyphGrid Duel environment for textarena (two players, 3x3 grid).
+import re
+import random
+from typing import Any, Dict, Optional, Tuple, List
+
+import textarena as ta
+
+
+class GlyphGridDuelEnv(ta.Env):
+    """
+    GlyphGrid Duel: a deterministic two-player abstract logic game.
+
+    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
+    The first to align three identical glyphs along any row, column, or diagonal wins.
+    """
+
+    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")
+
+    def __init__(self) -> None:
+        """Create the environment with no active match; reset() builds self.state."""
+        self.state: Optional[ta.TwoPlayerState] = None
+
+    # -------------------------------------------------------------------------
+    # Helper: Extract content inside <answer> tags
+    # -------------------------------------------------------------------------
+    def _extract_answer_content(self, action: str) -> str:
+        """
+        Return the stripped text between the first <answer>...</answer> pair.
+        Tag matching is case-insensitive and may span lines; when no pair is
+        present, the whole action is stripped and returned instead.
+        """
+        found = re.search(r"<answer>(.*?)</answer>", action, re.IGNORECASE | re.DOTALL)
+        payload = found.group(1) if found else action
+        return payload.strip()
+
+    # -------------------------------------------------------------------------
+    # Reset environment
+    # -------------------------------------------------------------------------
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Start a fresh duel: seeded state, empty 3×3 grid, opening observations.
+
+        Args:
+            num_players: Number of players (must be 2 for GlyphGrid Duel).
+            seed: Optional seed; when omitted a random one in [0, 99999] is drawn.
+                Even seeds let Player 1 open, odd seeds let Player 2 open.
+
+        Returns:
+            The game_state dictionary now held by self.state.
+
+        Raises:
+            ValueError: If num_players is not 2.
+        """
+        if num_players != 2:
+            raise ValueError("GlyphGrid Duel requires exactly 2 players.")
+
+        if seed is None:
+            seed = random.randint(0, 99999)
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)  # seeded state manager
+
+        # Determine which player starts (based on seed parity)
+        starting_player_id = 0 if seed % 2 == 0 else 1
+        starting_player_name = f"Player {starting_player_id + 1}"
+        opening_message = f"{starting_player_name} begins the glyph duel."
+
+        # Build game_state dictionary
+        game_state: Dict[str, Any] = {
+            "turn_count": 0,
+            "current_player": starting_player_name,
+            "seed": seed,
+            "board": [["" for _ in range(3)] for _ in range(3)],  # "" marks an empty cell
+            "players": {
+                "Player 1": {"symbol": "X", "moves_made": 0},
+                "Player 2": {"symbol": "O", "moves_made": 0},
+            },
+            "winner": None,
+            "is_terminal": False,
+            "last_action": None,
+            "observation_log": [opening_message],
+        }
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
+
+        # step() takes the acting player from self.state.current_player_id and answers
+        # "Not your turn." when it differs from game_state["current_player"]; pin the
+        # state to the chosen opener so the two cannot disagree (e.g. Player 2 opening).
+        self.state.current_player_id = starting_player_id
+
+        # Initial observations
+        self.state.add_observation(
+            from_id=-1,
+            message=opening_message,
+            observation_type=ta.ObservationType.GAME_MESSAGE,
+        )
+        self.state.add_observation(
+            from_id=-1,
+            message=self._render_board(game_state["board"]),
+            observation_type=ta.ObservationType.GAME_BOARD,
+        )
+
+        return self.state.game_state
+
+    # -------------------------------------------------------------------------
+    # Board and State Helpers
+    # -------------------------------------------------------------------------
+    def _render_board(self, board: List[List[str]]) -> str:
+        """Render the grid as text: a column header, then one numbered line per row."""
+        # Empty cells are drawn as "." so every row keeps the same width.
+        numbered = (
+            f"{index}  " + " | ".join(cell or "." for cell in row)
+            for index, row in enumerate(board, start=1)
+        )
+        return "   1   2   3\n" + "\n".join(numbered)
+
+    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
+        """
+        Report whether `symbol` fills an entire row, column, or diagonal.
+
+        Only the 3×3 grid is considered; every winning line is enumerated as a
+        list of (row, col) coordinates and tested in turn.
+        """
+        span = range(3)
+        lines = [[(r, c) for c in span] for r in span]  # rows
+        lines += [[(r, c) for r in span] for c in span]  # columns
+        lines.append([(k, k) for k in span])  # main diagonal
+        lines.append([(k, 2 - k) for k in span])  # anti-diagonal
+        # A line wins when every cell on it holds the symbol.
+        return any(all(board[r][c] == symbol for r, c in line) for line in lines)
+
+    def _is_board_full(self, board: List[List[str]]) -> bool:
+        """Tell whether every cell of the grid already holds a glyph."""
+        return not any(cell == "" for row in board for cell in row)
+
+    # -------------------------------------------------------------------------
+    # Step Action
+    # -------------------------------------------------------------------------
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Process one submission from the player the state reports as current.
+
+        The raw action is logged first. The submission is then rejected (via
+        set_invalid_move) when it does not match [Inscribe:x,y], targets an
+        occupied cell, or comes from a player other than
+        game_state["current_player"]; this method leaves game_state untouched
+        on rejection. Otherwise the glyph is placed, the move and the new
+        board are announced, and the game ends on three-in-a-line or a full
+        grid; if neither applies, the turn passes to the opponent.
+
+        Args:
+            action: The action text submitted by the current player.
+
+        Returns:
+            A tuple (done, info), taken unchanged from self.state.step().
+        """
+        actor_id = self.state.current_player_id
+        actor = f"Player {actor_id + 1}"
+
+        # Record the submission exactly as received.
+        self.state.add_observation(
+            from_id=actor_id,
+            to_id=-1,
+            message=action,
+            observation_type=ta.ObservationType.PLAYER_ACTION,
+        )
+
+        # Only the <answer> payload (or the bare text) is interpreted.
+        command = self._extract_answer_content(action)
+        parsed = self.VALID_ACTION_PATTERN.match(command)
+        if parsed is None:
+            self.state.set_invalid_move(reason="Invalid action format. Must match [Inscribe:x,y].")
+            return self.state.step()
+
+        # The two captured digits are 1-based (row, column); shift to 0-based.
+        row, col = (int(digit) - 1 for digit in parsed.groups())
+
+        # Shared game_state dictionary and its grid; both are mutated in place.
+        gs = self.state.game_state
+        grid = gs["board"]
+
+        # Legality checks: occupancy first, then turn ownership.
+        rejection = None
+        if grid[row][col] != "":
+            rejection = "Cell already occupied."
+        elif gs["current_player"] != actor:
+            rejection = "Not your turn."
+        if rejection is not None:
+            self.state.set_invalid_move(reason=rejection)
+            return self.state.step()
+
+        # Place the glyph and update the bookkeeping fields.
+        glyph = gs["players"][actor]["symbol"]
+        grid[row][col] = glyph
+        gs["players"][actor]["moves_made"] += 1
+        gs["turn_count"] += 1
+        gs["last_action"] = command
+        gs["observation_log"].append(f"{actor} placed at ({row+1},{col+1})")
+
+        # Announce the move, then show the updated grid.
+        self.state.add_observation(
+            from_id=actor_id,
+            message=f"{actor} inscribed glyph '{glyph}' at ({row+1},{col+1})",
+            observation_type=ta.ObservationType.GAME_MESSAGE,
+        )
+        self.state.add_observation(
+            from_id=-1,
+            message=self._render_board(grid),
+            observation_type=ta.ObservationType.GAME_BOARD,
+        )
+
+        if self._check_winner(glyph, grid):
+            # Three in a line: the mover wins.
+            gs["winner"] = actor
+            gs["is_terminal"] = True
+            self.state.set_winner(player_id=actor_id, reason=f"{actor} aligned three glyphs and won the duel.")
+        elif self._is_board_full(grid):
+            # No empty cell left and nobody aligned three: draw.
+            gs["winner"] = "Draw"
+            gs["is_terminal"] = True
+            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
+        else:
+            # Game continues: hand the turn to the other player.
+            gs["current_player"] = f"Player {(1 - actor_id) + 1}"
+
+        # A single state.step() call closes the turn in every outcome.
+        return self.state.step()
+
+    # -------------------------------------------------------------------------
+    # Prompt Generation
+    # -------------------------------------------------------------------------
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """
+        Build the instruction text for one player from the given game state.
+
+        Args:
+            player_id: The integer ID of the player.
+            game_state: The shared game state.
+
+        Returns:
+            The full prompt: identity, board, turn, action grammar, and examples.
+        """
+        me = f"Player {player_id + 1}"
+        glyph = game_state["players"][me]["symbol"]
+        arena = self._render_board(game_state["board"])
+        mover = game_state["current_player"]
+
+        # Assembled piece by piece; every piece carries its own newlines.
+        pieces = [
+            f"You are {me}, bearer of the glyph '{glyph}', in the abstract digital arena.\n",
+            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n",
+            f"Current arena state:\n{arena}\n\n",
+            f"It is currently {mover}'s turn.\n",
+            "On your turn, inscribe your glyph in any unoccupied cell.\n\n",
+            "Action grammar (must be exact): [Inscribe:x,y]\n",
+            "  - x, y ∈ {1, 2, 3}\n",
+            "  - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n",
+            "Formatting rules:\n",
+            "  - Put private reasoning inside <think></think>.\n",
+            "  - Put your chosen action inside <answer></answer>.\n\n",
+            "Example valid response:\n",
+            "<think>I will take the center to prepare a diagonal line.</think>\n",
+            "<answer>[Inscribe:2,2]</answer>\n\n",
+            "Example invalid response:\n",
+            "<think>I'll use a lowercase tag.</think>\n",
+            "<answer>[inscribe:2,2]</answer>  <-- Invalid keyword\n",
+        ]
+        return "".join(pieces)
+# End of GlyphGridDuelEnv.
\ No newline at end of file