Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit d72cb3ce93
1 changed files with 237 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,237 @@
+```python
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class RunestoneClashEnv(ta.Env):
+    """
+    Environment for "Runestone Clash": a deterministic turn-based two-player grid alignment battle.
+    Players alternate imprinting magical runes ("⚙" for A, "✶" for B) on a 3×3 Stone Circle.
+    The first to align three runes in a straight line wins.
+    """
+
+    def __init__(self, max_turns: int = 9):
+        self.max_turns = max_turns
+        # Compile regexes for quick validation
+        self.imprint_pattern = re.compile(r"^\[Imprint:(1|2|3),(1|2|3)\]$")
+        self.pass_pattern = re.compile(r"^\[Pass\]$")
+
+    # =====================================================
+    # Core Helpers
+    # =====================================================
+    def _extract_answer_content(self, action: str) -> str:
+        """
+        Extract boxed content from \boxed{...} for machine parsing.
+        Falls back to returning the raw trimmed string on parse failure.
+        """
+        match = re.search(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return action.strip()
+
+    # =====================================================
+    # Initialization
+    # =====================================================
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial Runestone Clash state.
+
+        Args:
+            num_players: Number of players (must be 2).
+            seed: Deterministic seed for reproducible starts.
+
+        Returns:
+            None
+        """
+        if num_players != 2:
+            raise ValueError("Runestone Clash requires exactly two players.")
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+        rng = random.Random(seed)
+        starting_player = rng.choice([0, 1])
+
+        game_state: Dict[str, Any] = {
+            "turn_number": 1,
+            "active_player": "PlayerA" if starting_player == 0 else "PlayerB",
+            "rune_grid": [["" for _ in range(3)] for _ in range(3)],
+            "players": {
+                "PlayerA": {"symbol": "⚙", "imprints": 0, "skips": 0, "status": "active"},
+                "PlayerB": {"symbol": "✶", "imprints": 0, "skips": 0, "status": "active"},
+            },
+            "winner": None,
+            "draw": False,
+            "transcript": [],
+            "seed": seed,
+        }
+
+        # Set manually active player according to chosen start
+        self.state.reset(
+            game_state=game_state,
+            player_prompt_function=self._generate_player_prompt,
+            role_mapping={0: "PlayerA", 1: "PlayerB"}
+        )
+        self.state.manually_set_current_player_id(starting_player)
+
+        self.state.add_observation(
+            message="The Stone Circle hums with latent power. Runemages, prepare to begin.",
+            observation_type=ta.ObservationType.GAME_MESSAGE
+        )
+
+        return None
+
+    # =====================================================
+    # Player Prompt
+    # =====================================================
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """
+        Compose the role prompt shown to the current Runemage.
+        """
+        role_name = "Runemage A" if player_id == 0 else "Runemage B"
+        player_key = "PlayerA" if player_id == 0 else "PlayerB"
+        opponent_key = "PlayerB" if player_id == 0 else "PlayerA"
+        player_symbol = game_state["players"][player_key]["symbol"]
+        opponent_symbol = game_state["players"][opponent_key]["symbol"]
+
+        grid = game_state["rune_grid"]
+        display_grid = "\n".join(
+            [
+                "  ".join(f"[{(cell if cell else ' ')}]" for cell in row)
+                for row in grid
+            ]
+        )
+        open_cells = sum(1 for row in grid for c in row if c == "")
+        turn_number = game_state["turn_number"]
+
+        prompt = (
+            f"You are {role_name}, facing your rival in Runestone Clash.\n"
+            f"The current Stone Circle (3×3) state:\n{display_grid}\n"
+            f"Your sigil: {player_symbol}\nOpponent's sigil: {opponent_symbol}\n"
+            f"Turn {turn_number}, open cells remaining: {open_cells}\n\n"
+            f"Allowed actions:\n"
+            f" - [Imprint:x,y] : Imprint your rune at coordinates x,y (1–3) if empty.\n"
+            f" - [Pass] : Skip your turn, only if cells remain.\n"
+            f"Ensure syntax matches exactly (e.g., [Imprint:2,3]).\n\n"
+            "Put your final answer within \\boxed{} at the end of your response.\n\n"
+            "Example valid response:\n"
+            "I will secure the center of the Stone Circle.\n"
+            "\\boxed{[Imprint:2,2]}\n\n"
+            "Example valid response:\n"
+            "The board is tight; I will bide my time.\n"
+            "\\boxed{[Pass]}"
+        )
+        return prompt
+
+    # =====================================================
+    # Step Logic
+    # =====================================================
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+
+        Args:
+            action: Raw text from player action.
+
+        Returns:
+            Tuple (done, info)
+        """
+        # Log the raw message
+        self.state.add_observation(
+            message=action,
+            observation_type=ta.ObservationType.PLAYER_ACTION,
+            from_id=self.state.current_player_id,
+            to_id=-1
+        )
+        current_id = self.state.current_player_id
+        current_player_key = "PlayerA" if current_id == 0 else "PlayerB"
+        opponent_player_key = "PlayerB" if current_id == 0 else "PlayerA"
+        game_state = self.state.game_state
+
+        # Extract boxed content and validate
+        parsed_action = self._extract_answer_content(action)
+
+        match_imprint = self.imprint_pattern.match(parsed_action)
+        match_pass = self.pass_pattern.match(parsed_action)
+        grid = game_state["rune_grid"]
+
+        def check_full_grid(g):
+            return all(c != "" for row in g for c in row)
+
+        # -------------------- VALIDATION --------------------
+        if not (match_imprint or match_pass):
+            self.state.set_invalid_move("Invalid syntax: does not match required action pattern.")
+            return self.state.step()
+
+        if match_imprint:
+            x, y = int(match_imprint.group(1)), int(match_imprint.group(2))
+            if not (1 <= x <= 3 and 1 <= y <= 3):
+                self.state.set_invalid_move(f"Invalid coordinates: cell ({x},{y}) is outside grid boundaries.")
+                return self.state.step()
+            if grid[x - 1][y - 1] != "":
+                self.state.set_invalid_move("Cell already claimed by another rune.")
+                return self.state.step()
+
+            # Perform imprint
+            symbol = game_state["players"][current_player_key]["symbol"]
+            grid[x - 1][y - 1] = symbol
+            game_state["players"][current_player_key]["imprints"] += 1
+
+            game_state["transcript"].append({"player": current_player_key, "action": f"[Imprint:{x},{y}]"})
+            self.state.add_observation(
+                message=f"{current_player_key} imprinted a rune at ({x},{y}).",
+                observation_type=ta.ObservationType.GAME_MESSAGE
+            )
+
+        elif match_pass:
+            if check_full_grid(grid):
+                self.state.set_invalid_move("Cannot pass: grid fully imprinted.")
+                return self.state.step()
+            game_state["players"][current_player_key]["skips"] += 1
+            game_state["transcript"].append({"player": current_player_key, "action": "[Pass]"})
+            self.state.add_observation(
+                message=f"{current_player_key} chose to pass this turn.",
+                observation_type=ta.ObservationType.GAME_MESSAGE
+            )
+
+        # -------------------- GAME STATE UPDATE --------------------
+        game_state["turn_number"] += 1
+
+        # -------------------- WIN CHECK --------------------
+        def check_win(symbol: str) -> bool:
+            g = grid
+            # Rows, columns
+            for i in range(3):
+                if g[i][0] == g[i][1] == g[i][2] == symbol and symbol != "":
+                    return True
+                if g[0][i] == g[1][i] == g[2][i] == symbol and symbol != "":
+                    return True
+            # Diagonals
+            if g[0][0] == g[1][1] == g[2][2] == symbol and symbol != "":
+                return True
+            if g[0][2] == g[1][1] == g[2][0] == symbol and symbol != "":
+                return True
+            return False
+
+        current_symbol = game_state["players"][current_player_key]["symbol"]
+        if check_win(current_symbol):
+            game_state["winner"] = current_player_key
+            game_state["players"][current_player_key]["status"] = "won"
+            game_state["players"][opponent_player_key]["status"] = "lost"
+            self.state.set_winner(player_id=current_id, reason=f"{current_player_key} aligned three runes and harnessed the Stone Circle!")
+            return self.state.step()
+
+        # -------------------- DRAW CHECK --------------------
+        if check_full_grid(grid) or game_state["turn_number"] > 9:
+            game_state["draw"] = True
+            self.state.set_draw(reason="The Stone Circle is filled; no alignment achieved.")
+            return self.state.step()
+
+        # -------------------- NEXT TURN --------------------
+        next_player = (current_id + 1) % 2
+        game_state["active_player"] = "PlayerA" if next_player == 0 else "PlayerB"
+        self.state.manually_set_current_player_id(next_player)
+
+        return self.state.step()
+```