Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit 0de755f4fe
1 changed files with 223 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,223 @@
+```python
+import re, random
+from typing import Any, Dict, Optional, Tuple
+import textarena as ta
+
+
+class ElementalChampionsEnv(ta.Env):
+    """
+    Implementation of 'Elemental Champions: The Tournament of Triads'
+    as defined in the Stage 1 design document.
+    """
+
+    def __init__(self):
+        # Game constants
+        self.valid_elements = ["Flame", "Tide", "Gale"]
+        self.pattern = re.compile(r"^\[Channel:\s*(Flame|Tide|Gale)\]$")
+        self.max_rounds = 5
+        self.score_to_win = 3
+        self.dominance = {
+            "Flame": "Gale",  # Flame over Gale
+            "Gale": "Tide",   # Gale over Tide
+            "Tide": "Flame",  # Tide over Flame
+        }
+        self.state = None
+
+    def _extract_answer_content(self, action: str) -> str:
+        """Extract content from \\boxed{} for parsing."""
+        match = re.search(r'\\boxed\{\{([^}]*)\}\}', action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        # Fallback single bracket variant
+        match2 = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
+        if match2:
+            return match2.group(1).strip()
+        return action.strip()
+
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players: Number of players (must be 2)
+            seed: Optional seed for deterministic behavior
+        """
+        if num_players != 2:
+            raise ValueError("Elemental Champions is strictly a 2-player game.")
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_rounds)
+
+        game_state: Dict[str, Any] = {
+            "seed": seed or random.randint(1, 9999999),
+            "current_round": 0,
+            "max_rounds": self.max_rounds,
+            "score_to_win": self.score_to_win,
+            "duelist_A": {
+                "name": "duelist_A",
+                "essence_points": 0,
+                "last_action": None
+            },
+            "duelist_B": {
+                "name": "duelist_B",
+                "essence_points": 0,
+                "last_action": None
+            },
+            "transcript": [],
+            "winner": None,
+            "is_terminal": False,
+            "invalid_reason": None
+        }
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
+
+        welcome_message = (
+            "Welcome to the Tournament of Triads! First to 3 Essence Points wins.\n"
+            "Choose your elemental channel each round: Flame, Tide, or Gale."
+        )
+        self.state.add_observation(
+            welcome_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
+        )
+
+        # Prepare round buffer for simultaneous actions
+        self._round_actions = {0: None, 1: None}
+
+        return self.state
+
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """Generates per-player prompts consistent with Stage 1 document."""
+        opponent_id = 1 - player_id
+        player_key = "duelist_A" if player_id == 0 else "duelist_B"
+        opponent_key = "duelist_B" if player_id == 0 else "duelist_A"
+
+        info_section = (
+            f"Current round: {game_state['current_round']} / {game_state['max_rounds']}\n"
+            f"Your Essence Points: {game_state[player_key]['essence_points']}\n"
+            f"Opponent Essence Points: {game_state[opponent_key]['essence_points']}\n"
+        )
+        if game_state[player_key]["last_action"]:
+            info_section += f"Your last channel: {game_state[player_key]['last_action']}\n"
+        if game_state[opponent_key]["last_action"]:
+            info_section += f"Opponent's last channel was: {game_state[opponent_key]['last_action']}\n"
+
+        rules_section = (
+            "You are a mystical duelist in the Tournament of Triads, channeling elemental forces "
+            "of Flame, Tide, and Gale. Each round, select one element to channel.\n\n"
+            "Rules of Elemental Dominance:\n"
+            " - Flame defeats Gale\n"
+            " - Gale defeats Tide\n"
+            " - Tide defeats Flame\n\n"
+            "Use the following exact tokens (placed inside \\boxed{{}}):\n"
+            "[Channel: Flame]\n[Channel: Tide]\n[Channel: Gale]\n\n"
+            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
+            "Example valid response:\n"
+            "Flame is powerful this turn; I trust its strength.\n"
+            "\\boxed{{[Channel: Flame]}}\n\n"
+            "Example invalid response:\n"
+            "I summon Fire!\n"
+            "\\boxed{{[Cast: Fire]}}\n(reason: invalid token or element)\n"
+        )
+
+        transcript_section = ""
+        if game_state["transcript"]:
+            transcript_lines = []
+            for t in game_state["transcript"]:
+                transcript_lines.append(
+                    f"Round {t['round']}: Duelist A={t['A']} Duelist B={t['B']} → {t['outcome']}"
+                )
+            transcript_section = "Battle Log:\n" + "\n".join(transcript_lines) + "\n\n"
+
+        return info_section + transcript_section + rules_section
+
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+        Handles simultaneous element selection, outcome computation,
+        scoring, and termination.
+        """
+        player_id = self.state.current_player_id
+        player_key = "duelist_A" if player_id == 0 else "duelist_B"
+
+        self.state.add_observation(
+            action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1
+        )
+        action_token = self._extract_answer_content(action)
+
+        # Validate format
+        if not self.pattern.match(action_token):
+            self.state.set_invalid_move(reason="Malformed or unsupported action format.")
+            self.state.game_state["invalid_reason"] = "Malformed or unsupported action format."
+            return self.state.step()
+
+        # Store move
+        game_state = self.state.game_state
+        game_state[player_key]["last_action"] = action_token
+        self._round_actions[player_id] = action_token
+
+        # Wait until both actions received
+        if None in self._round_actions.values():
+            # Not both actions received yet, just advance turn but not round
+            return self.state.step()
+
+        # Both actions are in, resolve the round
+        a_action = self._round_actions[0]
+        b_action = self._round_actions[1]
+        element_A = re.search(r"(Flame|Tide|Gale)", a_action).group(1)
+        element_B = re.search(r"(Flame|Tide|Gale)", b_action).group(1)
+
+        # Determine winner for the round
+        outcome_text = "Draw"
+        if element_A == element_B:
+            outcome_text = "Draw"
+        elif self.dominance[element_A] == element_B:
+            outcome_text = "A wins"
+            game_state["duelist_A"]["essence_points"] += 1
+        elif self.dominance[element_B] == element_A:
+            outcome_text = "B wins"
+            game_state["duelist_B"]["essence_points"] += 1
+
+        # Record transcript
+        game_state["current_round"] += 1
+        game_state["transcript"].append(
+            {
+                "round": game_state["current_round"],
+                "A": a_action,
+                "B": b_action,
+                "outcome": outcome_text,
+            }
+        )
+
+        # Check for tournament winner
+        a_points = game_state["duelist_A"]["essence_points"]
+        b_points = game_state["duelist_B"]["essence_points"]
+
+        if a_points >= self.score_to_win:
+            game_state["winner"] = "duelist_A"
+            game_state["is_terminal"] = True
+            self.state.set_winner(player_id=0, reason="Duelist A reached 3 Essence Points.")
+
+        elif b_points >= self.score_to_win:
+            game_state["winner"] = "duelist_B"
+            game_state["is_terminal"] = True
+            self.state.set_winner(player_id=1, reason="Duelist B reached 3 Essence Points.")
+
+        elif game_state["current_round"] >= game_state["max_rounds"]:
+            # Check for draw or winner by points
+            if a_points > b_points:
+                game_state["winner"] = "duelist_A"
+                game_state["is_terminal"] = True
+                self.state.set_winner(player_id=0, reason="Duelist A has higher Essence Points.")
+            elif b_points > a_points:
+                game_state["winner"] = "duelist_B"
+                game_state["is_terminal"] = True
+                self.state.set_winner(player_id=1, reason="Duelist B has higher Essence Points.")
+            else:
+                game_state["winner"] = "Draw"
+                game_state["is_terminal"] = True
+                self.state.set_draw(reason="Both duelists have equal points after final round.")
+
+        # Reset simultaneous actions buffer for next round
+        self._round_actions = {0: None, 1: None}
+
+        # Proceed to next turn
+        return self.state.step()
+```