commit c1ab3a4227783f1f29c183cf4af92c5accd8cd4e Author: Openverse Builder Date: Mon Jan 1 00:00:00 2001 +0000 Add env.py from Openverse builder diff --git a/env.py b/env.py new file mode 100644 index 0000000..9290537 --- /dev/null +++ b/env.py @@ -0,0 +1,249 @@ +```python +import re +import random +from typing import Any, Dict, Optional, Tuple, List +import textarena as ta + + +class HoneyHeistBattleEnv(ta.Env): + """ + Environment for "Honey Heist: Battle of the Bears". + Two rival bears compete for honey in a turn-based deterministic environment. + """ + + def __init__(self, max_turns: Optional[int] = 20): + self.max_turns = max_turns + # regex patterns for validation + self.patterns = { + "forage": re.compile(r"^\[Forage:(1|2|3)\]$"), + "steal": re.compile(r"^\[Steal:(1|2|3)\]$"), + "defend": re.compile(r"^\[Defend\]$"), + } + + # --------------------------------------------------------------------- # + # Helper for extracting boxed content + # --------------------------------------------------------------------- # + def _extract_answer_content(self, action: str) -> str: + """Extract content inside \boxed{} or return full stripped string.""" + match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL) + if match: + return match.group(1).strip() + return action.strip() + + # --------------------------------------------------------------------- # + # Reset / Initialization + # --------------------------------------------------------------------- # + def reset(self, num_players: int, seed: Optional[int] = None): + """ + Resets the environment to an initial state. + """ + if num_players != 2: + raise ValueError("Honey Heist: Battle of the Bears requires exactly 2 players.") + + # Initialize TwoPlayerState + self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns) + + rng = random.Random(seed) + hive_honey = rng.randint(15, 20) + + game_state = { + "turn_number": 1, + "current_player": "BearA", + "hive_honey": hive_honey, + "max_turns": self.max_turns, + "players": { + "BearA": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0}, + "BearB": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0}, + }, + "history": [], + "winner": None, + "draw": False, + "seed": seed if seed is not None else 0, + } + + role_mapping = {0: "BearA", 1: "BearB"} + self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping) + + self.state.add_observation("Welcome to Honey Heist: Battle of the Bears!", ta.ObservationType.GAME_MESSAGE) + self.state.add_observation( + f"The hive contains {hive_honey} units of honey. BearA goes first.", + ta.ObservationType.GAME_MESSAGE + ) + + return self.state + + # --------------------------------------------------------------------- # + # Player Prompt Generator + # --------------------------------------------------------------------- # + def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str: + """Produce role-appropriate prompt for each bear.""" + role = "BearA" if player_id == 0 else "BearB" + rival = "BearB" if role == "BearA" else "BearA" + hive_honey = game_state["hive_honey"] + player_honey = game_state["players"][role]["stored_honey"] + rival_honey = game_state["players"][rival]["stored_honey"] + turn_number = game_state["turn_number"] + max_turns = game_state["max_turns"] + + prompt = f""" +You are a hungry bear competing for the last honey in the forest. + +- Your goal: End the game with more honey than your rival. +- Each turn, choose ONE of the following actions: + [Forage:X] Gather X units (1–3) from the hive. + [Defend] Protect your honey from theft this turn. + [Steal:X] Steal X units (1–3) from your rival if they do not defend. + +Game facts: +- Hive honey remaining: {hive_honey} +- Your stored honey: {player_honey} +- Rival stored honey: {rival_honey} +- Turn {turn_number} / {max_turns} + +Format rule: +State your reasoning briefly, then put your final action in the following format: + +"Put your final answer within \\boxed{{}} at the end of your response." + +Example valid response: +I think foraging is safe early on. +\\boxed{{[Forage:3]}} + +Example invalid response: +\\boxed{{Forage 3}} <-- Must include brackets and colon. +""" + return prompt.strip() + + # --------------------------------------------------------------------- # + # Step - main game logic + # --------------------------------------------------------------------- # + def step(self, action: str) -> Tuple[bool, ta.Info]: + """ + Perform a single environment step for the current player. + """ + player_id = self.state.current_player_id + player_role = "BearA" if player_id == 0 else "BearB" + opponent_role = "BearB" if player_role == "BearA" else "BearA" + + game_state = self.state.game_state + + # record action to transcript + self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1) + + # extract and validate + token = self._extract_answer_content(action) + if game_state["winner"] or game_state["draw"]: + self.state.set_invalid_move("Game is already over.") + return self.state.step() + + valid_action = False + reason = None + if self.patterns["forage"].match(token): + valid_action = True + act_type = "forage" + qty = int(re.findall(r"\d+", token)[0]) + elif self.patterns["steal"].match(token): + valid_action = True + act_type = "steal" + qty = int(re.findall(r"\d+", token)[0]) + elif self.patterns["defend"].match(token): + valid_action = True + act_type = "defend" + qty = 0 + else: + reason = "Invalid format, must use [Forage:X], [Steal:X], or [Defend]." + + if not valid_action: + self.state.set_invalid_move(reason) + return self.state.step() + + # handle action + player_data = game_state["players"][player_role] + opp_data = game_state["players"][opponent_role] + + if act_type == "forage": + if game_state["hive_honey"] < qty: + self.state.set_invalid_move("Not enough honey in hive.") + return self.state.step() + # update hive and player + game_state["hive_honey"] -= qty + player_data["stored_honey"] += qty + player_data["score"] = player_data["stored_honey"] + + elif act_type == "defend": + player_data["defending"] = True + + elif act_type == "steal": + if opp_data["stored_honey"] < qty: + self.state.set_invalid_move("Opponent has insufficient honey.") + return self.state.step() + if opp_data["defending"]: + transfer = 0 # blocked + else: + transfer = qty + opp_data["stored_honey"] -= transfer + player_data["stored_honey"] += transfer + player_data["score"] = player_data["stored_honey"] + opp_data["score"] = opp_data["stored_honey"] + + # update metadata + player_data["last_action"] = token + entry = {"turn": game_state["turn_number"], "actor": player_role, "action": token} + game_state["history"].append(entry) + game_state["current_player"] = opponent_role + + # Next player's defending status reset check + # Every full round (both bears act), clear defending flags + if player_role == "BearB": + game_state["players"]["BearA"]["defending"] = False + game_state["players"]["BearB"]["defending"] = False + + # increment turn number + game_state["turn_number"] += 1 + + # Check terminal conditions + done = False + reason_end = "" + if game_state["hive_honey"] <= 0: + done = True + reason_end = "Hive honey depleted." + elif game_state["turn_number"] > game_state["max_turns"]: + done = True + reason_end = "Maximum turns reached." + else: + total_honey = ( + game_state["hive_honey"] + + game_state["players"]["BearA"]["stored_honey"] + + game_state["players"]["BearB"]["stored_honey"] + ) + if total_honey <= 0: + done = True + reason_end = "All honey depleted." + + if done: + a_honey = game_state["players"]["BearA"]["stored_honey"] + b_honey = game_state["players"]["BearB"]["stored_honey"] + + if a_honey > b_honey: + game_state["winner"] = "BearA" + self.state.set_winner(player_id=0, reason=reason_end + " BearA has more honey.") + elif b_honey > a_honey: + game_state["winner"] = "BearB" + self.state.set_winner(player_id=1, reason=reason_end + " BearB has more honey.") + else: + game_state["draw"] = True + self.state.set_draw(reason=reason_end + " Equal honey scores.") + + return self.state.step() + + # --------------------------------------------------------------------- # + # Standard required Env accessors + # --------------------------------------------------------------------- # + def get_observation(self) -> Tuple[int, List]: + """Return current player's observations.""" + return self.state.current_player_id, self.state.observations + + def close(self) -> Tuple[Dict, Dict]: + """Return rewards and game_info at close.""" + return self.state.rewards, self.state.game_info +``` \ No newline at end of file