commit 6580f8e929c9cea116b74ea163ea63674d174a65 Author: bobbycxy Date: Tue Nov 25 13:02:22 2025 +0000 Initial commit from Openverse UI diff --git a/README.md b/README.md new file mode 100644 index 0000000..28cb967 --- /dev/null +++ b/README.md @@ -0,0 +1,248 @@ +# GAME DESIGN DOCUMENT — *“DUEL OF SIGNS: A ROCK–PAPER–SCISSORS TOURNAMENT”* + +--- + +## 1. Concept Paragraph + +**Concept:** +*Duel of Signs* is a deterministic, text-based tournament between two contenders — called **Signmasters** — who compete through successive matches of an expanded **Rock–Paper–Scissors** variant. The universe is a ceremonial arena where hand-sign duels determine dominance. Each player secretly chooses one of three signs (`[Rock]`, `[Paper]`, `[Scissors]`) per round. The goal is to win a majority of rounds in a predefined number of matches. Players can also invoke the `[Predict:]` token to gamble on the opponent’s next move for bonus points. This design is **entirely unrelated to any negotiation or trading scenario**; it contains no resource bargaining, currency, or offers — only symbolic duels and prediction tactics. + +--- + +## 2. Roles and Win Condition + +**Roles:** +- **Player A:** Signmaster 1 +- **Player B:** Signmaster 2 + +Each behaves symmetrically. + +**Primary Objective:** +Earn more **tournament points** than the opponent after all rounds have concluded. + +**Scoring:** +- Win a duel → +2 points +- Draw a duel → +1 point +- Loss → 0 points +- Correct use of `[Predict:]` (if correct) → +1 bonus point +- Incorrect prediction → −1 penalty point + +**Decision Rule:** +At the end of the final round: +- Higher score → **Winner** +- Equal scores → **Draw** + +--- + +## 3. Turn Structure and Determinism + +Each **round** has two **turns** (one per player in sequential mode). +The game proceeds through a fixed **number of rounds**, e.g., `5`. +Turn order alternates each round (A starts odd rounds, B starts even). 
+ +After both have chosen, the system resolves the duel deterministically: +- Rock beats Scissors +- Scissors beats Paper +- Paper beats Rock +All scoring and predictions apply deterministically without randomness. + +A **random seed** is used only for reproducibility during environment reset (e.g., for deciding initial starting player or generating tournament metadata). Given the same seed, the entire match sequence will replay exactly. + +--- + +## 4. Action Grammar (Machine‑Parsable) + +### 4.1 Allowed Action Tokens + +| Action Type | Token Format | Semantic Meaning | +|--------------|---------------|------------------| +| Play Sign | `[Play:Rock]`, `[Play:Paper]`, `[Play:Scissors]` | Commit to a sign | +| Predict | `[Predict:Rock]`, `[Predict:Paper]`, `[Predict:Scissors]` | Attempt to forecast opponent’s next Play sign | +| Concede | `[Concede]` | Forfeit the match immediately | + +### 4.2 Formal Grammar + +Regular expressions (expressed textually): + +``` +PLAY_TOKEN = r'^\[Play:(Rock|Paper|Scissors)\]$' +PREDICT_TOKEN = r'^\[Predict:(Rock|Paper|Scissors)\]$' +CONCEDE_TOKEN = r'^\[Concede\]$' +``` + +**Valid examples:** + +- ✔ `[Play:Rock]` → valid play +- ✔ `[Predict:Paper]` → valid prediction + +**Invalid examples:** + +- ❌ `[Play: Stones]` → invalid sign (unrecognized subtype) +- ❌ `[PredictPaper]` → missing colon between `Predict` and the sign, invalid format +- ❌ `[yield]` → token not in allowed action set + +--- + +## 5. 
Game State Schema + +```json +{ + "tournament_name": "Grand Duels 2077", + "seed": 123, + "round_index": 3, + "max_rounds": 5, + "turn_order": ["PlayerA", "PlayerB"], + "players": { + "PlayerA": { + "score": 5, + "last_action": "[Play:Rock]", + "predicted_action": "[Predict:Paper]", + "round_wins": 2 + }, + "PlayerB": { + "score": 4, + "last_action": "[Play:Scissors]", + "predicted_action": null, + "round_wins": 1 + } + }, + "round_history": [ + { + "round": 1, + "PlayerA_action": "[Play:Rock]", + "PlayerB_action": "[Play:Paper]", + "winner": "PlayerB" + }, + { + "round": 2, + "PlayerA_action": "[Predict:Scissors]", + "PlayerB_action": "[Play:Scissors]", + "winner": "Draw" + } + ], + "current_turn": "PlayerB", + "status": "active", + "winner": null, + "observation_log": [ + "Round 1: PlayerA played Rock; PlayerB played Paper → PlayerB wins.", + "Round 2: PlayerA predicted Scissors correctly." + ] +} +``` + +--- + +## 6. Initialization Rules + +- **Seed**: A reproducible integer input for deterministic setup. +- **Starting Player**: Chooses first mover by fixed or seeded rule (e.g., even seed → PlayerA starts). +- **Scores & History**: Zeroed at start. +- **Observations:** Each player receives an opening message: + - Tournament title + - Number of rounds + - Their player role (A or B) + - Reminder of available actions and scoring system. + +--- + +## 7. Validation and Error Handling + +When an action is received (content extracted from inside `\boxed{}`): + +**Validation Steps:** +1. Confirm the format matches one of the regex tokens. +2. Confirm it’s the player’s turn. +3. Ensure they haven’t already played a `[Play:*]` or `[Predict:*]` in this round (disallow duplicates). +4. If `[Concede]`, mark immediate terminal outcome. 
+ +**Invalid Move Reasons:** +- “Unrecognized token format.” +- “It is not your turn.” +- “Duplicate action this round.” +- “Concession not allowed after round resolution.” + +If invalid, `set_invalid_move(player, reason)` will be triggered and the opponent **automatically wins** the match. + +--- + +## 8. Terminal Conditions and Scoring + +**Terminal checks per turn:** +1. If both players have legal `[Play:*]` actions, resolve round outcome: + - Update scores + - Record to `round_history` + - Increment `round_index` +2. If `[Concede]` action is made → opponent receives **instant win** +3. If `round_index > max_rounds` → tournament ends + +**Endgame Scoring:** +- `winner = player with higher score` +- if equal → `"Draw"` + +**Tie‑breakers (if desired extension):** +If tied, compare number of individual round wins; if still tied, declare draw. + +--- + +## 9. Player Prompt Specification + +**Prompt Identity Blurb:** +> You are a Signmaster in the grand arena of *Duel of Signs.* Each round, you must choose a hand sign to defeat your opponent or attempt a daring prediction for extra points. + +**Prompt Contents:** +- Current round, scores, and last known actions. +- Reminder of valid action format and consequences. +- Scoring recap. +- End instruction: + “Put your final answer within `\boxed{}` at the end of your response.” + +**Allowed Actions:** +``` +[Play:Rock] +[Play:Paper] +[Play:Scissors] +[Predict:Rock] +[Predict:Paper] +[Predict:Scissors] +[Concede] +``` + +**Example Valid Response:** +``` +I think Paper will protect me from Rock. +\boxed{[Play:Paper]} +``` + +**Example Invalid Response:** +``` +I choose to play paper. +\boxed{PlayPaper} +``` +→ invalid because brackets and colon missing. + +**Helper extraction rule:** +`_extract_answer_content(self, action: str) -> str` +Parses text inside `\boxed{}` and returns only the internal token string for validation and step logic. + +--- + +## 10. 
API Mapping Plan + +| API Method | Purpose | Reads / Writes | Outcome | +|-------------|----------|----------------|----------| +| **reset(seed=None)** | Initialize tournament state using provided or default seed. Create base observations and metadata. | Writes: `seed`, `tournament_name`, `round_index=1`, `players` fields. | Returns initial observation to both players. | +| **step(player_action)** | Process one player’s turn. Validate syntax and legality. Update game state accordingly. If round complete, compute duel result. | Reads: `current_turn`, `players[...]`, `round_history`. Writes: `scores`, `round_index`, `winner`, `status`. | Returns new observations, and signals win/draw when terminal. | +| **_generate_player_prompt(player_id)** | Builds contextual textual prompt per player at their turn. Displays current scores, allowed tokens, round state, and format examples. | Reads: full `game_state`. | Returns formatted text prompt for model or user. | + +--- + +## 11. Copy‑Check Against the Example + +- **Theme & Objective:** Completely distinct — ceremonial hand‑sign duels rather than negotiation or trading. +- **Entities & Resources:** Signs, rounds, scores; no goods, offers, or currencies. +- **Prompt Text:** Focused on symbolic combat, prediction bonuses, and deterministic scoring. +- **Game State Keys:** (`tournament_name`, `round_index`, `round_history`, `predicted_action`, `round_wins`, etc.) — all unique to this design. 
class DuelOfSignsEnv(ta.Env):
    """TextArena environment for 'Duel of Signs: A Rock–Paper–Scissors Tournament'.

    Two players (id 0 = "PlayerA", id 1 = "PlayerB") duel for ``max_rounds``
    rounds. In every round each player commits exactly one ``[Play:*]`` token;
    before committing, a player may place one optional ``[Predict:*]`` side
    bet on the opponent's sign for that round. ``[Concede]`` ends the
    tournament immediately in the opponent's favour.

    Turn handling: ``game_state["turn_order"]`` lists the roles in the order
    they act in the current round and is reversed after every resolved round,
    so the first mover alternates. The player to act is always the first role
    in ``turn_order`` that has not committed a play yet, which means a
    prediction does not end the predictor's turn.
    """

    # Role name for each TextArena player id (index == player id).
    ROLES = ("PlayerA", "PlayerB")
    # Maps every sign to the sign it defeats.
    DEFEATS = {"Rock": "Scissors", "Scissors": "Paper", "Paper": "Rock"}

    def __init__(self, max_rounds: int = 5):
        """Create the environment.

        Args:
            max_rounds: Number of rounds in a tournament; must be >= 1.

        Raises:
            ValueError: If ``max_rounds`` is smaller than 1.
        """
        if max_rounds < 1:
            raise ValueError("max_rounds must be at least 1.")
        self.max_rounds = max_rounds
        self.play_pattern = re.compile(r'^\[Play:(Rock|Paper|Scissors)\]$')
        self.predict_pattern = re.compile(r'^\[Predict:(Rock|Paper|Scissors)\]$')
        self.concede_pattern = re.compile(r'^\[Concede\]$')
        self.signs = ["Rock", "Paper", "Scissors"]

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the token inside the last ``\boxed{...}`` of *action*.

        The prompt asks for the final answer at the end of the response, so
        when several boxes are present the last one wins. If there is no box
        at all, the stripped raw text is returned and left to validation.
        """
        boxed = re.findall(r'\\boxed\{([^}]*)\}', action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Start a new tournament.

        Args:
            num_players: Must be exactly 2.
            seed: Optional seed. An even seed (or ``None``) lets PlayerA move
                first in round 1, an odd seed lets PlayerB move first.

        Returns:
            The freshly initialised TextArena state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Duel of Signs requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # The only seed-dependent choice is who opens round 1.
        start_player = 0 if (seed is None or seed % 2 == 0) else 1
        first_role = self.ROLES[start_player]
        second_role = self.ROLES[1 - start_player]

        # Initialize game state as per schema
        self.state.game_state = {
            "tournament_name": "Grand Duels 2077",
            "seed": seed,
            "round_index": 1,
            "max_rounds": self.max_rounds,
            # Acting order for the current round; reversed after each round.
            "turn_order": [first_role, second_role],
            "players": {
                role: {
                    "score": 0,
                    "last_action": None,
                    "predicted_action": None,
                    "round_wins": 0,
                }
                for role in self.ROLES
            },
            "round_history": [],
            "current_turn": first_role,
            "status": "active",
            "winner": None,
            "observation_log": [],
        }

        # Reset framework state
        self.state.reset(
            game_state=self.state.game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )

        # Announce tournament to all players
        intro_message = (
            f"Welcome to {self.state.game_state['tournament_name']}!\n"
            f"This is a {self.state.game_state['max_rounds']}-round duel between two Signmasters.\n"
            "Each round: choose [Play:Rock], [Play:Paper], or [Play:Scissors], or attempt [Predict:].\n"
            "Win: +2 points, Draw: +1, Loss: 0, Correct Prediction: +1 bonus, Incorrect: -1.\n"
            "Concede anytime with [Concede]."
        )
        self.state.add_observation(intro_message, ta.ObservationType.GAME_MESSAGE)
        self.state.manually_set_current_player_id(start_player)

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to ``player_id``."""
        role = self.ROLES[player_id]
        opponent = self.ROLES[1 - player_id]
        player_data = game_state["players"][role]
        opp_data = game_state["players"][opponent]
        return (
            f"You are {role}, a Signmaster in the grand arena of Duel of Signs.\n"
            f"Round {game_state['round_index']} of {game_state['max_rounds']}.\n"
            f"Your score: {player_data['score']} | Opponent's score: {opp_data['score']}\n"
            "Choose your action token correctly:\n"
            "  [Play:Rock] | [Play:Paper] | [Play:Scissors]\n"
            "  [Predict:Rock] | [Predict:Paper] | [Predict:Scissors]\n"
            "  [Concede]\n"
            "Scoring: Win=+2, Draw=+1, Predicted correctly=+1, Incorrect prediction=−1.\n"
            "A [Predict:*] is an optional side bet placed before your [Play:*]; "
            "you must still submit a [Play:*] in the same round.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I think Paper will protect me.\n"
            "\\boxed{[Play:Paper]}\n\n"
            "Example valid response with prediction:\n"
            "I foresee my foe choosing Scissors.\n"
            "\\boxed{[Predict:Scissors]}"
        )

    def _beats(self, sign1: str, sign2: str) -> bool:
        """Return True if ``sign1`` beats ``sign2`` under the RPS rule."""
        return self.DEFEATS.get(sign1) == sign2

    def _next_role_to_act(self, game_state: Dict[str, Any]) -> str:
        """Return the first role in ``turn_order`` that has not committed a play."""
        for role in game_state["turn_order"]:
            if game_state["players"][role]["last_action"] is None:
                return role
        # Not reachable while a round is open (a fully committed round is
        # resolved and cleared at once); fall back to the round's first mover.
        return game_state["turn_order"][0]

    def _concede(self, player_id: int, game_state: Dict[str, Any]) -> None:
        """End the tournament at once with ``player_id``'s opponent as winner."""
        winner_id = 1 - player_id
        self.state.set_winner(
            player_id=winner_id, reason=f"{self.ROLES[player_id]} conceded."
        )
        game_state["status"] = "concluded"
        game_state["winner"] = self.ROLES[winner_id]

    def _score_predictions(self, game_state: Dict[str, Any], plays: Dict[str, str]) -> None:
        """Settle and clear every pending prediction against this round's plays."""
        for role, opponent in (("PlayerA", "PlayerB"), ("PlayerB", "PlayerA")):
            pdata = game_state["players"][role]
            prediction = pdata["predicted_action"]
            pred_match = self.predict_pattern.match(prediction) if prediction else None
            if pred_match is None:
                continue
            predicted_sign = pred_match.group(1)
            if predicted_sign == plays[opponent]:
                pdata["score"] += 1
                message = f"{role} correctly predicted {predicted_sign} (+1 bonus)."
            else:
                pdata["score"] -= 1
                message = f"{role} wrongly predicted {predicted_sign} (-1 penalty)."
            self.state.add_observation(message, ta.ObservationType.GAME_MESSAGE)
            pdata["predicted_action"] = None

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Resolve the current round once both players have committed a play.

        Does nothing while a play is still missing. Otherwise it settles
        predictions, scores the duel, records it in ``round_history``, opens
        the next round with the acting order reversed, and ends the
        tournament after the last round.
        """
        players = game_state["players"]
        plays: Dict[str, str] = {}
        for role in self.ROLES:
            committed = players[role]["last_action"]
            play_match = self.play_pattern.match(committed) if committed else None
            if play_match is None:
                return  # not yet ready
            plays[role] = play_match.group(1)

        a_action = players["PlayerA"]["last_action"]
        b_action = players["PlayerB"]["last_action"]
        a_play, b_play = plays["PlayerA"], plays["PlayerB"]
        round_no = game_state["round_index"]

        # Both signs are known now, so predictions can be settled.
        self._score_predictions(game_state, plays)

        if a_play == b_play:
            players["PlayerA"]["score"] += 1
            players["PlayerB"]["score"] += 1
            round_winner = "Draw"
            outcome_text = f"Round {round_no}: Draw ({a_play} vs {b_play})."
        elif self._beats(a_play, b_play):
            players["PlayerA"]["score"] += 2
            players["PlayerA"]["round_wins"] += 1
            round_winner = "PlayerA"
            outcome_text = f"Round {round_no}: PlayerA's {a_play} beats {b_play}."
        else:
            players["PlayerB"]["score"] += 2
            players["PlayerB"]["round_wins"] += 1
            round_winner = "PlayerB"
            outcome_text = f"Round {round_no}: PlayerB's {b_play} beats {a_play}."

        self.state.add_observation(outcome_text, ta.ObservationType.GAME_MESSAGE)
        game_state["observation_log"].append(outcome_text)
        game_state["round_history"].append(
            {
                "round": round_no,
                "PlayerA_action": a_action,
                "PlayerB_action": b_action,
                "winner": round_winner,
            }
        )

        # Open the next round: clear commitments and alternate the first mover.
        game_state["round_index"] += 1
        players["PlayerA"]["last_action"] = None
        players["PlayerB"]["last_action"] = None
        game_state["turn_order"] = game_state["turn_order"][::-1]

        # End tournament if exceeded rounds
        if game_state["round_index"] > game_state["max_rounds"]:
            self._determine_final_winner(game_state)

    def _determine_final_winner(self, game_state: Dict[str, Any]):
        """Declare the tournament result: higher score, then round wins, else draw."""
        a_data = game_state["players"]["PlayerA"]
        b_data = game_state["players"]["PlayerB"]

        winner_id: Optional[int] = None
        reason = "Scores and round wins drawn."
        if a_data["score"] != b_data["score"]:
            winner_id = 0 if a_data["score"] > b_data["score"] else 1
            reason = "Higher total score."
        elif a_data["round_wins"] != b_data["round_wins"]:
            # Tie-breaker: round wins
            winner_id = 0 if a_data["round_wins"] > b_data["round_wins"] else 1
            reason = "Tiebreaker by round wins."

        if winner_id is None:
            self.state.set_draw(reason=reason)
            game_state["winner"] = "Draw"
        else:
            self.state.set_winner(player_id=winner_id, reason=reason)
            game_state["winner"] = self.ROLES[winner_id]
        game_state["status"] = "concluded"

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Process one action of the current player.

        Args:
            action: Raw response text; the token is read from ``\\boxed{}``.

        Returns:
            The ``(done, info)`` tuple produced by the framework state.
        """
        player_id = self.state.current_player_id
        role = self.ROLES[player_id]
        game_state = self.state.game_state

        # Log the submission to the acting player only: broadcasting it would
        # reveal the chosen sign to the opponent before they have to commit.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=player_id,
        )

        # Extract boxed token for logic
        token = self._extract_answer_content(action)
        play_match = self.play_pattern.match(token)
        predict_match = self.predict_pattern.match(token)
        concede_match = self.concede_pattern.match(token)

        # Validate format
        if not (play_match or predict_match or concede_match):
            self.state.set_invalid_move(reason="Unrecognized token format.")
            return self.state.step()

        # Check if game already over
        if game_state["status"] != "active":
            self.state.set_invalid_move(reason="Game already concluded.")
            return self.state.step()

        pstate = game_state["players"][role]

        # A concession takes effect immediately, without waiting for the opponent.
        if concede_match:
            pstate["last_action"] = token
            self._concede(player_id, game_state)
            return self.state.step()

        # One play and at most one prediction per player and round.
        already_played = pstate["last_action"] is not None
        already_predicted = bool(predict_match) and pstate["predicted_action"] is not None
        if already_played or already_predicted:
            self.state.set_invalid_move(reason="Duplicate action this round.")
            return self.state.step()

        if predict_match:
            pstate["predicted_action"] = token
            # Private confirmation; the bet is revealed when the round resolves.
            self.state.add_observation(
                message=(
                    f"{role} predicts their opponent will play {predict_match.group(1)}. "
                    "Now submit your [Play:*] for this round."
                ),
                observation_type=ta.ObservationType.GAME_MESSAGE,
                to_id=player_id,
            )
        else:
            pstate["last_action"] = token
            # Resolves the round if the opponent has committed as well.
            self._resolve_round(game_state)

        if self.state.done or game_state["status"] != "active":
            return self.state.step()

        next_role = self._next_role_to_act(game_state)
        game_state["current_turn"] = next_role
        self.state.manually_set_current_player_id(self.ROLES.index(next_role))
        # The next actor was chosen explicitly above, so the framework must
        # not advance the turn a second time.
        return self.state.step(rotate_player=False)