First commit

2026-03-23 21:19:29 +01:00
commit 29fc731e6c
7 changed files with 1173 additions and 0 deletions
--- a/game.py
+++ b/game.py
@@ -0,0 +1,196 @@
+import numpy as np
+
+# Index triples that make three-in-a-row on a 3x3 grid whose positions are
+# numbered 0..8 in row-major order. The same table is used for the nine cells
+# of one small board and for the 3x3 arrangement of the small boards.
+WIN_PATTERNS = [
+    # Rows
+    (0, 1, 2),
+    (3, 4, 5),
+    (6, 7, 8),
+    # Columns
+    (0, 3, 6),
+    (1, 4, 7),
+    (2, 5, 8),
+    # Diagonals
+    (0, 4, 8),
+    (2, 4, 6),
+]
+
+class UltimateTicTacToe:
+    """
+    A very, very simple game of ConnectX in which we have:
+        rows: 1
+        columns: 4
+        winNumber: 2
+    """
+
+    def __init__(self):
+        self.cells = 81
+        self.board_width = 9
+        self.state_planes = 9
+
+    def get_init_board(self):
+        b = np.zeros((self.cells,), dtype=int)
+        return (b, None)
+
+    def get_board_size(self):
+        return (self.state_planes, self.board_width, self.board_width)
+
+    def get_action_size(self):
+        return self.cells
+
+    def get_next_state(self, board, player, action, verify_move=False):
+        if verify_move:
+            if self.get_valid_moves(board)[action] == 0:
+                return False
+        new_board_data = np.copy(board[0])
+        new_board_data[action] = player
+
+        next_board = ((action // 9) % 3) * 3 + (action % 3)
+        next_board = next_board if not self.is_board_full(new_board_data, next_board) else None
+
+        # Return the new game, but
+        # change the perspective of the game with negative
+        return ((new_board_data, next_board), -player)
+
+    def is_board_full(self, board_data, next_board):
+        return self._is_small_board_win(board_data, next_board, 1) or self._is_small_board_win(board_data, next_board, -1) or self._is_board_full(board_data, next_board) 
+
+    def _small_board_cells(self, inner_board_idx):
+        row_block = inner_board_idx // 3
+        col_block = inner_board_idx % 3
+
+        base = row_block * 27 + col_block * 3
+
+        return [
+            base, base + 1, base + 2,
+            base + 9, base + 10, base + 11,
+            base + 18, base + 19, base + 20
+        ]
+
+    def _is_board_full(self, board_data, next_board):
+        # Check if it is literally full
+        cells = self._small_board_cells(next_board)
+
+        for a in cells:
+            if board_data[a] == 0:
+                return False
+        return True
+
+    def _is_playable_small_board(self, board_data, inner_board_idx):
+        return not self.is_board_full(board_data, inner_board_idx)
+    
+    def has_legal_moves(self, board):
+        valid_moves = self.get_valid_moves(board)
+        for i in valid_moves:
+            if i == 1:
+                return True
+        return False
+
+    def get_valid_moves(self, board):
+        # All moves are invalid by default
+        board_data, active_board = board
+        valid_moves = [0] * self.get_action_size()
+
+        if active_board is not None and not self._is_playable_small_board(board_data, active_board):
+            active_board = None
+
+        if active_board is None:
+            playable_boards = [
+                inner_board_idx
+                for inner_board_idx in range(9)
+                if self._is_playable_small_board(board_data, inner_board_idx)
+            ]
+            for inner_board_idx in playable_boards:
+                for index in self._small_board_cells(inner_board_idx):
+                    if board_data[index] == 0:
+                        valid_moves[index] = 1
+        else:
+            for index in self._small_board_cells(active_board):
+                if board_data[index] == 0:
+                    valid_moves[index] = 1
+
+        return valid_moves
+
+    def _is_small_board_win(self, board_data, inner_board_idx, player):
+        cells = self._small_board_cells(inner_board_idx)
+    
+        for a, b, c in WIN_PATTERNS:
+            if board_data[cells[a]] == board_data[cells[b]] == board_data[cells[c]] == player:
+                return True
+        
+        return False
+    
+    def is_win(self, board, player):
+        board_data, _ = board
+        won = [self._is_small_board_win(board_data, i, player) for i in range(9)]
+        
+        # Check if any winning combination is all 1s
+        for a, b, c in WIN_PATTERNS:
+            if won[a] and won[b] and won[c]:
+                return True
+        
+        return False
+
+    def get_reward_for_player(self, board, player):
+        # return None if not ended, 1 if player 1 wins, -1 if player 1 lost
+
+        if self.is_win(board, player):
+            return 1
+        if self.is_win(board, -player):
+            return -1
+        if self.has_legal_moves(board):
+            return None
+
+        return 0
+
+    def get_canonical_board_data(self, board_data, player):
+        return player * board_data
+
+    def _small_board_mask(self, inner_board_idx):
+        mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
+        for index in self._small_board_cells(inner_board_idx):
+            row = index // self.board_width
+            col = index % self.board_width
+            mask[row, col] = 1.0
+        return mask
+
+    def encode_state(self, board):
+        board_data, active_board = board
+        board_grid = board_data.reshape(self.board_width, self.board_width)
+
+        current_stones = (board_grid == 1).astype(np.float32)
+        opponent_stones = (board_grid == -1).astype(np.float32)
+        empty_cells = (board_grid == 0).astype(np.float32)
+        legal_moves = np.array(self.get_valid_moves(board), dtype=np.float32).reshape(self.board_width, self.board_width)
+
+        active_board_mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
+        if active_board is not None and self._is_playable_small_board(board_data, active_board):
+            active_board_mask = self._small_board_mask(active_board)
+
+        current_won_boards = np.zeros((self.board_width, self.board_width), dtype=np.float32)
+        opponent_won_boards = np.zeros((self.board_width, self.board_width), dtype=np.float32)
+        playable_boards = np.zeros((self.board_width, self.board_width), dtype=np.float32)
+
+        for inner_board_idx in range(9):
+            board_mask = self._small_board_mask(inner_board_idx)
+            if self._is_small_board_win(board_data, inner_board_idx, 1):
+                current_won_boards += board_mask
+            elif self._is_small_board_win(board_data, inner_board_idx, -1):
+                opponent_won_boards += board_mask
+
+            if self._is_playable_small_board(board_data, inner_board_idx):
+                playable_boards += board_mask
+
+        move_count = np.count_nonzero(board_data) / self.cells
+        move_count_plane = np.full((self.board_width, self.board_width), move_count, dtype=np.float32)
+
+        return np.stack(
+            (
+                current_stones,
+                opponent_stones,
+                empty_cells,
+                legal_moves,
+                active_board_mask,
+                current_won_boards,
+                opponent_won_boards,
+                playable_boards,
+                move_count_plane,
+            ),
+            axis=0,
+        )