First commit
This commit is contained in:
196
game.py
Normal file
196
game.py
Normal file
@@ -0,0 +1,196 @@
|
||||
import numpy as np
|
||||
|
||||
# Index triples that form a line on a 3x3 grid whose cells are numbered
# 0..8 in row-major order. Used both for the cells of one small board and
# for the 3x3 arrangement of small boards.
WIN_PATTERNS = [
    # rows
    (0, 1, 2), (3, 4, 5), (6, 7, 8),
    # columns
    (0, 3, 6), (1, 4, 7), (2, 5, 8),
    # diagonals
    (0, 4, 8), (2, 4, 6),
]
|
||||
|
||||
class UltimateTicTacToe:
    """
    Rules and state encoding for Ultimate Tic-Tac-Toe.

    The game is played on a 9x9 grid made of nine 3x3 small boards.
    A board state is a tuple ``(board_data, active_board)``:

    * ``board_data`` is a flat array of 81 cells in row-major order of the
      9x9 grid, holding 1 or -1 for the two players and 0 for empty cells.
    * ``active_board`` is the index (0-8, row-major) of the small board the
      next move must be played in, or None when any playable small board
      may be chosen.

    A small board is closed (no longer playable) once either player has
    three in a row in it or it has no empty cell left. A player wins the
    game by winning three small boards that form a line.
    """

    def __init__(self):
        self.cells = 81  # total number of cells, also the action-space size
        self.board_width = 9  # side length of the full grid
        self.state_planes = 9  # number of planes produced by encode_state

    def get_init_board(self):
        """Return the starting state: all cells empty and no forced board."""
        return (np.zeros((self.cells,), dtype=int), None)

    def get_board_size(self):
        """Return the shape ``(planes, height, width)`` of an encoded state."""
        return (self.state_planes, self.board_width, self.board_width)

    def get_action_size(self):
        """Return the number of distinct actions (one per cell)."""
        return self.cells

    def get_next_state(self, board, player, action, verify_move=False):
        """
        Apply ``player``'s move on cell ``action`` and return the result.

        The input board is not modified. Returns
        ``((new_board_data, next_board), -player)`` where ``next_board`` is
        the small board the move sends the next player to, or None if that
        board is already closed. Cell values are not flipped; only the
        returned player sign changes.

        With ``verify_move=True`` the move is checked first and False is
        returned when ``action`` is outside the board or is not a legal move.
        """
        if verify_move:
            # Range-check first so negative indices cannot wrap around and
            # too-large ones do not raise from the lookup below.
            if not 0 <= action < self.cells:
                return False
            if self.get_valid_moves(board)[action] == 0:
                return False

        new_board_data = np.copy(board[0])
        new_board_data[action] = player

        # The move's position inside its own small board (row % 3, col % 3)
        # selects the small board the next move must be played in.
        next_board = ((action // 9) % 3) * 3 + (action % 3)
        if self.is_board_full(new_board_data, next_board):
            next_board = None

        return ((new_board_data, next_board), -player)

    def is_board_full(self, board_data, next_board):
        """
        Return True if small board ``next_board`` is closed, i.e. it has
        been won by either player or has no empty cell left.
        """
        return (
            self._is_small_board_win(board_data, next_board, 1)
            or self._is_small_board_win(board_data, next_board, -1)
            or self._is_board_full(board_data, next_board)
        )

    def _small_board_cells(self, inner_board_idx):
        """
        Return the nine flat cell indices of small board ``inner_board_idx``
        in row-major order.
        """
        row_block, col_block = divmod(inner_board_idx, 3)
        # Each row of small boards spans 3 grid rows of 9 cells (27 cells);
        # each small board is 3 cells wide.
        base = row_block * 27 + col_block * 3
        return [base + 9 * row + col for row in range(3) for col in range(3)]

    def _is_board_full(self, board_data, next_board):
        """Return True if small board ``next_board`` has no empty cell."""
        return all(
            board_data[index] != 0
            for index in self._small_board_cells(next_board)
        )

    def _is_playable_small_board(self, board_data, inner_board_idx):
        """Return True if the small board is neither won nor filled up."""
        return not self.is_board_full(board_data, inner_board_idx)

    def has_legal_moves(self, board):
        """Return True if at least one move is legal in ``board``."""
        return any(flag == 1 for flag in self.get_valid_moves(board))

    def get_valid_moves(self, board):
        """
        Return a list of 81 flags, 1 where a move is legal and 0 elsewhere.

        If the active small board is set and still playable, only its empty
        cells are legal. Otherwise the empty cells of every playable small
        board are legal.
        """
        board_data, active_board = board
        # All moves are invalid by default.
        valid_moves = [0] * self.get_action_size()

        if active_board is not None and self._is_playable_small_board(
            board_data, active_board
        ):
            target_boards = [active_board]
        else:
            # No forced board, or the forced board is closed: free choice.
            target_boards = [
                inner_board_idx
                for inner_board_idx in range(9)
                if self._is_playable_small_board(board_data, inner_board_idx)
            ]

        for inner_board_idx in target_boards:
            for index in self._small_board_cells(inner_board_idx):
                if board_data[index] == 0:
                    valid_moves[index] = 1

        return valid_moves

    def _is_small_board_win(self, board_data, inner_board_idx, player):
        """Return True if ``player`` has three in a row in the small board."""
        cells = self._small_board_cells(inner_board_idx)

        for a, b, c in WIN_PATTERNS:
            if (
                board_data[cells[a]]
                == board_data[cells[b]]
                == board_data[cells[c]]
                == player
            ):
                return True

        return False

    def is_win(self, board, player):
        """
        Return True if ``player`` has won three small boards that form a
        line on the 3x3 arrangement of small boards.
        """
        board_data, _ = board
        won = [
            self._is_small_board_win(board_data, i, player) for i in range(9)
        ]

        for a, b, c in WIN_PATTERNS:
            if won[a] and won[b] and won[c]:
                return True

        return False

    def get_reward_for_player(self, board, player):
        """
        Return the game outcome from ``player``'s point of view.

        1 if ``player`` has won, -1 if the opponent (``-player``) has won,
        None if neither has won and legal moves remain, 0 otherwise (draw).
        """
        if self.is_win(board, player):
            return 1
        if self.is_win(board, -player):
            return -1
        if self.has_legal_moves(board):
            return None

        return 0

    def get_canonical_board_data(self, board_data, player):
        """
        Return ``board_data`` multiplied by ``player``, so that the given
        player's stones become 1 and the opponent's become -1.
        """
        return player * board_data

    def _small_board_mask(self, inner_board_idx):
        """
        Return a 9x9 float32 array with 1.0 on the cells of small board
        ``inner_board_idx`` and 0.0 elsewhere.
        """
        mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
        for index in self._small_board_cells(inner_board_idx):
            row, col = divmod(index, self.board_width)
            mask[row, col] = 1.0
        return mask

    def encode_state(self, board):
        """
        Encode ``board`` as a float32 array of nine stacked 9x9 planes.

        Planes, in order:
          0. cells holding 1
          1. cells holding -1
          2. empty cells
          3. legal moves
          4. cells of the active small board (all zero if there is no
             active board or it is closed)
          5. cells of small boards won by 1
          6. cells of small boards won by -1
          7. cells of small boards that are still playable
          8. constant plane: fraction of the 81 cells that are occupied

        Cells equal to 1 are treated as the current player's stones, so the
        caller is presumably expected to pass canonical board data.
        """
        board_data, active_board = board
        grid_shape = (self.board_width, self.board_width)
        board_grid = board_data.reshape(grid_shape)

        current_stones = (board_grid == 1).astype(np.float32)
        opponent_stones = (board_grid == -1).astype(np.float32)
        empty_cells = (board_grid == 0).astype(np.float32)
        legal_moves = np.array(
            self.get_valid_moves(board), dtype=np.float32
        ).reshape(grid_shape)

        active_board_mask = np.zeros(grid_shape, dtype=np.float32)
        if active_board is not None and self._is_playable_small_board(
            board_data, active_board
        ):
            active_board_mask = self._small_board_mask(active_board)

        current_won_boards = np.zeros(grid_shape, dtype=np.float32)
        opponent_won_boards = np.zeros(grid_shape, dtype=np.float32)
        playable_boards = np.zeros(grid_shape, dtype=np.float32)

        for inner_board_idx in range(9):
            board_mask = self._small_board_mask(inner_board_idx)
            if self._is_small_board_win(board_data, inner_board_idx, 1):
                current_won_boards += board_mask
            elif self._is_small_board_win(board_data, inner_board_idx, -1):
                opponent_won_boards += board_mask

            if self._is_playable_small_board(board_data, inner_board_idx):
                playable_boards += board_mask

        move_count = np.count_nonzero(board_data) / self.cells
        move_count_plane = np.full(grid_shape, move_count, dtype=np.float32)

        return np.stack(
            (
                current_stones,
                opponent_stones,
                empty_cells,
                legal_moves,
                active_board_mask,
                current_won_boards,
                opponent_won_boards,
                playable_boards,
                move_count_plane,
            ),
            axis=0,
        )
|
||||
Reference in New Issue
Block a user