First commit

2026-03-23 21:19:29 +01:00
commit 29fc731e6c
7 changed files with 1173 additions and 0 deletions
--- a/model.py
+++ b/model.py
@@ -0,0 +1,81 @@
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ResidualBlock(nn.Module):
+    """Two 3x3 conv + batch-norm stages wrapped in an identity skip connection.
+
+    Both convolutions map ``channels`` to ``channels`` with ``padding=1``, so
+    the output tensor has the same shape as the input tensor.
+    """
+
+    def __init__(self, channels):
+        """Create the layers for feature maps that have ``channels`` channels."""
+        super().__init__()
+        # Shared settings: bias-free 3x3 convolutions, each followed by BatchNorm2d.
+        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
+        self.conv1 = nn.Conv2d(channels, channels, **conv_kwargs)
+        self.bn1 = nn.BatchNorm2d(channels)
+        self.conv2 = nn.Conv2d(channels, channels, **conv_kwargs)
+        self.bn2 = nn.BatchNorm2d(channels)
+
+    def forward(self, x):
+        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + x)``."""
+        # First stage: conv -> batch norm -> ReLU.
+        hidden = self.conv1(x)
+        hidden = self.bn1(hidden)
+        hidden = F.relu(hidden)
+        # Second stage: conv -> batch norm; the ReLU comes after the skip add.
+        hidden = self.conv2(hidden)
+        hidden = self.bn2(hidden)
+        return F.relu(hidden + x)
+
+
+class UltimateTicTacToeModel(nn.Module):
+    """Residual policy/value network over a ``(channels, height, width)`` board.
+
+    A 3x3 convolutional stem and a tower of ``ResidualBlock`` modules feed two
+    heads: a policy head that outputs a softmax distribution over
+    ``action_size`` entries, and a value head that outputs one tanh-squashed
+    scalar per board.
+    """
+
+    def __init__(self, board_size, action_size, device, channels=64, num_blocks=6):
+        """Build the network and move it to ``device``.
+
+        Args:
+            board_size: ``(input_channels, height, width)`` of one encoded board.
+            action_size: Number of outputs of the policy head.
+            device: Anything accepted by ``torch.device``.
+            channels: Number of feature maps in the stem and residual tower.
+            num_blocks: Number of residual blocks in the tower.
+        """
+        super().__init__()
+
+        self.action_size = action_size
+        self.input_shape = board_size
+        self.input_channels = board_size[0]
+        self.board_height = board_size[1]
+        self.board_width = board_size[2]
+        self.device = torch.device(device)
+
+        self.stem = nn.Sequential(
+            nn.Conv2d(self.input_channels, channels, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(channels),
+            nn.ReLU(inplace=True),
+        )
+        self.residual_tower = nn.Sequential(*(ResidualBlock(channels) for _ in range(num_blocks)))
+
+        # Both heads start with a 1x1 convolution down to 32 feature maps; the
+        # spatial size is unchanged, which fixes the width of the linear layers.
+        head_features = 32 * self.board_height * self.board_width
+
+        self.policy_head = nn.Sequential(
+            nn.Conv2d(channels, 32, kernel_size=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+        )
+        self.policy_fc = nn.Linear(head_features, self.action_size)
+
+        self.value_head = nn.Sequential(
+            nn.Conv2d(channels, 32, kernel_size=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+        )
+        self.value_fc1 = nn.Linear(head_features, 128)
+        self.value_fc2 = nn.Linear(128, 1)
+
+        self.to(self.device)
+
+    def forward(self, x):
+        """Run a batch of boards through the network.
+
+        Args:
+            x: Tensor whose elements can be arranged as
+                ``(batch, *self.input_shape)``.
+
+        Returns:
+            ``(policy, value)``: ``policy`` has shape ``(batch, action_size)``
+            and each row sums to 1 (softmax); ``value`` has shape
+            ``(batch, 1)`` and lies in ``[-1, 1]`` (tanh).
+        """
+        # reshape (not view) so tensors whose strides cannot express the target
+        # shape are copied instead of raising; it is still a view when possible.
+        x = x.reshape(-1, *self.input_shape)
+        x = self.stem(x)
+        x = self.residual_tower(x)
+
+        policy = self.policy_head(x)
+        policy = torch.flatten(policy, 1)
+        policy = self.policy_fc(policy)
+
+        value = self.value_head(x)
+        value = torch.flatten(value, 1)
+        value = F.relu(self.value_fc1(value))
+        value = torch.tanh(self.value_fc2(value))
+
+        return F.softmax(policy, dim=1), value
+
+    def predict(self, board):
+        """Evaluate a single board without tracking gradients.
+
+        The network is switched to eval mode for the call and then put back
+        into whichever mode (train or eval) it was in before, so calling this
+        in the middle of training does not leave the model in eval mode.
+
+        Args:
+            board: Array-like with ``prod(self.input_shape)`` elements.
+
+        Returns:
+            ``(pi, v)``: ``pi`` is a NumPy array of shape ``(action_size,)``
+            holding the policy probabilities, ``v`` is a Python float.
+        """
+        if isinstance(board, np.ndarray):
+            # Negative-stride views (e.g. from np.flip) cannot be wrapped by
+            # torch; this is a no-op for arrays that are already contiguous.
+            board = np.ascontiguousarray(board)
+        board = torch.as_tensor(board, dtype=torch.float32, device=self.device)
+        board = board.reshape(1, *self.input_shape)
+
+        was_training = self.training
+        self.eval()
+        try:
+            with torch.no_grad():
+                # Call the module itself so registered hooks still run.
+                pi, v = self(board)
+        finally:
+            # Restore the caller's mode even if the forward pass raised.
+            self.train(was_training)
+
+        return pi.detach().cpu().numpy()[0], float(v.item())