def init_board():
return [[" ", " ", " "],
[" ", " ", " "],
[" ", " ", " "]]
def print_board(board):
print()
print(" | 1 2 3")
print("---------")
for i, row in enumerate(board):
print("ABC"[i], "|", end=" ")
for cell in row:
if cell == " ":
cell = "."
print(cell, end=" ")
print()
board = init_board()
print_board(board)
board[1][1] = "X"
print_board(board)
board[0][2] = "Y"
print_board(board)
def ask_position(symbol):
while True:
position = input("player {} please provide position [A-C1-3, x to quit]: ".format(symbol)).upper()
if position == "X":
return position
if len(position) == 2 and position[0] in "ABC" and position[1] in "123":
return position
print("invalid input try again")
def position_to_coordinates(position):
"""transforms A1 -> (0, 0) and so on"""
assert len(position) == 2
assert isinstance(position, str)
row, col = position
x = {"A": 0, "B": 1, "C": 2}[row]
y = {"1": 0, "2": 1, "3": 2}[col]
return x, y
assert position_to_coordinates("A1") == (0, 0)
assert position_to_coordinates("A2") == (0, 1)
assert position_to_coordinates("A3") == (0, 2)
assert position_to_coordinates("B1") == (1, 0)
assert position_to_coordinates("B2") == (1, 1)
assert position_to_coordinates("B3") == (1, 2)
assert position_to_coordinates("C1") == (2, 0)
assert position_to_coordinates("C2") == (2, 1)
assert position_to_coordinates("C3") == (2, 2)
def place_symbol(board, symbol):
while True:
position = ask_position(symbol)
if position == "X":
return True
x, y = position_to_coordinates(position)
if board[x][y] == " ":
board[x][y] = symbol
return False
print("this position is already taken")
def check_winner(board):
# check if a single row has same symbols:
for row in board:
if row[0] == row[1] == row[2]:
if row[0] != " ":
return row[0]
# check if a single column as same symbols:
for col_idx in (0, 1, 2):
cells = [board[row_idx][col_idx] for row_idx in (0, 1, 2)]
if cells[0] == cells[1] == cells[2]:
if cells[0] != " ":
return cells[0]
# check diagonals:
if board[0][0] == board[1][1] == board[2][2]:
if board[0][0] != " ":
return board[0][0]
if board[2][0] == board[1][1] == board[0][2]:
if board[2][0] != " ":
return board[2][0]
return None
assert check_winner([" ", " ", " "]) is None
assert check_winner(["XXX", " ", " "]) == "X"
assert check_winner([" ", "XXX", " "]) == "X"
assert check_winner([" ", " ", "XXX"]) == "X"
assert check_winner(["X ", "X ", "X "]) == "X"
assert check_winner([" X ", " X ", " X "]) == "X"
assert check_winner([" X", " X", " X"]) == "X"
assert check_winner(["X ", " X ", " X"]) == "X"
assert check_winner([" X", " X ", "X "]) == "X"
assert check_winner(["Y X", "YX ", "X Y"]) == "X"
def main():
board = init_board()
print_board(board)
free_cells = 9
while True:
for symbol in "XY":
abort = place_symbol(board, symbol)
if abort:
return
free_cells -= 1
if free_cells == 0:
return
print_board(board)
winner = check_winner(board)
if winner is not None:
print(winner, "wins")
return
main()
As the number of possible board combinations is in the range of thousands we can use a brute force approach to let the computer learn how to play the game.
The following implementation is based on the idea from https://makezine.com/2009/11/02/mechanical-tic-tac-toe-computer/
The cocept is that we let the computer play against himself doing random moves until X or Y wins and we record if the boards which led to this result where benefitial for X or for Y.
The goal is to setup a mapping board" -> (score_x, score_y)
.
So for a given board, this mapping gives us numbers score_x
and score_y
. The higher the score_x
the better is the prospective for X
to win, the higher score_y
, the better the board is for Y
to win.
We start with scores (0, 0)
for all possible boards.
One random play updates scores as follows:
Later we can use the mapping board -> (score_x, score_y)
to choose a good move:
Lets say it's X
turn. The strategy is to iterate over all possible "subsequent boards" by testing X
at every empty position. We choose the board with the highest score for X
.
def key(board):
# list of lists to tuple of tuples, so we can use board
# as a key in a dictinary
return tuple(tuple(cell for cell in row) for row in board)
Slightly modified version to ease debugging:
def print_board(board, indent=""):
for i, row in enumerate(board):
print(indent, end="")
for cell in row:
if cell == " ":
cell = "."
print(cell, end=" ")
print()
print()
import random
def place_random(board, player):
free = [(i, j) for j in range(3) for i in range(3) if board[i][j] == " "]
i, j = random.choice(free)
board[i][j] = player
def update_score_mapping(winner, boards_played, score_mapping):
for k in boards_played:
score_x, score_y = score_mapping.get(k, (0, 0))
if winner == 'Y':
score_y += 1
score_x -= 1
else:
score_x += 1
score_y -= 1
score_mapping[k] = (score_x, score_y)
def random_play(score_mapping):
board = init_board()
if random.random() > 0.5:
choices = 'XY'
else:
choices = 'YX'
boards_played = []
for i in range(9):
player = choices[i % 2]
place_random(board, player)
boards_played.append(key(board))
winner = check_winner(board)
if winner is not None:
update_score_mapping(winner, boards_played, score_mapping)
return
def learn(n=10000):
score_mapping = {}
for _ in range(n):
random_play(score_mapping)
return score_mapping
# this runs for a long time
score_mapping = learn(700000)
Now an example how we can use multiple cores to compute the score_mapping
for many simulations in parallel:
# this runs on my machine (i7, 8 cores) in about 50 seconds:
import multiprocessing
import time
print('start learning')
started = time.time()
n_cores = 7
n_learn_per_core = 300000
pool = multiprocessing.Pool(n_cores)
# we call learn(n_learn_per_core) "n_cores" times in parallel:
score_mappings = pool.map(learn, (n_learn_per_core,) * n_cores)
# merge "n_cores" different score mappings to single mapping:
score_mapping = score_mappings[0]
for sm in score_mappings[1:]:
for k, value in sm.items():
sx, sy = score_mapping.get(k, (0, 0))
sx += value[0]
sy += value[1]
score_mapping[k] = (sx, sy)
needed = time.time() - started
minutes, seconds = divmod(needed, 60)
print("needed {} minutes and {:.1f} seconds".format(minutes, seconds))
# this runs a while:
# score_mapping = learn(1000000)
# print a view boards + scores:
for board in list(score_mapping.keys())[:5]:
score_x, score_y = score_mapping[board]
print("score_x = {}, score_y = {}".format(score_x, score_y))
print_board(board)
def copy(board):
return [row[:] for row in board]
def best_next(score_mapping, start_board, player, first=False):
free = [(i, j) for j in range(3) for i in range(3) if start_board[i][j] == " "]
if first:
# disallow center move for first move
free.remove((1, 1))
best_choice = None
best_score = -999999999
DEBUG = False
# we shuffle to randomize best move if multiple optimal boards with
# same score exist:
random.shuffle(free)
for i, j in free:
# we create a copy, so we don't need to undo symbol placements
# when iterating over possible empty positions
board = copy(start_board)
board[i][j] = player
winner = check_winner(board)
if winner == player:
return board
if DEBUG:
print('check board')
print_board(board, indent=" ")
k = key(board)
if player == 'X':
score_x, __ = score_mapping.get(k, (0, 0))
if DEBUG: print(' score X', score_x)
if score_x > best_score:
best_score = score_x
best_choice = board
else:
__, score_y = score_mapping.get(k, (0, 0))
if DEBUG: print(' score Y', score_y)
if score_y > best_score:
best_score = score_y
best_choice = board
if DEBUG: print()
return best_choice
def play_computer_vs_computer(score_mapping):
board = init_board()
i = random.randint(0, 2)
j = random.randint(0, 2)
board[i][j] = 'X'
# board = best_next(score_mapping, board, 'X', first=True)
print_board(board)
for _ in range(4):
board = best_next(score_mapping, board, 'Y')
print_board(board)
winner = check_winner(board)
if winner is not None:
print(winner, 'wins')
break
board = best_next(score_mapping, board, 'X')
print_board(board)
winner = check_winner(board)
if winner is not None:
print(winner, 'wins')
break
else:
print("it's a tie")
play_computer_vs_computer(score_mapping)
def human_vs_computer():
board = init_board()
print_board(board)
free_cells = 9
while True:
abort = place_symbol(board, "X")
if abort:
return
free_cells -= 1
if free_cells == 0:
return
print_board(board)
winner = check_winner(board)
if winner is not None:
print(winner, "wins")
return
board = best_next(score_mapping, board, 'Y')
print_board(board)
winner = check_winner(board)
if winner is not None:
print(winner, "wins")
return
human_vs_computer()