def init_board():
    return [[" ", " ", " "], 
            [" ", " ", " "], 
            [" ", " ", " "]]
    

def print_board(board):
    print()
    print("  | 1 2 3")
    print("---------")
    for i, row in enumerate(board):
        print("ABC"[i], "|", end=" ")
        for cell in row:
            if cell == " ":
                cell = "."
            print(cell, end=" ")
        print()
    
board = init_board()
print_board(board)
board[1][1] = "X"
print_board(board)
board[0][2] = "Y"
print_board(board)
  | 1 2 3
---------
A | . . . 
B | . . . 
C | . . . 

  | 1 2 3
---------
A | . . . 
B | . X . 
C | . . . 

  | 1 2 3
---------
A | . . Y 
B | . X . 
C | . . . 
def ask_position(symbol):
    while True:
        position = input("player {} please provide position [A-C1-3, x to quit]: ".format(symbol)).upper()
        if position == "X":
            return position
        if len(position) == 2 and position[0] in "ABC" and position[1] in "123":
            return position
        print("invalid input try again")
    
    
def position_to_coordinates(position):
    """transforms A1 -> (0, 0) and so on"""
    assert len(position) == 2
    assert isinstance(position, str)
    
    row, col = position
    x = {"A": 0, "B": 1, "C": 2}[row]
    y = {"1": 0, "2": 1, "3": 2}[col]
    return x, y
assert position_to_coordinates("A1") == (0, 0)
assert position_to_coordinates("A2") == (0, 1)
assert position_to_coordinates("A3") == (0, 2)
assert position_to_coordinates("B1") == (1, 0)
assert position_to_coordinates("B2") == (1, 1)
assert position_to_coordinates("B3") == (1, 2)
assert position_to_coordinates("C1") == (2, 0)
assert position_to_coordinates("C2") == (2, 1)
assert position_to_coordinates("C3") == (2, 2)
def place_symbol(board, symbol):
    
    while True:
        position = ask_position(symbol)
        if position == "X":
            return True
        x, y = position_to_coordinates(position)
        if board[x][y] == " ":
            board[x][y] = symbol
            return False
        
        print("this position is already taken")  
        
        
def check_winner(board):
    # check if a single row has same symbols:
    for row in board:
        if row[0] == row[1] == row[2]:
            if row[0] != " ":
                return row[0]
        
    # check if a single column as same symbols:
    for col_idx in (0, 1, 2):
        cells = [board[row_idx][col_idx] for row_idx in (0, 1, 2)]
        if cells[0] == cells[1] == cells[2]:
            if cells[0] != " ":
                return cells[0]
        
    # check diagonals:
    if board[0][0] == board[1][1] == board[2][2]:
        if board[0][0] != " ":
            return board[0][0]
    
    if board[2][0] == board[1][1] == board[0][2]:
        if board[2][0] != " ":
            return board[2][0]
        
    return None    
assert check_winner(["   ", "   ", "   "]) is None

assert check_winner(["XXX", "   ", "   "]) == "X"
assert check_winner(["   ", "XXX", "   "]) == "X"
assert check_winner(["   ", "   ", "XXX"]) == "X"

assert check_winner(["X  ", "X  ", "X  "]) == "X"
assert check_winner([" X ", " X ", " X "]) == "X"
assert check_winner(["  X", "  X", "  X"]) == "X"

assert check_winner(["X  ", " X ", "  X"]) == "X"
assert check_winner(["  X", " X ", "X  "]) == "X"

assert check_winner(["Y X", "YX ", "X Y"]) == "X"
def main():

    board = init_board()
    print_board(board)
    
    free_cells = 9
    
    while True:
       
        for symbol in "XY":
            
            abort = place_symbol(board, symbol)
            if abort: 
                return
            
            free_cells -= 1
            if free_cells == 0:
                return
            
            print_board(board)
            winner = check_winner(board)
            if winner is not None:
                print(winner, "wins")
                return
        
main()
  | 1 2 3
---------
A | . . . 
B | . . . 
C | . . . 
player X please provide position [A-C1-3, x to quit]: x

Let the computer learn how to play the game by playing many random games

As the number of possible board combinations is in the range of thousands we can use a brute force approach to let the computer learn how to play the game.

The following implementation is based on the idea from https://makezine.com/2009/11/02/mechanical-tic-tac-toe-computer/

The cocept is that we let the computer play against himself doing random moves until X or Y wins and we record if the boards which led to this result where benefitial for X or for Y.

The goal is to setup a mapping board" -> (score_x, score_y).

So for a given board, this mapping gives us numbers score_x and score_y. The higher the score_x the better is the prospective for X to win, the higher score_y, the better the board is for Y to win.

We start with scores (0, 0) for all possible boards.

One random play updates scores as follows:

  • random player begins and makes a random move
  • other player makes random move
  • iterate until X or Y wins or until tie.
  • if X wins we go through all boards played in this game and increment the x score for the boards by 1
  • if Y wins we go through all boards played in this game and increment the y score for the boards by 1

Later we can use the mapping board -> (score_x, score_y) to choose a good move:

Lets say it's X turn. The strategy is to iterate over all possible "subsequent boards" by testing X at every empty position. We choose the board with the highest score for X.

def key(board):
    # list of lists to tuple of tuples, so we can use board
    # as a key in a dictinary
    return tuple(tuple(cell for cell in row) for row in board)

Slightly modified version to ease debugging:

def print_board(board, indent=""):
    for i, row in enumerate(board):
        print(indent, end="")
        for cell in row:
            if cell == " ":
                cell = "."
            print(cell, end=" ")
        print()
    print()
import random


def place_random(board, player):
    free = [(i, j) for j in range(3) for i in range(3) if board[i][j] == " "]
    i, j = random.choice(free)
    board[i][j] = player


def update_score_mapping(winner, boards_played, score_mapping):
    for k in boards_played:
        score_x, score_y = score_mapping.get(k, (0, 0))
        if winner == 'Y':
            score_y += 1
            score_x -= 1
        else:
            score_x += 1
            score_y -= 1
        score_mapping[k] = (score_x, score_y)

        
def random_play(score_mapping):
    board = init_board()
    if random.random() > 0.5:
        choices = 'XY'
    else:
        choices = 'YX'
 
    boards_played = []
    for i in range(9):
        player = choices[i % 2]
        place_random(board, player)
        boards_played.append(key(board))
    
        winner = check_winner(board)
        if winner is not None:
            update_score_mapping(winner, boards_played, score_mapping)    
            return

        
def learn(n=10000):
    score_mapping = {}
    for _ in range(n):
        random_play(score_mapping)   
    return score_mapping
# this runs for a long time

score_mapping = learn(700000)

Now an example how we can use multiple cores to compute the score_mapping for many simulations in parallel:

# this runs on my machine (i7, 8 cores) in about 50 seconds:

import multiprocessing
import time

print('start learning')
started = time.time()

n_cores = 7
n_learn_per_core = 300000

pool = multiprocessing.Pool(n_cores)  

# we call learn(n_learn_per_core) "n_cores" times in parallel:
score_mappings = pool.map(learn, (n_learn_per_core,) * n_cores)

# merge "n_cores" different score mappings to single mapping:
score_mapping = score_mappings[0]
for sm in score_mappings[1:]:
    for k, value in sm.items():
        sx, sy = score_mapping.get(k, (0, 0)) 
        sx += value[0]
        sy += value[1]
        score_mapping[k] = (sx, sy)
        
needed = time.time() - started
minutes, seconds = divmod(needed, 60)
print("needed {} minutes and {:.1f} seconds".format(minutes, seconds))
start learning
needed 0.0 minutes and 49.1 seconds
# this runs a while:
# score_mapping = learn(1000000)

# print a view boards + scores:
for board in list(score_mapping.keys())[:5]:
    score_x, score_y = score_mapping[board]
    print("score_x = {}, score_y = {}".format(score_x, score_y))
    print_board(board)
score_x = 40267, score_y = -40267
. . . 
. . . 
X . . 

score_x = 184, score_y = -184
. . Y 
. . . 
X . . 

score_x = 774, score_y = -774
. . Y 
. X . 
X . . 

score_x = 348, score_y = -348
. . Y 
. X . 
X Y . 

score_x = 520, score_y = -520
X . Y 
. X . 
X Y . 

def copy(board):
    return [row[:] for row in board]
 
    
def best_next(score_mapping, start_board, player, first=False):
    free = [(i, j) for j in range(3) for i in range(3) if start_board[i][j] == " "]
    if first:
        # disallow center move for first move
        free.remove((1, 1))
    best_choice = None
    best_score = -999999999
    DEBUG = False
    
    # we shuffle to randomize best move if multiple optimal boards with
    # same score exist:
    random.shuffle(free)
    
    for i, j in free:
        # we create a copy, so we don't need to undo symbol placements
        # when iterating over possible empty positions
        board = copy(start_board)
        board[i][j] = player
        winner = check_winner(board)
        if winner == player:
            return board
        
        if DEBUG:
            print('check board')
            print_board(board, indent="  ")
        k = key(board)
        if player == 'X':
            score_x, __ = score_mapping.get(k, (0, 0))
            if DEBUG: print('  score X', score_x)
            if score_x  > best_score:
                best_score = score_x
                best_choice = board
        else: 
            __, score_y = score_mapping.get(k, (0, 0))
            if DEBUG: print('  score Y', score_y)
            if score_y  > best_score:
                best_score = score_y
                best_choice = board
        if DEBUG: print()
    return best_choice
    
    
def play_computer_vs_computer(score_mapping):
    
    board = init_board()
    i = random.randint(0, 2)
    j = random.randint(0, 2)
    board[i][j] = 'X'
    # board = best_next(score_mapping, board, 'X', first=True)
    print_board(board)

    for _ in range(4):

        board = best_next(score_mapping, board, 'Y')
        print_board(board)
        winner = check_winner(board)
        if winner is not None:
            print(winner, 'wins')
            break
        board = best_next(score_mapping, board, 'X')
        print_board(board)
        winner = check_winner(board)
        if winner is not None:
            print(winner, 'wins')
            break 
    else:
        print("it's a tie")
play_computer_vs_computer(score_mapping)
. . X 
. . . 
. . . 

. . X 
. Y . 
. . . 

. . X 
. Y . 
. . X 

. . X 
. Y Y 
. . X 

. . X 
X Y Y 
. . X 

. . X 
X Y Y 
. Y X 

. X X 
X Y Y 
. Y X 

Y X X 
X Y Y 
. Y X 

Y X X 
X Y Y 
X Y X 

it's a tie
def human_vs_computer():

    board = init_board()
    print_board(board)
    
    free_cells = 9
    
    while True:
       
        abort = place_symbol(board, "X")
        if abort: 
             return
            
        free_cells -= 1
        if free_cells == 0:
            return
            
        print_board(board)
        winner = check_winner(board)
        if winner is not None:
            print(winner, "wins")
            return
        
        board = best_next(score_mapping, board, 'Y')
        print_board(board)
        winner = check_winner(board)
        if winner is not None:
            print(winner, "wins")
            return
        
human_vs_computer()
. . . 
. . . 
. . . 

player X please provide position [A-C1-3, x to quit]: B2
. . . 
. X . 
. . . 

. . Y 
. X . 
. . . 

player X please provide position [A-C1-3, x to quit]: C2
. . Y 
. X . 
. X . 

. Y Y 
. X . 
. X . 

player X please provide position [A-C1-3, x to quit]: A1
X Y Y 
. X . 
. X . 

X Y Y 
. X . 
. X Y 

player X please provide position [A-C1-3, x to quit]: B1
X Y Y 
X X . 
. X Y 

X Y Y 
X X Y 
. X Y 

Y wins