Hexapawn AI Transposition Table not working correctly

30 views Asked by At

The code below is a python implementation of the game Hexapawn, but on a 6x6 board. I originally wrote the program in c++, but converted it to python so I could post it here, and have an easier time testing the logic. I encountered a weird issue while testing this code, and that is the fact that the entry depth has to be equal to the current depth for the transposition table to work correctly. From the Wikipedia pseudocode (https://en.wikipedia.org/wiki/Negamax#Negamax_with_alpha_beta_pruning_and_transposition_tables), it is clear that this is not how transposition tables should work.

In the code on wikipedia, the depth check is as follows:

if ttEntry is valid and ttEntry.depth ≥ depth then

Not what I have, which is:

if (entry.depth == depth) {

Here is the full code if you would like to test it. It does take several seconds to run with a depth of 22, and if you would like it to run faster for testing, you can lower it.

import time
from copy import deepcopy

class HexapawnBitboard:
    def __init__(self):
        self.white_pawns = 0b000000000000000000000000001111110000
        self.black_pawns = 0b111000000100000011000000000000000000

    def set_bit(self, square, player):
        if player == 1:
            self.white_pawns |= (1 << square)
        elif player == -1:
            self.black_pawns |= (1 << square)

    def clear_bit(self, square, player):
        if player == 1:
            self.white_pawns &= ~(1 << square)
        elif player == -1:
            self.black_pawns &= ~(1 << square)

    def get_bit(self, square, player):
        if player == 1:
            return (self.white_pawns >> square) & 1
        elif player == -1:
            return (self.black_pawns >> square) & 1
        else:
            return 0

    def unoccupied(self, square):
        return ((self.white_pawns | self.black_pawns) & (1 << square)) == 0


BoardSize = 6
squarenum = BoardSize ** 2
squarenum_minus_6 = squarenum - 6


def get_possible_moves(gameboard, player):
    possible_moves = []
    forward = player
    opponent = -player

    for square in range(squarenum):
        if gameboard.get_bit(square, player) == 1:
            y = square // BoardSize
            x = square % BoardSize

            x_minus_one = x - 1
            x_plus_one = x + 1
            y_plus_forward = y + forward
            y_plus_forward_times_six = y_plus_forward * BoardSize

            target_square = y_plus_forward_times_six + x_minus_one
            if 0 <= x_minus_one < BoardSize and 0 <= y_plus_forward < BoardSize and gameboard.get_bit(
                target_square, opponent
            ) == 1:
                possible_moves.append((square, target_square))

            target_square = y_plus_forward_times_six + x_plus_one
            if 0 <= x_plus_one < BoardSize and 0 <= y_plus_forward < BoardSize and gameboard.get_bit(
                target_square, opponent
            ) == 1:
                possible_moves.append((square, target_square))

            target_square = y_plus_forward_times_six + x
            if 0 <= y_plus_forward < BoardSize and gameboard.unoccupied(target_square):
                possible_moves.append((square, target_square))

    return possible_moves


def check_win(gameboard, player):
    start_square = squarenum_minus_6 if player == 1 else 0
    end_square = squarenum if player == 1 else BoardSize

    for square in range(start_square, end_square):
        if gameboard.get_bit(square, player) == 1:
            return True

    possible_moves = get_possible_moves(gameboard, -player)
    return not possible_moves


def check_win_negamax(gameboard, player, possible_moves):
    start_square = squarenum_minus_6 if player == 1 else 0
    end_square = squarenum if player == 1 else BoardSize

    for square in range(start_square, end_square):
        if gameboard.get_bit(square, player) == 1:
            return True

    return not possible_moves


def get_user_input():
    user_input = input("Enter a move: ")
    xy_coords = tuple(map(int, user_input.split()))
    return xy_coords


def make_move(gameboard, start_square, target_square, player):
    opponent = -player
    if gameboard.get_bit(target_square, opponent) == 1:
        gameboard.clear_bit(target_square, opponent)

    gameboard.set_bit(target_square, player)
    gameboard.clear_bit(start_square, player)


class TTEntry:
    def __init__(self, score, depth, upperbound, lowerbound):
        self.score = score
        self.depth = depth
        self.upperbound = upperbound
        self.lowerbound = lowerbound


class TranspositionTable(dict):
    pass


def combine(white_pawns, black_pawns):
    return (white_pawns << 64) | black_pawns


def negamax(gameboard, player, depth, alpha, beta, TT):
    alpha_orig = alpha
    key = combine(gameboard.white_pawns, gameboard.black_pawns)

    if key in TT:
        entry = TT[key]
        entry_score = entry.score

        if entry.depth == depth:
            if entry.lowerbound:
                alpha = max(alpha, entry_score)
            elif entry.upperbound:
                beta = min(beta, entry_score)
            else:
                return entry_score

            if alpha >= beta:
                return entry_score

    opponent = -player
    possible_moves = get_possible_moves(gameboard, player)

    if check_win_negamax(gameboard, opponent, possible_moves):
        return -(depth + 1)

    if depth == 0:
        return 0

    best_score = -10000

    next_depth = depth - 1
    first_node = True

    for move in possible_moves:
        board_copy = deepcopy(gameboard)
        make_move(board_copy, move[0], move[1], player)

        if first_node:
            score = -negamax(board_copy, opponent, next_depth, -beta, -alpha, TT)
            first_node = False
        else:
            score = -negamax(board_copy, opponent, next_depth, -alpha - 1, -alpha, TT)
            if alpha < score < beta:
                score = -negamax(board_copy, opponent, next_depth, -beta, -alpha, TT)

        if score > best_score:
            best_score = score

        alpha = max(alpha, score)

        if alpha >= beta:
            break

    upperbound, lowerbound = False, False

    if best_score <= alpha_orig:
        upperbound = True
    elif best_score >= beta:
        lowerbound = True

    TT[key] = TTEntry(best_score, depth, upperbound, lowerbound)

    return best_score


def solve(gameboard, player, depth):
    best_move = None
    best_score = -10000
    alpha = -10000
    beta = 10000

    TT = TranspositionTable()

    possible_moves = get_possible_moves(gameboard, player)
    opponent = -player
    next_depth = depth - 1
    first_node = True

    for move in possible_moves:
        board_copy = deepcopy(gameboard)
        make_move(board_copy, move[0], move[1], player)

        if first_node:
            score = -negamax(board_copy, opponent, next_depth, -beta, -alpha, TT)
            first_node = False
        else:
            score = -negamax(board_copy, opponent, next_depth, -alpha - 1, -alpha, TT)
            if alpha < score < beta:
                score = -negamax(board_copy, opponent, next_depth, -beta, -alpha, TT)

        if score > best_score:
            best_score = score
            best_move = move
        
        print(f"{move}    Score: {score}    Alpha: {alpha}")

    TT.clear()

    return best_move, best_score


def display_board(gameboard):
    for y in range(BoardSize):
        for x in range(BoardSize):
            square = y * BoardSize + x
            if gameboard.get_bit(square, 1) == 1:
                symbol = 'A'
            elif gameboard.get_bit(square, -1) == 1:
                symbol = 'P'
            else:
                symbol = '-'
            print(symbol, end=' ')
        print()


if __name__ == "__main__":
    gameboard = HexapawnBitboard()
    current_player = 1

    display_board(gameboard)

    while True:
        if current_player == -1:
            move = get_user_input()
            make_move(gameboard, move[0], move[1], -1)
            display_board(gameboard)
            if check_win(gameboard, -1):
                print("Player 2 wins!")
                break
            current_player = 1

        if current_player == 1:
            start_time = time.time()
            ai_solution = solve(gameboard, 1, 22)
            ai_move = ai_solution[0]
            make_move(gameboard, ai_move[0], ai_move[1], 1)
            print()
            display_board(gameboard)
            print("Evaluation:", ai_solution[1])
            end_time = time.time()
            print(f"AI move took {end_time - start_time} seconds to calculate.\n")
            if check_win(gameboard, 1):
                print("Player 1 wins!")
                break
            current_player = -1

Any help possible is greatly appreciated. I would love to know why my code isn't working as expected even though it is modeled off the Wikipedia pseudocode.

0

There are 0 answers