Difficulty correctly implementing B-trees in Python

93 views Asked by At

I am working on a project and have fleshed out this class but I am not passing my test cases. Here is the implementation of my B-tree class:

class BTree:
    def __init__(self, M: int, L: int):
        self.M = M
        self.L = L
        self.root_addr: Address = DISK.new()
        DISK.write(self.root_addr, BTreeNode(self.root_addr, None, None, True))

    def _find_leaf(self, key: KT) -> Address:
        current_addr = self.root_addr
        while True:
            node = DISK.read(current_addr)
            if node.is_leaf:
                return current_addr

            idx = node.find_idx(key)
            if idx < len(node.keys) and node.keys[idx] == key:
                idx += 1
            current_addr = node.children_addrs[idx]

    def insert(self, key: KT, value: VT) -> None:
        leaf_addr = self._find_leaf(key)
        leaf = DISK.read(leaf_addr)

        leaf.insert_data(key, value)

        if len(leaf.keys) > self.M - 1:
            self._split_node(leaf_addr)

        DISK.write(leaf_addr, leaf)

    def _split_node(self, node_addr: Address) -> None:
        node = DISK.read(node_addr)
        middle_index = len(node.keys) // 2
        middle_key = node.keys[middle_index]

        left_node = BTreeNode(DISK.new(), node.parent_addr, None, node.is_leaf)
        right_node = BTreeNode(DISK.new(), node.parent_addr, None, node.is_leaf)

        left_node.keys = node.keys[:middle_index]
        right_node.keys = node.keys[middle_index + 1:]

        if node.is_leaf:
            left_node.data = node.data[:middle_index]
            right_node.data = node.data[middle_index + 1:]
        else:
            left_node.children_addrs = node.children_addrs[:middle_index + 1]
            right_node.children_addrs = node.children_addrs[middle_index + 1:]

        DISK.write(left_node.my_addr, left_node)
        DISK.write(right_node.my_addr, right_node)

        if node.parent_addr is not None:
            parent = DISK.read(node.parent_addr)
            insert_index = parent.find_idx(middle_key)

            parent.keys.insert(insert_index, middle_key)
            parent.children_addrs[insert_index] = left_node.my_addr
            parent.children_addrs.insert(insert_index + 1, right_node.my_addr)

            DISK.write(node.parent_addr, parent)
        else:
            new_root = BTreeNode(DISK.new(), None, None, False)
            new_root.keys = [middle_key]
            new_root.children_addrs = [left_node.my_addr, right_node.my_addr]
            self.root_addr = new_root.my_addr

            DISK.write(new_root.my_addr, new_root)

    def find(self, key: KT) -> Optional[VT]:
        current_addr = self.root_addr

        while True:
            node = DISK.read(current_addr)

            idx = node.find_idx(key)

            if node.is_leaf:
                if idx < len(node.keys) and node.keys[idx] == key:
                    return node.data[idx]
                return None
            else:
                if idx < len(node.keys) and node.keys[idx] == key:
                    idx += 1
                current_addr = node.children_addrs[idx]

Here is the tests that are failing:

def btree_properties_recurse(root_node_addr, node, M, L):

    assert sorted(node.keys) == node.keys # Keys should remain sorted so that a binary search is possible

    if node.is_leaf:
        # Leaf node general properties
        assert len(node.children_addrs) == 0
        assert len(node.keys) == len(node.data)
        assert len(node.data) <= L
    else:
        # Non-leaf node general properties
        assert len(node.data) == 0
        assert len(node.keys) == len(node.children_addrs) - 1
        assert len(node.children_addrs) <= M

    if node.my_addr == root_node_addr:
        # Root node properties
        assert node.parent_addr is None
        assert node.index_in_parent is None
        if not node.is_leaf:
            assert len(node.children_addrs) >= 2
    else:
        # Non-root node properties
        assert node.parent_addr is not None
        assert node.index_in_parent is not None
        if node.is_leaf:
            assert len(node.data) >= (L+1)//2
        else:
            assert len(node.children_addrs) >= (M+1)//2
        
    # Run the assertions on all children
    for child_addr in node.children_addrs:
        btree_properties_recurse(root_node_addr, DISK.read(child_addr), M, L)

def test_btree_properties_even() -> None:
    M = 6
    L = 6
    btree = BTree(M, L)
    for i in range(100):
        btree.insert(i, str(i))
    for i in range(0, -100, -1):
        btree.insert(i, str(i))

    root_addr = btree.root_addr
    btree_properties_recurse(root_addr, DISK.read(root_addr), M, L)

The tests are fairly generic and my implementation is not working as intended. I have tried to understand with assert statements that I have since removed. Any guidance is welcome, my tests do run, but the assertions are always wrong. I believe my _split_node function is incorrect.

0

There are 0 answers