B+Tree Insert Implementation (DISK) in Python

158 views Asked by At

Below is my code for the split method of BTree class. Now, when I test the insertion and more than 1 split occurs, it doesn't reflect on the tree structure, although I see the keys list is updated in the parent node when debugging and the splitting is done correctly. As shown in the code, I do write back the changes to the DISK. Any suggestion why it's not working properly?

FYI, the split method is called in the insert_recursive method below:

    def _insert_recursive(self, node_addr: Address, parent_addr: Address, key: KT, value: VT):
        node = get_node(node_addr)
        node.parent_addr = parent_addr

        if node.is_leaf:
            node.insert_data(key, value)
            node.keys, node.data = self._sort_keys_data(node.keys, node.data)

            if len(node.keys) > self.L:
                # If the leaf node overflows, split it
                self._split_leaf_node(node)
            node.write_back()  # Update changes made to the leaf node

        else:
            # Finding the appropriate child node to insert the key
            idx = node.find_idx(key)
            child_addr = node.children_addrs[idx]
            self._insert_recursive(child_addr, node_addr, key, value)
            node.write_back()  # Update changes made to the internal node
    def _split_leaf_node(self, node: BTreeNode) -> None:
        mid = len(node.keys) // 2

        # Create a new leaf node
        new_node = BTreeNode(DISK.new(), node.parent_addr, None, True)
        new_node.keys = node.keys[mid:]
        new_node.data = node.data[mid:]

        # Update the node's keys and data
        node.keys = node.keys[:mid]
        node.data = node.data[:mid]

        # Update the parent's children addresses
        if node.parent_addr is not None:
            parent_node = get_node(node.parent_addr)
            idx = parent_node.children_addrs.index(node.my_addr)
            parent_node.children_addrs.insert(idx + 1, new_node.my_addr)
            parent_node.keys.insert(idx, new_node.keys[0])  # Update the keys in the parent node

            node.index_in_parent = idx
            new_node.index_in_parent = idx + 1
            DISK.write(parent_node.my_addr,parent_node)

            if len(parent_node.keys) > self.M - 1:
                self._split_internal(parent_node, node.parent_addr)

            # Update the index_in_parent of node and new_node in the new parent node
            if parent_node.parent_addr is not None:
                new_parent_node = get_node(parent_node.parent_addr)
                idx = new_parent_node.children_addrs.index(parent_node.my_addr)
                new_parent_node.children_addrs.insert(idx + 1, new_node.my_addr)
                new_parent_node.keys.insert(idx, parent_node.keys[-1])

                node.index_in_parent = idx
                new_node.index_in_parent = idx + 1
                new_parent_node.write_back()
                node.write_back()
                new_node.write_back()

        # Update the root node if necessary
        if node.my_addr == self.root_addr:
            root = BTreeNode(DISK.new(), None, None, False)
            root.keys = [new_node.keys[0]]  # Assign the mid key to the new root
            root.children_addrs = [node.my_addr, new_node.my_addr]  # Children are the split nodes
            node.index_in_parent = root.children_addrs.index(node.my_addr)
            new_node.index_in_parent = root.children_addrs.index(new_node.my_addr)
            node.parent_addr = root.my_addr
            new_node.parent_addr = root.my_addr
            self.root_addr = root.my_addr

            node.write_back()
            new_node.write_back()
            root.write_back()
        else:
            node.write_back()
            new_node.write_back()

I ran the following test which require 2 splits, and the result is only showing the first split.

def test_big_tree():
    M = 3
    L = 3
    btree = BTree(M, L)
    for i in range(6):
        btree.insert(i, str(i))
    for i in range(6):
        assert btree.find(i) == str(i)

enter image description here

0

There are 0 answers