adding immutability to the avl search tree class

97 views Asked by At

I wrote a dictionary class of an avl search tree with values in leaves, but it is necessary to add immutability to it, that is, when deleting and adding anything (whether it is changing an element with an existing key or adding a new element), a new tree should be created based on an existing one. I can't figure out how to write the delete function. It should return a new tree based on the old one, but without the deleted element. I will be very grateful for any help(question of life and death)

class Node:
    def __init__(self,key,value=None,left = None, right = None, inv = False):
        self.key = key
        self.value = value
        if inv:
            left,right = right,left
        if left and right:
            self.height = max(left.height,right.height)+1
        if left:
            self.height = left.height+1
        if right:
            self.height = right.height+1
        else:
            self.height = 1
        self.left = left
        self.right = right

class Dict:
    def __init__(self):
        self.root = None
        self.len = 0

    def __len__(self):
        return self.len

    def __getitem__(self,key):
        if self.root:
            return self.get(self.root,key)
        raise KeyError()

    def __contains__(self,key):
            try:
                self.__getitem__(key)
                return True
            except:
                return False
    def __setitem__(self,key,value):
        if not self.root:
            self.root = Node(key,value)
        else:
            self.root = self.put(self.root,key,value)
        self.len+=1

    def __delitem__(self,key):
        if key not in self:
            raise KeyError()
        else:
            self.root = self.delete(self.root,key)
            self.len-=1

    #def delete(self,tree,key):
        #???

    def height(self,tree): return tree.height if tree else 0

    def children(self,tree, inv): return (tree.right,tree.left) if inv else (tree.left,tree.right)

    def put(self,tree,key,value):
        if not tree.left:
            if key == tree.key:
                self.len-=1
                return Node(key,value,tree.left,tree.right)
            elif key < tree.key:
                return Node(tree.key,tree,Node(key,value),tree)
            else:
                return Node(tree.key, tree, tree, Node(key, value))
        if tree.key == key:
            self.put(tree.value,key,value)
            return tree
        inv = key < tree.key
        left, right = self.children(tree, inv)
        return self.avl(Node(tree.key, tree.value, left, self.put(right, key, value), inv), inv)

    def get(self,tree,key):
        if not tree.left:
            if key != tree.key:
                raise KeyError()
            return tree.value
        if key == tree.key:
            return self.get(tree.value,key)
        return self.get(self.children(tree, key < tree.key)[1], key)

    def avl(self,tree,inv):
        l,r = self.children(tree,inv)
        if self.height(r) - self.height(l) < 2:
            return tree
        rl, rr = self.children(r, inv)
        if self.height(rl) - self.height(rr) == 1:
            r = self.reassoc(r, not inv)
        return self.reassoc(Node(tree.key, tree.value, l, r, inv), inv)

    def reassoc(self, tree, inv):
        l, r = self.children(tree, inv)
        rl, rr = self.children(r, inv)
        return Node(r.key, r.value, Node(tree.key, tree.value, l, rl, inv), rr, inv)

def print_tree(tree, indent = ""):
    if tree.right: 
        print_tree(tree.right, indent + "    ")
    if tree.left == tree.right:
        print(f"{indent}{tree.key} -> {tree.value}")
    else:
        print(f"{indent}{tree.key}")
        if tree.value is None:
            raise ValueError
    if tree.left: 
        print_tree(tree.left, indent + "    ")
t = Dict()
for v, k in enumerate([5,7,2,1,3,6,2,7]):
  t.__setitem__(k, v)
  print_tree(t.root)
  print()
1

There are 1 answers

1
trincot On BEST ANSWER

The put method needs correction:

When a leaf node with the same key is replaced, this new node does not get referenced by a new parent node. If you would get it referenced by a parent node, you would also need to adjust the value references to that leaf.

This means that value reference in internal nodes loses a bit of its usefulness. I would therefore go for another approach where the value attribute of internal nodes will always be None. When the targeted key is found in an internal node, then let the drill down continue in both left and right directions. One of these will successfully find the leaf node with that key, and the other not.

To make sure that nodes are not replaced when they occur in the subtree that does not change -- as this is an unnecessary burden for memory management -- you could write a getNode method that would take the existing node, and the parameters for which you are about to create a new node. It would only create a node when the parameters (key, value, ...etc) are not exactly those of the node that already exists. Otherwise it will return the node as-is.

The delete operation is not too difficult. Since your tree has all the values in its leaves, apply this procedure:

  • Find the leaf with the targeted key (standard procedure)
  • Return None to the parent of this node, to indicate its deletion
  • At the parent we should then take the other (only remaining) child and return that node as replacement for the current parent node.

So, this makes a leaf from a previous parent of (two) leaves.

Another challenge is what to do with a key that is deleted, but still occurs in internal nodes. In principle this is not a problem, but for the put algorithm it is easier if the presence of a key in an internal node is a guarantee that it exists in a leaf too. Surely it could be solved, but I went for an approach where internal keys are replaced when the corresponding key was deleted from the tree. This replacement key could bubble up from recursion too, as it will be the key of the sibling leaf, next to the deleted leaf.

Apply the avl logic as you did with put.

Here is your adapted code. Comments indicate where the most important changes were applied:

class Node:
    def __init__(self,key, value=None, left=None, right=None, inv=False):
        self.key = key
        self.value = value
        self.left, self.right = (right, left) if inv else (left, right)
        self.height = max(left.height if left else 0, right.height if right else 0) + 1

class Dict:
    def __init__(self):
        self.root = None
        self.len = 0

    def __len__(self):
        return self.len

    def __getitem__(self, key):
        if self.root:
            return self.get(self.root, key)
        raise KeyError()

    def __contains__(self, key):
        try:
            self.__getitem__(key)
            return True
        except:
            return False
                
    def __setitem__(self, key, value):
        self.root = self.put(self.root, key, value) if self.root else Node(key, value)
        self.len += 1

    def __delitem__(self, key):
        if key not in self:
            raise KeyError()
        else:
            self.root = self.delete(self.root, key)[0]
            self.len -= 1

    # This method intends to return a new node, except when the current node has exactly
    #       the same attributes: in that case it returns that node without creating a new one
    # Using this is optional, but it can save on the number of nodes that is created
    def getNode(self, tree, key, value, left=None, right=None, inv=False):
        if inv:
            left, right = right, left
        if (tree.key, tree.value, tree.left, tree.right) != (key, value, left, right):
            return Node(key, value, left, right)
        return tree

    # delete: returns the root of the given subtree after deletion, and also a new key that can be 
    #    used in ancestor nodes that happen to have the key that was deleted. This way we guarantee
    #    that keys in internal nodes always correspond to a key that exist in a leaf node.
    def delete(self, tree, key):
        if not tree.left:  # is leaf
            if key == tree.key:
                return None, None  # indicate removal to parent, who will have to pull the sibling up
            return tree, None  # not found here: no removal (node will be found in an alternative path)
        if key == tree.key:
            # we assume this means the key is present as a leaf somewhere: try both sides
            left, leftkey = self.delete(tree.left, key)
            right, rightkey = self.delete(tree.right, key)
            # If direct child was removed, pull up its sibling
            #    and use its key for ancestors that have old key
            if not right:
                return left, left.key
            if not left:
                return right, right.key
            inv = leftkey is None
            if inv:
                left, right = right, left
            treekey = newkey = leftkey
        else:
            inv = key > tree.key
            left, right = self.children(tree, inv)
            left, newkey = self.delete(left, key)
            if not left:  # this child was the node that was deleted
                return right, right.key  # pull up sibling
            treekey = tree.key
        return self.avl(self.getNode(tree, treekey, None, left, right, inv), inv), newkey
            
    def height(self, tree):
        return tree.height if tree else 0

    def children(self, tree, inv):
        return (tree.right, tree.left) if inv else (tree.left, tree.right)

    def put(self, tree, key, value, onlyreplace=False):
        if not tree.left:  # It is a leaf
            if key == tree.key:  # Replacing
                self.len -= 1
                return self.getNode(tree, key, value)
            if onlyreplace:  # Not found
                return tree  # Don't create node
            if key < tree.key:
                return Node(tree.key, None, Node(key, value), tree)
            return Node(tree.key, None, tree, Node(key, value))
        if key == tree.key:
            # we assume this means the key is present as a leaf somewhere
            return self.getNode(tree, key, None, self.put(tree.left, key, value, True),
                                                 self.put(tree.right, key, value, True))
        inv = key < tree.key
        left, right = self.children(tree, inv)
        return self.avl(self.getNode(tree, tree.key, None, left, self.put(right, key, value, onlyreplace), inv), inv)

    def get(self,tree,key):
        if not tree.left:
            if key != tree.key:
                raise KeyError()
            return tree.value
        if key == tree.key:
            # try both sides...
            try:
                return self.get(tree.left, key)
            except:
                return self.get(tree.right, key)
        return self.get(self.children(tree, key < tree.key)[1], key)

    def avl(self,tree,inv):
        l,r = self.children(tree,inv)
        if self.height(r) - self.height(l) < 2:
            return tree
        rl, rr = self.children(r, inv)
        if self.height(rl) - self.height(rr) == 1:
            r = self.reassoc(r, not inv)
        return self.reassoc(Node(tree.key, tree.value, l, r, inv), inv)

    def reassoc(self, tree, inv):
        l, r = self.children(tree, inv)
        rl, rr = self.children(r, inv)
        return Node(r.key, r.value, Node(tree.key, tree.value, l, rl, inv), rr, inv)