The user provides a text file to be searched and a pattern to search for. The program builds a suffix tree and uses it to find all occurrences of the pattern in text, then prints their indexes.
class Node:
def __init__(self, start, substr):
self.start = start
self.substr = substr
self.branches = {}
def insert_into_tree(subroot, suffix, start):
prefix_len = len(subroot.substr)
new_suffix = str(suffix[prefix_len:])
if(len(subroot.branches) == 0):
left_child = Node(subroot.start, "")
right_child = Node(start, new_suffix)
subroot.branches[""] = left_child
subroot.branches[new_suffix] = right_child
else:
for (substr, node) in subroot.branches.items():
if len(substr) > 0 and new_suffix.startswith(substr):
insert_into_tree(node, new_suffix, start)
break
else:
new_child = Node(start, new_suffix)
subroot.branches[new_suffix] = new_child
def build_suffix_tree(t_str):
len_str = len(t_str)
i = len_str - 1
root = Node(len_str, "")
while i >= 0:
insert_into_tree(root, str(t_str[i:]), i)
i -= 1
return root
def find_occurrences(root, pattern):
for (substr, node) in root.branches.items():
if pattern in substr:
print(f"Found at index {node.start}")
find_occurrences(node, pattern)
file_path = input("Provide path to the text file\n>")
file = open(file_path, "r")
text = file.read()
file.close()
pattern = input("Provide the pattern you're searching for\n>")
root = build_suffix_tree(text)
find_occurrences(root, pattern)
My find_occurrences function is supposed to print every index the pattern occurs at. Instead, it prints the position of every index up to and including the last occurrence of the pattern
example:
Provide path to the text file
lorem.txt
Provide the pattern you're searching for
sit
Found at index 0
Found at index 19
Found at index 18
Found at index 17
Found at index 16
Found at index 15
Found at index 14
Found at index 13
Found at index 12
Found at index 11
Found at index 10
Found at index 9
Found at index 8
Found at index 7
Found at index 6
Found at index 5
Found at index 4
Found at index 3
Found at index 2
Found at index 1
Check out pdb - The Python Debugger to debug small programs
python3 -m pdb myscript.pybto set a breakpoint (so you can inspect your program there)cto run up to that point (continue)?to explore commands .. this will allow you to inspect the live state of your program wherever you breakpoint or continue to