# Example of a prefix tree implementation in python
# Trie data-structure implementation
# https://en.wikipedia.org/wiki/Trie
# original date: 2021-12-03
#
# The word list is read through ``fileinput``: pass one or more file names on
# the command line, or pipe the list on standard input, one word per line.
#
# Sample word list shipped alongside this script (prefix_tree/fruits.txt,
# one entry per line, reproduced here comma-separated):
#   apple, apricot, avocado, banana, berry, blackberry, blood orange,
#   blueberry, boysenberry, breadfruit, cantaloupe, cherry, citron, citrus,
#   coconut, crabapple, cranberry, current (sic; presumably "currant"), date,
#   dragon fruit, durian, elderberry, fig, grape, grapefruit, guava, honeydew,
#   jackfruit, kiwi, kumquat, lemon, lime, lingonberry, loquat, lychee,
#   mandarin orange, mango, marionberry, melon, mulberry, nectarine, orange,
#   papaya, passion fruit, peach, pear, persimmon, pineapple, plantain, plum,
#   pluot, pomegranate, pomelo, prune, quince, raisin, raspberry, star fruit,
#   strawberry, tangelo, tangerine, ugli fruit, watermelon

import fileinput

# Key that marks "a word ends at this node".  It is the empty string so that
# it can never collide with a real one-character key, and so that
# get_strings() naturally yields an empty suffix for it.
_END_OF_WORD = ''


def load_from_stdin():
    """Read the word list, one word per line, and return it lower-cased.

    Lines are taken from the files named on the command line, or from
    standard input when no file is given (standard ``fileinput`` behaviour).
    Surrounding whitespace is stripped and blank lines are skipped, since an
    empty string is not a word.

    Returns:
        A list of lower-cased words in input order.
    """
    words = []
    # The context manager closes the underlying stream once reading is done.
    with fileinput.input() as lines:
        for line in lines:
            word = line.strip()
            if word:
                # non case sensitive
                words.append(word.lower())
    return words


# create the trie data structure
def build_trie(words):
    """Build a trie from an iterable of words.

    The trie is a nested dict: each key is a single character leading to the
    child node (another dict).  A node where a word ends carries the extra
    key ``''`` mapped to an empty dict, so a word that is a prefix of another
    (e.g. "grape" / "grapefruit") is not lost.

    Args:
        words: iterable of strings; empty strings are ignored.

    Returns:
        The root node of the trie (``{}`` when there are no words).
    """
    root = {}
    for word in words:
        if not word:
            # Nothing to index; also avoids marking the root as a word end.
            continue
        node = root
        for letter in word:
            node = node.setdefault(letter, {})
        node[_END_OF_WORD] = {}
    return root


def get_sub_tree(nodes, pattern):
    """Return the node reached by following ``pattern`` from ``nodes``.

    Args:
        nodes: a trie node as produced by build_trie().
        pattern: the prefix to follow, one character per level.

    Returns:
        The sub-tree below the prefix, ``nodes`` itself for an empty pattern,
        or ``{}`` when the prefix is not present in the trie.
    """
    for letter in pattern:
        if letter not in nodes:
            return {}
        nodes = nodes[letter]
    return nodes


def get_strings(nodes):
    """Return every string stored below ``nodes``.

    A child that is an empty dict terminates a string.  This covers both the
    ``''`` end-of-word marker written by build_trie() (which contributes an
    empty suffix) and plain leaf nodes of hand-built tries.

    Args:
        nodes: a trie node.

    Returns:
        A list of suffixes in dict iteration order.
    """
    words = []
    for key, child in nodes.items():
        if not child:
            words.append(key)
        else:
            words.extend(key + suffix for suffix in get_strings(child))
    return words


def search_in_trie(nodes, pattern):
    """Return all words in the trie that start with ``pattern``.

    The pattern is matched as given (no case folding); a pattern that is
    itself a stored word is included in the result.

    Args:
        nodes: the root node of a trie built by build_trie().
        pattern: the prefix to complete.

    Returns:
        A list of complete words, or ``[]`` when nothing matches.
    """
    sub_tree = get_sub_tree(nodes, pattern)
    return [pattern + suffix for suffix in get_strings(sub_tree)]


def main():
    """Load the word list, build the trie and print completions for "ap"."""
    words = load_from_stdin()
    trie = build_trie(words)
    # test matching prefix "ap"
    suggestions = search_in_trie(trie, 'ap')
    print(suggestions)


# Only run the demo when executed as a script, so the module can be imported
# without consuming standard input.
if __name__ == '__main__':
    main()