"""
A Trie/Prefix Tree is a kind of search tree used to provide quick lookup
of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity
making it impractical in practice. It however provides O(max(search_string, length of
longest word)) lookup time making it an optimal approach when space is not an issue.
"""
class TrieNode:
def __init__(self) -> None:
self.nodes: dict[str, TrieNode] = {} # Mapping from char to TrieNode
self.is_leaf = False
def insert_many(self, words: list[str]) -> None:
"""
Inserts a list of words into the Trie
:param words: list of string words
:return: None
"""
for word in words:
self.insert(word)
def insert(self, word: str) -> None:
"""
Inserts a word into the Trie
:param word: word to be inserted
:return: None
"""
curr = self
for char in word:
if char not in curr.nodes:
curr.nodes[char] = TrieNode()
curr = curr.nodes[char]
curr.is_leaf = True
def find(self, word: str) -> bool:
"""
Tries to find word in a Trie
:param word: word to look for
:return: Returns True if word is found, False otherwise
"""
curr = self
for char in word:
if char not in curr.nodes:
return False
curr = curr.nodes[char]
return curr.is_leaf
def delete(self, word: str) -> None:
"""
Deletes a word in a Trie
:param word: word to delete
:return: None
"""
def _delete(curr: TrieNode, word: str, index: int) -> bool:
if index == len(word):
# If word does not exist
if not curr.is_leaf:
return False
curr.is_leaf = False
return len(curr.nodes) == 0
char = word[index]
char_node = curr.nodes.get(char)
# If char not in current trie node
if not char_node:
return False
# Flag to check if node can be deleted
delete_curr = _delete(char_node, word, index + 1)
if delete_curr:
del curr.nodes[char]
return len(curr.nodes) == 0
return delete_curr
_delete(self, word, 0)
def print_words(node: TrieNode, word: str) -> None:
"""
Prints all the words in a Trie
:param node: root node of Trie
:param word: Word variable should be empty at start
:return: None
"""
if node.is_leaf:
print(word, end=" ")
for key, value in node.nodes.items():
print_words(value, word + key)
def test_trie() -> bool:
words = "banana bananas bandana band apple all beast".split()
root = TrieNode()
root.insert_many(words)
# print_words(root, "")
assert all(root.find(word) for word in words)
assert root.find("banana")
assert not root.find("bandanas")
assert not root.find("apps")
assert root.find("apple")
assert root.find("all")
root.delete("all")
assert not root.find("all")
root.delete("banana")
assert not root.find("banana")
assert root.find("bananas")
return True
def print_results(msg: str, passes: bool) -> None:
print(str(msg), "works!" if passes else "doesn't work :(")
def pytests() -> None:
assert test_trie()
def main() -> None:
"""
>>> pytests()
"""
print_results("Testing trie functionality", test_trie())
if __name__ == "__main__":
main()
Trie树(也称为前缀树)是一种树形数据结构,它显示了顺序,将父节点链接到子节点。它是存储具有共性的对象的有效方法。一个很好的例子是在存储电话号码或一般字符串时。
对于字符串示例,假设我们有一系列字符串需要存储在我们的数据存储中
并且我们需要支持的方法之一是对任何单词进行搜索操作,我们可以采用基本方法 - 选择每个单词,并进行字符串比较,逐个字母匹配。算法如下所示
## searching for ear in data store
data_store = ["egg", "eat", "ear", "end"]
to_find = "ear"
## pick each word
## do a string match letter by letter
## when you find a mismatch, move to the next string
## continue this process
## if at the end of an iteration, index has been increased to
## the length of the word to find, we have found a match
for word in data_store:
index = 0
while index < len(word):
if to_find[index] != word[index]:
break
index += 1
if index == len(to_find):
print("a match has been found")
毫无疑问,这种策略可以奏效,但执行此操作的时间复杂度为O(单词数量 x 最长单词长度),这相当昂贵。但是,如果我们将数字的存储表示为一棵树,使得每个字母在树的特定级别中仅出现一次,则可以实现更好的搜索时间。例如,请参见下面的树
e
/ | \
a n g
/ \ | |
r t d g
从上面的表示中可以看出,所有单词都在树中,从字母e开始,它位于所有单词的开头,然后是a、n和g出现在下一层,依此类推……上面的表示称为Trie树。
要开始构建 Trie 树,首先需要定义一个节点,其中包含任何 Trie 树所需的相关属性。
class Node:
def __init__(self, is_word: bool=False):
self.is_word = is_word
self.children = {}
在这里,您可以看到类Node
具有三个实例属性
然后通过为每个字母创建一个节点并将其作为子节点添加到其前面的节点来构建 Trie 树
首先初始化一个空节点
class Trie:
def __init__(self):
self.node = Node()
对于插入操作,获取起始节点,然后对于单词中的每个字母,将其添加到其前面字母的子节点中。最终节点的is_word
属性标记为True,因为我们希望知道单词在哪里结束
def insert(self, word: str) -> None:
node = self.node
for ltr in word:
if ltr not in node.children:
node.children[ltr] = Node()
node = node.children[ltr]
node.is_word=True
在上面的代码中,node
变量首先保存对空节点的引用,而ltr
迭代变量首先保存word
中的第一个字母。这将确保node
比ltr
提前一级。当它们在迭代中都向前移动时,node
将始终保持比ltr
提前一级
对于搜索操作,获取起始节点,然后对于单词中的每个字母,检查它是否出现在当前节点的children
属性中。只要存在,就对下一个字母和下一个节点重复此操作。如果在搜索过程中,我们发现一个不存在的字母,则该单词不存在于 Trie 树中。如果我们成功到达迭代的末尾,那么我们就找到了我们正在寻找的内容。现在是时候返回值了
请查看代码
def search(self, word: str) -> bool:
node = self.node
for ltr in word:
if ltr not in node.children:
return False
node = node.children[ltr]
return node.is_word
对于返回值,有两种情况
node.is_word
,因为我们要确保它实际上是一个单词,而不是一个前缀现在,这是完整的代码
class Node:
def __init__(self, is_word: bool=False):
self.is_word = is_word
self.children = {}
class Trie:
def __init__(self):
self.node = Node()
def insert(self, word: str) -> None:
node = self.node
for ltr in word:
if ltr not in node.children:
node.children[ltr] = Node()
node = node.children[ltr]
node.is_word=True
def search(self, word: str) -> bool:
node = self.node
for ltr in word:
if ltr not in node.children:
return False
node = node.children[ltr]
return node.is_word