buildtrie.py (1233B)
1 #!/usr/bin/python3 -O 2 3 """From a list of values (arbitrary) and keys (words), create a trie 4 representing this mapping""" 5 6 import json 7 import sys 8 9 # first item is a dictionnary from values to an int indicating the 10 # number of occurrences with this prefix having this value 11 # second item is a dictionnary from letters to descendent nodes 12 def empty_node(): 13 return [{}, {}] 14 15 trie = empty_node() 16 17 def insert(trie, key, val): 18 """Insert val for key in trie""" 19 values, children = trie 20 # create a new value, if needed 21 if len(key) == 0: 22 if val not in values.keys(): 23 values[val] = 0 24 # increment count for val 25 values[val] += 1 26 if len(key) > 0: 27 # create a new node if needed 28 if key[0] not in children.keys(): 29 children[key[0]] = empty_node() 30 # recurse 31 return insert(children[key[0]], key[1:], val) 32 33 while True: 34 line = sys.stdin.readline() 35 if not line: 36 break 37 line = line.strip().split('\t') 38 # a trailing space is used to mark termination of the word 39 # this is useful in cases where a prefix of a word is a complete, 40 # different word with a different value 41 # two spaces because some data words have multiple spaces 42 insert(trie, line[0]+' ', line[1]) 43 44 print(json.dumps(trie)) 45