haspirater

detect aspirated 'h' in French words (local mirror of https://gitlab.com/a3nm/haspirater)
git clone https://a3nm.net/git/haspirater/
Log | Files | Refs | README | LICENSE

buildtrie.py (1212B)


      1 #!/usr/bin/env python3
      2 
      3 """From a list of values (arbitrary) and keys (words), create a trie
      4 representing this mapping"""
      5 
      6 import json
      7 import sys
      8 
      9 # first item is a dictionnary from values to an int indicating the
     10 # number of occurrences with this prefix having this value
     11 # second item is a dictionnary from letters to descendent nodes
     12 def empty_node():
     13   return [{}, {}]
     14 
     15 def insert(trie, key, val):
     16   """Insert val for key in trie"""
     17   values, children = trie
     18   # create a new value, if needed
     19   if val not in values.keys():
     20     values[val] = 0
     21   # increment count for val
     22   values[val] += 1
     23   if len(key) > 0:
     24     # create a new node if needed
     25     if key[0] not in children.keys():
     26       children[key[0]] = empty_node()
     27     # recurse
     28     return insert(children[key[0]], key[1:], val)
     29 
     30 if __name__ == '__main__':
     31   trie = empty_node()
     32 
     33   for line in sys.stdin.readlines():
     34     line = line.split()
     35     value = line[0]
     36     word = line[1].lower() if len(line) == 2 else ''
     37     # a trailing space is used to mark termination of the word
     38     # this is useful in cases where a prefix of a word is a complete,
     39     # different word with a different value
     40     insert(trie, word+' ', value)
     41 
     42   print(json.dumps(trie))
     43