frhyme

guess the last phonemes of a French word (local mirror of https://gitlab.com/a3nm/frhyme)
git clone https://a3nm.net/git/frhyme/
Log | Files | Refs | README

buildtrie.py (1233B)


      1 #!/usr/bin/python3 -O
      2 
      3 """From a list of values (arbitrary) and keys (words), create a trie
      4 representing this mapping"""
      5 
      6 import json
      7 import sys
      8 
      9 # first item is a dictionnary from values to an int indicating the
     10 # number of occurrences with this prefix having this value
     11 # second item is a dictionnary from letters to descendent nodes
     12 def empty_node():
     13   return [{}, {}]
     14 
     15 trie = empty_node()
     16 
     17 def insert(trie, key, val):
     18   """Insert val for key in trie"""
     19   values, children = trie
     20   # create a new value, if needed
     21   if len(key) == 0:
     22     if val not in values.keys():
     23       values[val] = 0
     24     # increment count for val
     25     values[val] += 1
     26   if len(key) > 0:
     27     # create a new node if needed
     28     if key[0] not in children.keys():
     29       children[key[0]] = empty_node()
     30     # recurse
     31     return insert(children[key[0]], key[1:], val)
     32 
     33 while True:
     34   line = sys.stdin.readline()
     35   if not line:
     36     break
     37   line = line.strip().split('\t')
     38   # a trailing space is used to mark termination of the word
     39   # this is useful in cases where a prefix of a word is a complete,
     40   # different word with a different value
     41   # two spaces because some data words have multiple spaces
     42   insert(trie, line[0]+'  ', line[1])
     43 
     44 print(json.dumps(trie))
     45