haspirater

detect aspirated 'h' in French words (local mirror of https://gitlab.com/a3nm/haspirater)
git clone https://a3nm.net/git/haspirater/
Log | Files | Refs | README

majoritytrie.py (879B)


      1 #!/usr/bin/env python3
      2 
      3 """Read json trie in stdin, keep majority value at each node, remove
      4 useless leaf nodes and output trie to stdout"""
      5 
      6 import json
      7 import sys
      8 
      9 trie = json.load(sys.stdin)
     10 
     11 def get_majority(d):
     12   """What are the most probable values?"""
     13   mx = max(d.values())
     14   return [k for k in d.keys() if d[k] == mx]
     15 
     16 def majority(trie):
     17   """Keep only the most probable values at each node"""
     18   if len(trie[1].keys()) == 0:
     19     # keep all options at leaf nodes
     20     trie[0] = list(trie[0].keys())
     21   else:
     22     trie[0] = get_majority(trie[0])
     23   useless = []
     24   for child in trie[1].keys():
     25     majority(trie[1][child])
     26     # if it is relabeled to our majority value and is a leaf, drop it
     27     if trie[1][child][0] == trie[0] and trie[1][child][1] == {}:
     28       useless.append(child)
     29   for child in useless:
     30     del(trie[1][child])
     31 
     32 majority(trie)
     33 
     34 print(json.dumps(trie))
     35