haspirater

detect aspirated 'h' in French words (local mirror of https://gitlab.com/a3nm/haspirater)
git clone https://a3nm.net/git/haspirater/
Log | Files | Refs | README

uptrie.py (1003B)


      1 #!/usr/bin/env python3
      2 
      3 """Read json trie in stdin, make internal node decisions and output json dump to
      4 stdout"""
      5 
      6 import itertools
      7 import operator
      8 import json
      9 import sys
     10 
     11 trie = json.load(sys.stdin)
     12 
     13 def uptrie(trie):
     14   """Make internal node decisions if possible"""
     15   for child in trie[1].values():
     16     uptrie(child)
     17   decided_children = [(list(t[0].items())[0][0], t) for t in trie[1].values() if
     18           len(t[0].keys()) == 1]
     19   dchild_g = {}
     20   for (x, y) in decided_children:
     21       if x not in dchild_g.keys():
     22           dchild_g[x] = []
     23       dchild_g[x].append(y)
     24   sums = [(x, len(y)) for (x, y) in dchild_g.items()]
     25   if len(sums) == 0:
     26     return
     27   best = max(sums, key=operator.itemgetter(1))
     28   if best[1] >= 2:
     29     # compress here
     30     trie.append(best[0])
     31     nchildren = {}
     32     for key, child in trie[1].items():
     33       if len(child[0].keys()) != 1 or list(child[0].items())[0][0] != best[0]:
     34         nchildren[key] = child
     35     trie[1] = nchildren
     36 
     37 uptrie(trie)
     38 
     39 print(json.dumps(trie))
     40