republique

helper scripts for www.republique-numerique.fr
git clone https://a3nm.net/git/republique/
Log | Files | Refs | README

stdump.py (2227B)


      1 #!/usr/bin/python3
      2 
      3 """dump all opinions, versions, and arguments to TSV"""
      4 
      5 import html
      6 from itertools import chain
      7 from json import load, dumps
      8 from os import listdir
      9 from os.path import isfile, join
     10 import sys
     11 
     12 def getAuthor(j):
     13     pref = "https://www.republique-numerique.fr/profile/user/"
     14     pref2 = "https://www.republique-numerique.fr/profile/"
     15     s = j['_links']['profile']
     16     if not (s.startswith(pref)):
     17         assert (s.startswith(pref2))
     18         return s[len(pref2):]
     19     return s[len(pref):]
     20 
     21 def opinionId(j, i=None):
     22     return 'opinion/%d' % j['id']
     23 
     24 def argumentId(j, i):
     25     return (i + '/argument/%d') % j['id']
     26 
     27 def versionId(j, i=None):
     28     return 'opinion/%d/version/%d' % (j['parent']['id'], j['id'])
     29 
     30 def parse(j, i, key, idF):
     31     try:
     32         j = j[key]
     33     except KeyError:
     34         pass
     35     nid = idF(j, i)
     36     author = getAuthor(j['author'])
     37     try:
     38         votes_ok = j['votes_ok']
     39         votes_mitige = j['votes_mitige']
     40         votes_nok = j['votes_nok']
     41         votes_total = j['votes_total']
     42     except KeyError:
     43         votes_ok = j['votes_count']
     44         votes_mitige = 0
     45         votes_nok = 0
     46         votes_total = j['votes_count']
     47     url = j['_links']['show']
     48     try:
     49         body = html.unescape(j['title']).split('\n')[0]
     50     except KeyError:
     51         body = html.unescape(j['body']).split('\n')[0]
     52     yield (nid, author, votes_ok, votes_mitige, votes_nok, votes_total,
     53             url, body)
     54     if 'arguments' in j.keys():
     55         for ja in j['arguments']:
     56             for r in parse(ja, nid, 'argument', argumentId):
     57                 yield r
     58 
     59 def iterfiles(fdir, key, idF):
     60     for f in listdir(fdir):
     61         if not isfile(join(fdir, f)):
     62             continue
     63         with open(join(fdir, f)) as fh:
     64             j = load(fh)
     65             try:
     66                 if j['code'] == 404:
     67                     continue
     68             except KeyError:
     69                 pass
     70             for res in parse(j, None, key, idF):
     71                 yield res
     72 
     73 if __name__ == '__main__':
     74     dump = sys.argv[1]
     75     for t in chain(iterfiles(join(dump, 'opinions'), 'opinion', opinionId),
     76             iterfiles(join(dump, 'versions'), 'version', versionId)):
     77         print ("\t".join(str(x) for x in t))
     78