republique

helper scripts for www.republique-numerique.fr
git clone https://a3nm.net/git/republique/
Log | Files | Refs | README

get_votes.py (2417B)


      1 #!/usr/bin/python3 -u
      2 # Get all votes of a user
      3 
      4 from common import HEADERS, url2res
      5 from bs4 import BeautifulSoup
      6 import requests
      7 import sys
      8 import time
      9 
     10 VOTE_URL = 'https://www.republique-numerique.fr/profile/%s/votes'
     11 
     12 KEYS = {
     13     'success': 1,
     14     'warning': 0,
     15     'danger': -1,
     16     }
     17 
     18 if __name__ == '__main__':
     19     try:
     20         users = sys.argv[1:]
     21     except IndexError:
     22         print("Usage: %s USER...\n"
     23                 "Returns all votes of each USER" %
     24                 sys.argv[0], file=sys.stderr)
     25         sys.exit(1)
     26 
     27     for user in users:
     28         url = VOTE_URL % user
     29         data = requests.get(url, headers=HEADERS)
     30         time.sleep(1)
     31         votes_tree = BeautifulSoup(data.text, 'html.parser')
     32 
     33         seen = set()
     34 
     35         # redo all votes in chronological order
     36         for div in votes_tree.find_all('div'):
     37             # see get_propositions.py for why the complicated mess below is used
     38             try:
     39                 c = div.get("class")
     40             except KeyError:
     41                 continue
     42             if isinstance(c, list):
     43                 c = c[0]
     44             if c != 'opinion__data':
     45                 continue
     46             res_url = None
     47             res_title = None
     48             for a in div.find_all('a'):
     49                 v = a.get('href')
     50                 if not v:
     51                     continue
     52                 if v.startswith('/consultations'):
     53                     res_url = a.get('href')
     54                     res_title = a.string
     55                     break
     56             raw_v = None
     57             for span in div.find_all('span'):
     58                 try:
     59                     c = span.get("class")
     60                 except KeyError:
     61                     continue
     62                 if isinstance(c, list):
     63                     c = c[0]
     64                 if (c != 'label' and c != 'label-success'
     65                     and c != "label label-success"):
     66                     continue
     67                 raw_v = span.get('class')
     68                 break
     69             v = KEYS[raw_v[1].split('-')[1]]
     70 
     71             res = url2res(res_url, res_title)
     72 
     73             if res:
     74                 if res in seen:
     75                     print("warning: duplicate entry for %s" % res,
     76                             file=sys.stderr)
     77                     print("this may indicate a problem with the scraping",
     78                             file=sys.stderr)
     79                 seen.add(res)
     80                 print ("%s %s" % (res, v))
     81