get_votes.py (2417B)
#!/usr/bin/python3 -u
"""Get all votes of one or more users.

Usage: get_votes.py USER...

For each USER, the vote page built from VOTE_URL is downloaded and scanned
for vote entries.  One line "<res> <vote>" is written to stdout per entry,
where <res> is whatever ``url2res`` returns for the voted item's link and
<vote> is the number looked up in KEYS.  Diagnostics go to stderr.
"""

from common import HEADERS, url2res
from bs4 import BeautifulSoup
import requests
import sys
import time

VOTE_URL = 'https://www.republique-numerique.fr/profile/%s/votes'

# Maps the suffix of a "label-<suffix>" CSS class to the numeric vote printed.
KEYS = {
    'success': 1,
    'warning': 0,
    'danger': -1,
}


def _class_list(tag):
    """Return the CSS classes of *tag* as a list (empty if it has none).

    The "class" attribute can come back as a list, as a plain string, or as
    None when the attribute is absent; normalising it here keeps the callers
    free of type checks.
    """
    classes = tag.get('class')
    if not classes:
        return []
    if isinstance(classes, str):
        return classes.split()
    return list(classes)


def _consultation_link(div):
    """Return (href, title) of the first '/consultations' link inside *div*.

    Both values are None when *div* contains no such link.
    """
    for anchor in div.find_all('a'):
        href = anchor.get('href')
        if href and href.startswith('/consultations'):
            return href, anchor.string
    return None, None


def _vote_value(div):
    """Return the numeric vote displayed in *div*, or None if unreadable.

    The first <span> whose leading class is 'label' or 'label-success' is
    taken as the vote badge (the same selection rule the script always
    used).  Its classes are then searched for a "label-<key>" entry whose
    <key> is in KEYS, so the result does not depend on class order.
    """
    for span in div.find_all('span'):
        classes = _class_list(span)
        if not classes or classes[0] not in ('label', 'label-success'):
            continue
        for name in classes:
            if name.startswith('label-') and name.split('-')[1] in KEYS:
                return KEYS[name.split('-')[1]]
        # Only the first badge is considered; give up rather than guess.
        return None
    return None


def main(argv):
    """Print the votes of every user named in argv[1:].

    Returns the process exit status: 1 when no user was given (after
    printing the usage text to stderr), 0 otherwise.
    """
    users = argv[1:]
    # A slice never raises IndexError, so emptiness must be tested directly.
    if not users:
        print("Usage: %s USER...\n"
              "Returns all votes of each USER" %
              argv[0], file=sys.stderr)
        return 1

    for user in users:
        response = requests.get(VOTE_URL % user, headers=HEADERS)
        # Pause after each request -- presumably to stay polite to the server.
        time.sleep(1)
        votes_tree = BeautifulSoup(response.text, 'html.parser')

        # Resources already printed for this user, to detect duplicates.
        seen = set()

        # Walk the vote entries in the order they appear in the page.
        # See get_propositions.py for why <div>s are filtered by hand here.
        for div in votes_tree.find_all('div'):
            classes = _class_list(div)
            if not classes or classes[0] != 'opinion__data':
                continue

            res_url, res_title = _consultation_link(div)

            vote = _vote_value(div)
            # 0 is a legitimate vote, so compare against None explicitly.
            if vote is None:
                print("warning: could not determine the vote for %s "
                      "(user %s), skipping" % (res_url, user),
                      file=sys.stderr)
                continue

            res = url2res(res_url, res_title)

            if res:
                if res in seen:
                    print("warning: duplicate entry for %s" % res,
                          file=sys.stderr)
                    print("this may indicate a problem with the scraping",
                          file=sys.stderr)
                seen.add(res)
                print("%s %s" % (res, vote))

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))