get_propositions.py (1969B)
#!/usr/bin/python3 -u
# Get opinions and modifications of a user
#
# For every USER given on the command line, the user's "opinions" and
# "versions" profile pages are fetched and one line "<resource> 1" is
# printed on stdout for each entry found on them.

from common import HEADERS, url2res
from bs4 import BeautifulSoup
import requests
import sys
import time

PROPOSITIONS_URL = 'https://www.republique-numerique.fr/profile/%s/opinions'
VERSIONS_URL = 'https://www.republique-numerique.fr/profile/%s/versions'


def _first_class(tag):
    """Return the first CSS class of *tag*, or None if it has none.

    The "class" attribute value may be either a list or a single string;
    both forms are handled.
    """
    classes = tag.get("class")
    if isinstance(classes, list):
        # An empty list would make classes[0] raise IndexError.
        return classes[0] if classes else None
    return classes


def _consultation_href(div):
    """Return the first href inside *div* starting with '/consultations'.

    Returns None when *div* contains no such link.
    """
    for a in div.find_all('a'):
        href = a.get('href')
        if href and href.startswith('/consultations'):
            return href
    return None


def _print_resources(url):
    """Fetch *url* and print one "<resource> 1" line per entry on the page.

    Duplicates within the page are still printed, but a warning is written
    to stderr because they may indicate a scraping problem.
    """
    data = requests.get(url, headers=HEADERS)
    # Pause after each request so consecutive fetches are spaced out.
    time.sleep(1)
    tree = BeautifulSoup(data.text, 'html.parser')

    seen = set()

    # find_all('div', class_='opinion__data') does not work on older bs4
    # versions, so every div is visited and filtered by hand.
    for div in tree.find_all('div'):
        if _first_class(div) != 'opinion__data':
            continue

        res_url = _consultation_href(div)
        if res_url is None:
            # Nothing to convert: do not hand None to url2res.
            print("warning: no consultation link found in an entry of %s" %
                  url, file=sys.stderr)
            continue

        res = url2res(res_url)
        if res in seen:
            print("warning: duplicate entry for %s" % res,
                  file=sys.stderr)
            print("this may indicate a problem with the scraping",
                  file=sys.stderr)
        seen.add(res)
        print("%s 1" % res)


def main(argv):
    """Run the script for the users named in argv[1:].

    Returns the process exit status: 1 (after printing the usage message
    on stderr) when no user is given, 0 otherwise.
    """
    users = argv[1:]
    # Slicing never raises IndexError, so an empty list must be tested
    # for explicitly to detect missing arguments.
    if not users:
        print("Usage: %s USER...\n"
              "Returns all opinions and modifications of each USER" %
              argv[0], file=sys.stderr)
        return 1

    for user in users:
        for url in [PROPOSITIONS_URL % user, VERSIONS_URL % user]:
            _print_resources(url)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))