republique

helper scripts for www.republique-numerique.fr
git clone https://a3nm.net/git/republique/
Log | Files | Refs | README

commit da6b3b320e64b16294994a78dd86d79201fccc7c
parent cfc5ded7132074be9341112e5b10addf3f7cf17c
Author: Antoine Amarilli <a3nm@a3nm.net>
Date:   Wed, 14 Oct 2015 12:21:35 +0200

Merge branch 'master' of a3nm.net:git/republique

Diffstat:
README | 35+++++++++++++++++++++++++++++------
common.py | 32+++++++++++++++++++++++++++++---
get_propositions.py | 13+++++++++++--
get_votes.py | 20+++++++++++++++++---
vote.py | 27+++++++++++++--------------
5 files changed, 99 insertions(+), 28 deletions(-)

diff --git a/README b/README @@ -1,8 +1,10 @@ Helper scripts for www.republique-numerique.fr -Antoine Amarilli, 2015 +Antoine Amarilli <a3nm AT a3nm DOT net>, 2015 License: see COPYING (MIT license) -Requires Debian packages python3-requests, python3-bs4, python3-lxml +Requires Debian packages python3-requests, python3-bs4, python3-lxml. +Please refer to <http://a3nm.net/blog/republique_numerique.html> for more +information (in French). - get_propositions.py USER returns the list of identifiers of all opinions and modifications of USER @@ -15,8 +17,10 @@ Requires Debian packages python3-requests, python3-bs4, python3-lxml where X is -1, 0, or 1 - vote.py EMAIL PASSWORD - connects to www.republique-numerique.fr local account EMAIL PASSWORD + connects to www.republique-numerique.fr using local account EMAIL PASSWORD and performs votes given on standard input in the form of get_votes.py + (account EMAIL/PASSWORD must have been created on www.republique-numerique.fr + beforehand) - dump_all.sh DEST dump raw JSON of all opinions and versions to DEST/opinions and DEST/versions @@ -32,8 +36,27 @@ Examples: get_propositions.py USER | vote.py YOUREMAIL YOURPASS -WARNING: These scripts are provided to simplify your life only. Please use them -responsibly. The author does not encourage any form of ballot stuffing or other -abusive behavior. +Known limitations: + +- Votes on arguments cannot be reliably distinguished from votes on opinions and + versions. The code tries to check from the title, but it could be wrong. The + code may reject a vote erroneously (so some votes may be missing), or mistake + a vote on an argument for a vote on the corresponding opinion or modification + (in which case a favorable vote on the corresponding item may be erroneously + counted). This could probably be solved reliably using the API, but it is not + documented and I can't figure out how to do it. + +- Subject to the limitations above, votes on arguments are discarded. 
Ideally + one would want to retrieve and redo them as well, but retrieving such votes + and identifying the correct argument would be brittle anyway (it could only + look at the title). +- get_propositions.py and get_votes.py are slow due to the need to perform up to + 2 requests per item. Again, this could probably be avoided with adequate usage + of the undocumented API. + +WARNING: These scripts are provided to simplify your life only. Please use them +responsibly. I do not accept any responsibility for any erroneous votes that +this tool may cast on your behalf. I do not encourage any form of ballot stuffing +or other abusive behavior. diff --git a/common.py b/common.py @@ -1,13 +1,20 @@ #!/bin/python3 from bs4 import BeautifulSoup +import json import requests import time HEADERS = { 'User-Agent': 'Mozilla' } +HEADERS_JSON = { + 'Accept': "application/json" + } +HEADERS_JSON.update(HEADERS) + URL = 'https://www.republique-numerique.fr%s' +API_URL = 'https://www.republique-numerique.fr/api/%s' -def url2res(relurl): +def url2res(relurl, res_title=None): """get identifier of URL""" # this sucks but I don't know how else to do it url = URL % relurl @@ -24,7 +31,26 @@ def url2res(relurl): except KeyError: pass if version: - return 'opinions/%s/versions/%s' % (opinion, version) + candidate = 'opinions/%s/versions/%s' % (opinion, version) + else: + candidate = 'opinions/%s' % opinion + if not res_title: + return candidate + # links to arguments have the same href as the ones to the opinions and + # versions themselves, so we need to make sure + check = requests.get(API_URL % candidate, headers=HEADERS_JSON) + time.sleep(1) + check_v = json.loads(check.text) + try: + real_title = check_v['opinion']['title'] + except KeyError: + real_title = check_v['version']['title'] + # be caution because of broken unicode truncation + res_title = res_title[:-2] + if real_title.startswith(res_title): + return candidate else: - return 'opinions/%s' % opinion + # the link was probably to 
an argument and not to the opinion or version + # itself + return None diff --git a/get_propositions.py b/get_propositions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 -u # Get opinions and modifications of a user from common import HEADERS, url2res @@ -25,6 +25,8 @@ if __name__ == '__main__': time.sleep(1) tree = BeautifulSoup(data.text, 'html.parser') + seen = set() + # the following does not work on older bs4 versions #for div in tree.find_all('div', class_='opinion__data'): for div in tree.find_all('div'): @@ -45,5 +47,12 @@ if __name__ == '__main__': res_url = a.get('href') break - print ("%s 1" % url2res(res_url)) + res = url2res(res_url) + if res in seen: + print("warning: duplicate entry for %s" % res, + file=sys.stderr) + print("this may indicate a problem with the scraping", + file=sys.stderr) + seen.add(res) + print ("%s 1" % res) diff --git a/get_votes.py b/get_votes.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 -u # Get all votes of a user from common import HEADERS, url2res @@ -30,8 +30,11 @@ if __name__ == '__main__': time.sleep(1) votes_tree = BeautifulSoup(data.text, 'html.parser') - # see get_propositions.py + seen = set() + + # redo all votes in chronological order for div in votes_tree.find_all('div'): + # see get_propositions.py for why the complicated mess below is used try: c = div.get("class") except KeyError: @@ -41,12 +44,14 @@ if __name__ == '__main__': if c != 'opinion__data': continue res_url = None + res_title = None for a in div.find_all('a'): v = a.get('href') if not v: continue if v.startswith('/consultations'): res_url = a.get('href') + res_title = a.string break raw_v = None for span in div.find_all('span'): @@ -54,5 +59,14 @@ if __name__ == '__main__': break v = KEYS[raw_v[1].split('-')[1]] - print ("%s %s" % (url2res(res_url), v)) + res = url2res(res_url, res_title) + + if res: + if res in seen: + print("warning: duplicate entry for %s" % res, + file=sys.stderr) + print("this may indicate a problem 
with the scraping", + file=sys.stderr) + seen.add(res) + print ("%s %s" % (res, v)) diff --git a/vote.py b/vote.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 -u # Automate tasks with www.republique-numerique.fr # Only to facilitate your life, please use responsibly @@ -7,13 +7,7 @@ import json import requests import sys import time -from common import HEADERS - -HEADERS_JSON = { - 'Accept': "application/json" - } -HEADERS_JSON.update(HEADERS) - +from common import HEADERS, HEADERS_JSON def login(): """return a requests session and API token""" @@ -59,7 +53,9 @@ def login(): try: token = jdata['token'] except KeyError: - print("Could not retrieve API token during login", file=sys.stderr) + print("Could not retrieve API token during login\n" + "Maybe the EMAIL/PASSWORD are invalid or the account doesn't exist?", + file=sys.stderr) sys.exit(2) return s, token @@ -85,11 +81,11 @@ if __name__ == '__main__': user = sys.argv[1] password = sys.argv[2] except IndexError: - print(("Usage: %s EMAIL PASSWORD\n" - "(your EMAIL and PASSWORD" - "with a local republique-numerique.fr account)\n" - "Performs votes given on stdin, see README") % - sys.argv[0], file=sys.stderr) + print("""Usage: %s EMAIL PASSWORD +Vote as indicated on stdin with republique-numerique.fr account EMAIL/PASSWORD +See README for vote format and details +You must register local account EMAIL/PASSWORD on republique-numerique.fr first""" + % sys.argv[0], file=sys.stderr) sys.exit(1) s, token = login() @@ -100,4 +96,7 @@ if __name__ == '__main__': if v != requests.codes.no_content: print("Vote for %s failed with status code %d" % (f[0], v), file=sys.stderr) + else: + print("Successfully voted %s for %s" % (f[1], f[0]), + file=sys.stderr)