commit da6b3b320e64b16294994a78dd86d79201fccc7c
parent cfc5ded7132074be9341112e5b10addf3f7cf17c
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Wed, 14 Oct 2015 12:21:35 +0200
Merge branch 'master' of a3nm.net:git/republique
Diffstat:
5 files changed, 99 insertions(+), 28 deletions(-)
diff --git a/README b/README
@@ -1,8 +1,10 @@
Helper scripts for www.republique-numerique.fr
-Antoine Amarilli, 2015
+Antoine Amarilli <a3nm AT a3nm DOT net>, 2015
License: see COPYING (MIT license)
-Requires Debian packages python3-requests, python3-bs4, python3-lxml
+Requires Debian packages python3-requests, python3-bs4, python3-lxml.
+Please refer to <http://a3nm.net/blog/republique_numerique.html> for more
+information (in French).
- get_propositions.py USER
returns the list of identifiers of all opinions and modifications of USER
@@ -15,8 +17,10 @@ Requires Debian packages python3-requests, python3-bs4, python3-lxml
where X is -1, 0, or 1
- vote.py EMAIL PASSWORD
- connects to www.republique-numerique.fr local account EMAIL PASSWORD
+ connects to www.republique-numerique.fr using local account EMAIL PASSWORD
and performs votes given on standard input in the form of get_votes.py
+ (account EMAIL/PASSWORD must have been created on www.republique-numerique.fr
+ beforehand)
- dump_all.sh DEST
dump raw JSON of all opinions and versions to DEST/opinions and DEST/versions
@@ -32,8 +36,27 @@ Examples:
get_propositions.py USER | vote.py YOUREMAIL YOURPASS
-WARNING: These scripts are provided to simplify your life only. Please use them
-responsibly. The author does not encourage any form of ballot stuffing or other
-abusive behavior.
+Known limitations:
+
+- Votes on arguments cannot be reliably distinguished from votes on opinions and
+ versions. The code tries to check from the title, but it could be wrong. The
+ code may reject a vote erroneously (so some votes may be missing), or mistake
+ a vote on an argument for a vote on the corresponding opinion or modification
+ (in which case a favorable vote on the corresponding item may be erroneously
+ counted). This could probably be solved reliably using the API, but it is
+ undocumented and I could not figure out how to use it for this purpose.
+
+- Subject to the limitations above, votes on arguments are discarded. Ideally
+ one would want to retrieve and replay them as well, but retrieving such votes
+ and identifying the correct argument would be brittle anyway (it could only
+ rely on the title).
+- get_propositions.py and get_votes.py are slow due to the need to perform up to
+ 2 requests per item. Again, this could probably be avoided with adequate usage
+ of the undocumented API.
+
+WARNING: These scripts are provided to simplify your life only. Please use them
+responsibly. I do not accept any responsibility for any erroneous votes that
+this tool may cast on your behalf. I do not encourage any form of ballot stuffing
+or other abusive behavior.
diff --git a/common.py b/common.py
@@ -1,13 +1,20 @@
#!/bin/python3
from bs4 import BeautifulSoup
+import json
import requests
import time
HEADERS = { 'User-Agent': 'Mozilla' }
+HEADERS_JSON = {
+ 'Accept': "application/json"
+ }
+HEADERS_JSON.update(HEADERS)
+
URL = 'https://www.republique-numerique.fr%s'
+API_URL = 'https://www.republique-numerique.fr/api/%s'
-def url2res(relurl):
+def url2res(relurl, res_title=None):
"""get identifier of URL"""
# this sucks but I don't know how else to do it
url = URL % relurl
@@ -24,7 +31,26 @@ def url2res(relurl):
except KeyError:
pass
if version:
- return 'opinions/%s/versions/%s' % (opinion, version)
+ candidate = 'opinions/%s/versions/%s' % (opinion, version)
+ else:
+ candidate = 'opinions/%s' % opinion
+ if not res_title:
+ return candidate
+ # links to arguments have the same href as the ones to the opinions and
+ # versions themselves, so we compare the link title with the API title to make sure
+ check = requests.get(API_URL % candidate, headers=HEADERS_JSON)
+ time.sleep(1)
+ check_v = json.loads(check.text)
+ try:
+ real_title = check_v['opinion']['title']
+ except KeyError:
+ real_title = check_v['version']['title']
+ # be cautious because of broken unicode truncation
+ res_title = res_title[:-2]
+ if real_title.startswith(res_title):
+ return candidate
else:
- return 'opinions/%s' % opinion
+ # the link was probably to an argument and not to the opinion or version
+ # itself
+ return None
diff --git a/get_propositions.py b/get_propositions.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/python3 -u
# Get opinions and modifications of a user
from common import HEADERS, url2res
@@ -25,6 +25,8 @@ if __name__ == '__main__':
time.sleep(1)
tree = BeautifulSoup(data.text, 'html.parser')
+ seen = set()
+
# the following does not work on older bs4 versions
#for div in tree.find_all('div', class_='opinion__data'):
for div in tree.find_all('div'):
@@ -45,5 +47,12 @@ if __name__ == '__main__':
res_url = a.get('href')
break
- print ("%s 1" % url2res(res_url))
+ res = url2res(res_url)
+ if res in seen:
+ print("warning: duplicate entry for %s" % res,
+ file=sys.stderr)
+ print("this may indicate a problem with the scraping",
+ file=sys.stderr)
+ seen.add(res)
+ print ("%s 1" % res)
diff --git a/get_votes.py b/get_votes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/python3 -u
# Get all votes of a user
from common import HEADERS, url2res
@@ -30,8 +30,11 @@ if __name__ == '__main__':
time.sleep(1)
votes_tree = BeautifulSoup(data.text, 'html.parser')
- # see get_propositions.py
+ seen = set()
+
+ # redo all votes in chronological order
for div in votes_tree.find_all('div'):
+ # see get_propositions.py for why the complicated mess below is used
try:
c = div.get("class")
except KeyError:
@@ -41,12 +44,14 @@ if __name__ == '__main__':
if c != 'opinion__data':
continue
res_url = None
+ res_title = None
for a in div.find_all('a'):
v = a.get('href')
if not v:
continue
if v.startswith('/consultations'):
res_url = a.get('href')
+ res_title = a.string
break
raw_v = None
for span in div.find_all('span'):
@@ -54,5 +59,14 @@ if __name__ == '__main__':
break
v = KEYS[raw_v[1].split('-')[1]]
- print ("%s %s" % (url2res(res_url), v))
+ res = url2res(res_url, res_title)
+
+ if res:
+ if res in seen:
+ print("warning: duplicate entry for %s" % res,
+ file=sys.stderr)
+ print("this may indicate a problem with the scraping",
+ file=sys.stderr)
+ seen.add(res)
+ print ("%s %s" % (res, v))
diff --git a/vote.py b/vote.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/python3 -u
# Automate tasks with www.republique-numerique.fr
# Only to facilitate your life, please use responsibly
@@ -7,13 +7,7 @@ import json
import requests
import sys
import time
-from common import HEADERS
-
-HEADERS_JSON = {
- 'Accept': "application/json"
- }
-HEADERS_JSON.update(HEADERS)
-
+from common import HEADERS, HEADERS_JSON
def login():
"""return a requests session and API token"""
@@ -59,7 +53,9 @@ def login():
try:
token = jdata['token']
except KeyError:
- print("Could not retrieve API token during login", file=sys.stderr)
+ print("Could not retrieve API token during login\n"
+ "Maybe the EMAIL/PASSWORD are invalid or the account doesn't exist?",
+ file=sys.stderr)
sys.exit(2)
return s, token
@@ -85,11 +81,11 @@ if __name__ == '__main__':
user = sys.argv[1]
password = sys.argv[2]
except IndexError:
- print(("Usage: %s EMAIL PASSWORD\n"
- "(your EMAIL and PASSWORD"
- "with a local republique-numerique.fr account)\n"
- "Performs votes given on stdin, see README") %
- sys.argv[0], file=sys.stderr)
+ print("""Usage: %s EMAIL PASSWORD
+Vote as indicated on stdin with republique-numerique.fr account EMAIL/PASSWORD
+See README for vote format and details
+You must register local account EMAIL/PASSWORD on republique-numerique.fr first"""
+ % sys.argv[0], file=sys.stderr)
sys.exit(1)
s, token = login()
@@ -100,4 +96,7 @@ if __name__ == '__main__':
if v != requests.codes.no_content:
print("Vote for %s failed with status code %d" % (f[0], v),
file=sys.stderr)
+ else:
+ print("Successfully voted %s for %s" % (f[1], f[0]),
+ file=sys.stderr)