commit 19720ba8807a38c0466c10871a1227f283e87b3b
parent 55866f1215a2dbedc5ebf3ac97af3e27c4fb93e7
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Sun, 11 Oct 2015 02:31:25 +0200
throttling: sleep 1 second after each HTTP request
Diffstat:
4 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/common.py b/common.py
@@ -2,6 +2,7 @@
from bs4 import BeautifulSoup
import requests
+import time
HEADERS = { 'User-Agent': 'Mozilla' }
URL = 'https://www.republique-numerique.fr%s'
@@ -11,6 +12,7 @@ def url2res(relurl):
# this sucks but I don't know how else to do it
url = URL % relurl
data = requests.get(url, headers=HEADERS)
+ time.sleep(1)
tree = BeautifulSoup(data.text, 'html.parser')
divs = (tree.find_all('div', id='render-opinion')
+ tree.find_all('div', id='render-opinion-version'))
diff --git a/get_propositions.py b/get_propositions.py
@@ -5,6 +5,7 @@ from common import HEADERS, url2res
from bs4 import BeautifulSoup
import requests
import sys
+import time
PROPOSITIONS_URL = 'https://www.republique-numerique.fr/profile/%s/opinions'
VERSIONS_URL = 'https://www.republique-numerique.fr/profile/%s/versions'
@@ -21,6 +22,7 @@ if __name__ == '__main__':
for user in users:
for url in [PROPOSITIONS_URL % user, VERSIONS_URL % user]:
data = requests.get(url, headers=HEADERS)
+ time.sleep(1)
tree = BeautifulSoup(data.text, 'html.parser')
for div in tree.find_all('div', class_='opinion__data'):
diff --git a/get_votes.py b/get_votes.py
@@ -5,6 +5,7 @@ from common import HEADERS, url2res
from bs4 import BeautifulSoup
import requests
import sys
+import time
VOTE_URL = 'https://www.republique-numerique.fr/profile/%s/votes'
@@ -25,6 +26,7 @@ if __name__ == '__main__':
for user in users:
data = requests.get(VOTE_URL % user, headers=HEADERS)
+ time.sleep(1)
votes_tree = BeautifulSoup(data.text, 'html.parser')
for div in votes_tree.find_all('div', class_='opinion__data'):
diff --git a/vote.py b/vote.py
@@ -6,6 +6,7 @@ from bs4 import BeautifulSoup
import json
import requests
import sys
+import time
from common import HEADERS
HEADERS_JSON = {
@@ -13,6 +14,7 @@ HEADERS_JSON = {
}
HEADERS_JSON.update(HEADERS)
+
def login():
"""return a requests session and API token"""
@@ -23,6 +25,7 @@ def login():
s = requests.Session()
data = s.get(LOGIN, headers=HEADERS)
+ time.sleep(1)
login_tree = BeautifulSoup(data.text, 'html.parser')
csrf = None
@@ -48,8 +51,10 @@ def login():
}
response = s.post(LOGIN_ACTION, headers=HEADERS, data=data)
+ time.sleep(1)
response = s.get(API_TOKEN, headers=HEADERS_JSON)
+ time.sleep(1)
jdata = json.loads(response.text)
try:
token = jdata['token']
@@ -58,6 +63,7 @@ def login():
sys.exit(2)
return s, token
+
def vote(s, token, res, v):
"""vote for res with value v using session s and API token"""
@@ -70,8 +76,10 @@ def vote(s, token, res, v):
# deleting is HTTP method delete, not implemented here
r = s.put('https://www.republique-numerique.fr/api/%s/votes' % res,
headers=headers, data=data)
+ time.sleep(1)
return (r.status_code)
+
if __name__ == '__main__':
try:
user = sys.argv[1]
@@ -93,4 +101,3 @@ if __name__ == '__main__':
print("Vote for %s failed with status code %d" % (f[0], v),
file=sys.stderr)
-