mybin

my ~/bin
git clone https://a3nm.net/git/mybin/
Log | Files | Refs | README

wikicateg (1093B)


      1 #!/usr/bin/python3
      2 
      3 """Produce text list of article names in a wikipedia category"""
      4 
      5 # license: public domain
      6 
      7 import json
      8 import urllib.request,urllib.parse
      9 import sys
     10 import time
     11 from pprint import pprint
     12 
     13 if len(sys.argv) != 3:
     14   print("Usage: %s WIKI CATEGORY" % sys.argv[0], file=sys.stderr)
     15   print("WIKI should be like \"en.wikipedia.org\"", file=sys.stderr)
     16   sys.exit(1)
     17 
     18 progname, wiki, categ = sys.argv
     19 
     20 def retrieve(cont=''):
     21   args = {'action': 'query',
     22       'list': 'categorymembers',
     23       'cmtitle': 'Category:%s' % categ,
     24       'cmlimit': 500,
     25       'format': 'json'
     26       }
     27   if cont != '':
     28     args['cmcontinue'] = cont
     29   data = json.loads(urllib.request.urlopen(
     30         "https://"+wiki+"/w/api.php?",
     31         urllib.parse.urlencode(args).encode('utf8')
     32       ).read().decode('utf8'))
     33   if 'error' in data.keys():
     34     print("Error: %s" % (data['error']['info']), file=sys.stderr)
     35     sys.exit(1)
     36   for a in data['query']['categorymembers']:
     37     print(a['title'])
     38   if 'continue' in data.keys():
     39     time.sleep(2)
     40     retrieve(data['continue']['cmcontinue'])
     41 
     42 retrieve()
     43