wikifirc

filter irc.wikimedia.org on specific pages and users
git clone https://a3nm.net/git/wikifirc/
Log | Files | Refs | README

wikifirc (4346B)


      1 #!/usr/bin/python3 -u
      2 
      3 """Filter Wikipedia recent changes to specific users and pages"""
      4 
      5 import sys
      6 import time
      7 import urllib.parse
      8 import urllib.request
      9 
     10 # use an API here if needed
     11 API=None
     12 
     13 def shorten(url):
     14   if not API:
     15     return url
     16   return (urllib.request.urlopen(API %
     17     urllib.parse.quote(url)).read()
     18     ).decode('utf-8')
     19 
     20 # user and user talk namespaces for the various languages of interest
     21 # add your language here if it isn't there
     22 user_namespaces = [
     23     'User:', 'User talk:', # en
     24     'Utilisateur:', 'Discussion utilisateur:', # fr
     25     'Gebruiker:', 'Overleg gebruiker:', # nl
     26   ]
     27 
     28 special_namespaces = [
     29     'Special:', 'Spécial:'
     30   ]
     31 
     32 colors = {
     33     'green': 3,
     34     'red': 4,
     35     'olive': 7,
     36     'teal': 10,
     37     }
     38 
     39 def capitalize(s):
     40   if len(s) == 0:
     41     return ''
     42   return (s[0].upper() + s[1:])
     43 
     44 def colorize(text, color):
     45   """colorize for IRC, cf. http://irssi.org/documentation/formats"""
     46   return "\x03%02d%s\x0300" % (colors[color], text)
     47 
     48 class Change:
     49   def __init__(self, project, data):
     50     """parse a change from a channel and irc line"""
     51     self.project = project
     52     self.data = data
     53     self.time = time.time()
     54     page, sep, rest = data.partition(']]')
     55     self.page = page[8:-3]
     56     fields = rest.split(' ')
     57     fields.pop(0) # trailing characters of title
     58     self.flags = fields.pop(0)[:-2]
     59     self.diff = fields.pop(0)[3:-1]
     60     self.flags2 = fields.pop(0)[2:-1]
     61     username, sep, rest = ' '.join(fields).partition('*')
     62     self.username = username[3:-4]
     63     fields = rest.split(' ')
     64     fields.pop(0) # rest of flags2
     65     self.diffc = fields.pop(0)[1:-1]
     66     self.message = ' '.join(fields)[3:-1]
     67 
     68   def __str__(self):
     69     """format the change to an irc line"""
     70     return ("<%s> [[%s]] %s %s \"%s\" %s" % (
     71         colorize(self.username, 'green'),
     72         colorize(self.page, 'olive'),
     73         self.diffc,
     74         colorize(self.flags+self.flags2, 'red'),
     75         colorize(self.message, 'teal'),
     76         shorten(self.diff),
     77       ))
     78 
     79 def register(pages, page, fout):
     80   """add a page to a set of pages and output it if not already present"""
     81   if page in pages:
     82     return
     83   pages.add(page)
     84   if fout:
     85     print(page, file=fout)
     86     fout.flush()
     87 
     88 if __name__ == "__main__":
     89 
     90   pages = set()
     91 
     92   try:
     93     admin = sys.argv[1]
     94   except IndexError:
     95     print ("Usage: %s ADMIN [DUMP]" % sys.argv[0])
     96     sys.exit(1)
     97 
     98   dump = None
     99 
    100   try:
    101     dump = sys.argv[2]
    102   except IndexError:
    103     pass
    104 
    105   # load pages
    106   if dump:
    107     try:
    108       f = open(dump, 'r')
    109       while True:
    110         line = f.readline()
    111         if not line:
    112           break
    113         register(pages, capitalize(line.rstrip()), None)
    114       f.close()
    115     except FileNotFoundError:
    116       pass
    117 
    118   # now, prepare to save pages
    119   fout = None
    120   if dump:
    121     fout = open(dump, 'w')
    122     # rewrite current pages
    123     for page in pages:
    124       print(page, file=fout)
    125     fout.flush()
    126 
    127   while True:
    128     data = sys.stdin.readline()
    129     if not data:
    130       break
    131     fields = data.strip().split()
    132     project = fields.pop(0)[1:-1]
    133     if not project.startswith('#'):
    134       user = fields.pop(0)[1:-1]
    135       if user != admin:
    136         continue
    137       command = fields[0]
    138       if command == 'exit':
    139         break
    140       value = capitalize(' '.join(fields[1:]))
    141       if command == "user":
    142         # register it as a user
    143         print("== I started to follow user %s" % value)
    144         for namespace in user_namespaces:
    145           register(pages, namespace + value, fout)
    146       elif command == "page":
    147         # register it as a page
    148         print("== I started to follow page %s" % value)
    149         register(pages, value, fout)
    150       else:
    151         # bad command, fail noisily
    152         raise ValueError
    153       continue
    154     fields.pop(0) # bot username
    155     data = ' '.join(fields)
    156     line = Change(project, data)
    157     # a user is followed if its user page is followed
    158     if user_namespaces[0] + capitalize(line.username) in pages:
    159       new_page = capitalize(line.page)
    160       # do not follow special pages
    161       special = False
    162       for special_namespace in special_namespaces:
    163         if line.page.startswith(special_namespace):
    164           special = True
    165       if not special and not new_page in pages:
    166         print("== I started to follow page %s" % new_page)
    167         register(pages, new_page, fout)
    168     if line.page in pages:
    169       print(line)
    170 
    171   fout.close()
    172