publist

managing my list of publications, talks, reviews
git clone https://a3nm.net/git/publist/
Log | Files | Refs | README | LICENSE

parserec.py (9122B)


      1 #'!/usr/bin/python3
      2 
      3 import sys
      4 
      5 stz = {
      6         'submitted': {
      7             'en': "Under review",
      8             'fr': "Soumis au comité de lecture",
      9             },
     10         'draft': {
     11             'en': "Draft",
     12             'fr': "Version préliminaire",
     13         },
     14         'journalversion': {
     15             'en': "journal version",
     16             'fr': "version journal",
     17         },
     18         'conferenceversion': {
     19             'en': "conference version",
     20             'fr': "version conférence",
     21         },
     22         'journalversion_explain': {
     23             'en': "Extended journal publication:",
     24             'fr': "Version étendue correspondante :",
     25         },
     26         'conferenceversion_explain': {
     27             'en': "Extended version of the conference publication:",
     28             'fr': "Version étendue de la publication conférence :",
     29         },
     30         'slides': {
     31             'en': "slides",
     32             'fr': "exposé",
     33         },
     34         'slideslong': {
     35             'en': "longer slides",
     36             'fr': "exposé plus détaillé",
     37         },
     38         'conference': {
     39             'en': "conference",
     40             'fr': "une conférence",
     41         },
     42         'workshop': {
     43             'en': "workshop",
     44             'fr': "un workshop",
     45         },
     46         'journal': {
     47             'en': "journal",
     48             'fr': "une revue",
     49         },
     50         'published at non oa': {
     51             'en': "Published at a closed-access %s",
     52             'fr': "Publié dans %s non accessible en libre accès",
     53         },
     54         'oaexplain': {
     55             'en': "[why?]",
     56             'fr': "[explications (en anglais)]",
     57         },
     58         'poster': {
     59             'en': "poster",
     60             'fr': "poster",
     61         },
     62         'video': {
     63             'en': "video",
     64             'fr': "vidéo",
     65         },
     66         'videoin': {
     67             'en': "video in",
     68             'fr': "vidéo en",
     69         },
     70         'videoon': {
     71             'en': "video on",
     72             'fr': "vidéo sur",
     73         },
     74         'direct download': {
     75             'en': "direct download",
     76             'fr': "téléchargement direct",
     77         },
     78         'on': {
     79             'en': "on",
     80             'fr': "sur",
     81         },
     82         'by': {
     83             'en': "by",
     84             'fr': "par",
     85         },
     86         'oron': {
     87             'en': "or on",
     88             'fr': "ou sur",
     89         },
     90         'orin': {
     91             'en': "or in",
     92             'fr': "ou en",
     93         },
     94         'short': {
     95             'en': "lightning talk",
     96             'fr': "exposé bref",
     97         },
     98         'code': {
     99             'en': "code",
    100             'fr': "code",
    101         },
    102         'phddefense': {
    103             'en': "PhD defense",
    104             'fr': "soutenance de thèse",
    105         }, 
    106         'habilitationdefense': {
    107             'en': "Habilitation defense",
    108             'fr': "soutenance d'habilitation à diriger des recherches",
    109         }, 
    110         'phddefenserehearsal': {
    111             'en': "PhD defense rehearsal",
    112             'fr': "répétition de soutenance de thèse",
    113         },
    114         'habilitationthesis': {
    115             'en': "Habilitation thesis",
    116             'fr': "Manuscrit d'habilitation à diriger des recherches",
    117         },
    118         'and': {
    119             'en': "and",
    120             'fr': "et",
    121         },
    122         'demo': {
    123             'en': "Demo paper",
    124             'fr': "Démonstration",
    125         },
    126         'spotlight': {
    127             'en': "Spotlight presentation",
    128             'fr': "Exposé spotlight",
    129         },
    130         'shortpaper': {
    131             'en': "Short paper",
    132             'fr': "Article court",
    133         },
    134         'posterpaper': {
    135             'en': "Poster paper",
    136             'fr': "Article poster",
    137         },
    138       }
    139 
    140 stopwords = ["at", "au", "du", "at the", "of the", "for project", "du projet"]
    141 
    142 talk_types = ['poster', 'short', 'phddefenserehearsal']
    143 
    144 def authorname(author, sepnames=False):
    145     if 'name' in author.keys():
    146         return author['name']
    147     else:
    148         if sepnames:
    149             return author['lastname'] + ', ' + author['firstname']
    150         else:
    151             return author['firstname'] + ' ' + author['lastname']
    152 
    153 def isurlrel(url):
    154     # is URL relative?
    155     if url.startswith('http'):
    156         return False
    157     if url.startswith('/'):
    158         return False
    159     return True
    160 
    161 def absurl(url, site, local):
    162     if url.startswith('http'):
    163         return url
    164     if url.startswith('/'):
    165         return site + url
    166     return local + url
    167 
    168 def getyear(publi):
    169     if 'year' in publi.keys():
    170         return int(publi['year'])
    171     # guess a year
    172     pos = 0
    173     title = publi['id']
    174     while not title[pos].isdigit():
    175         pos += 1
    176     return int(title[pos:pos+4])
    177 
    178 def endswithpunct(publi):
    179     for a in [".", "?", "!"]:
    180         if (publi['title'].endswith(a)):
    181             return True
    182     return False
    183 
    184 def mkvenuename(venueo, venuesz, short=False, year=True):
    185     venue = venueo['id']
    186     venue_prevo = None
    187     last = venue[-4:]
    188     if last.isdigit():
    189         venue_prev = venue[:-4]
    190         if venue_prev in venuesz.keys():
    191             venue_prevo = venuesz[venue_prev]
    192     if 'name' in venueo.keys():
    193         return venueo['name']
    194     else:
    195         # make name from id
    196         pos = 0
    197         while not venue[pos].isdigit():
    198             pos += 1
    199         sep = ' '
    200         lpos = pos
    201         if short and pos < len(venue) + 1:
    202             # skip parts of year
    203             pos += 2
    204             sep = "'"
    205         prename = venue[:lpos].upper()
    206         if venue_prevo and 'name' in venue_prevo.keys():
    207             prename = venue_prevo['name']
    208         if year:
    209             return prename + sep + venue[pos:]
    210         else:
    211             return prename
    212 
    213 # return name, fullname, type, venue URL, issue: for publi in lang given venuesz
    214 def getvenue(publi, lang, venuesz, short=False):
    215     global stz
    216     global talk_types
    217     if 'venue' not in publi.keys():
    218         if 'status' in publi.keys():
    219             name = stz[publi['status']][lang]
    220             return (name, name, publi['status'], '', '', '', set())
    221         return ('', '', '', '', '', '', set()) # phdthesis or mscthesis or habilitationthesis
    222     venue = publi['venue']
    223     venueid = venue
    224     if short and 'venueshort' in publi.keys():
    225         venue = publi['venueshort']
    226     url = None
    227     typ = None
    228     keywords = set()
    229     oa = None
    230     fullname = None
    231     venue_no_year = venue
    232     found = False
    233     if venue in venuesz.keys():
    234         venueo = venuesz[venue]
    235         if 'oa' not in venueo.keys():
    236             print("missing OA info for %s" % venue, file=sys.stderr)
    237             assert(False)
    238         oa = venueo['oa']
    239         if 'audience' in venueo.keys():
    240             assert (venueo['audience'] in ['national', 'international'])
    241             keywords.add(venueo['audience'])
    242         if 'informal' in venueo.keys():
    243             assert (venueo['informal'] == 'yes')
    244             keywords.add("informal")
    245         else:
    246             keywords.add("formal")
    247         if 'type' in venueo.keys():
    248             assert (venueo['type'] in ['school', 'conference', 'workshop',
    249             'journal', 'book'])
    250             if venueo['type'] in ['conference', 'journal']:
    251                 typ = venueo['type']
    252             # the book I have isn't really a book
    253             keywords.add('TYPE' + (venueo['type'] if venueo['type'] != 'book'
    254                 else 'conference'))
    255         if 'url' in venueo.keys():
    256             url = venueo['url']
    257         if 'fullname' in venueo.keys():
    258             fullname = venueo['fullname']
    259         venue = mkvenuename(venueo, venuesz, short)
    260         venue_no_year = mkvenuename(venueo, venuesz, short, year=False)
    261         if fullname and venue:
    262             fullname += " (" + venue + ")"
    263     else:
    264         if 'venueurl' in publi.keys():
    265             url = publi['venueurl']
    266         # the venue is given directly (deprecated, used for talks and some special types)
    267         assert('type' not in publi.keys() or publi['type'] in ['patent', 'mscthesis', 'phdthesis', 'habilitationthesis', 'note'] + talk_types)
    268         oa = True
    269         lvenue = 'venue' + lang
    270         if lvenue in publi.keys():
    271             venue = publi[lvenue]
    272     if fullname == None:
    273         fullname = venue
    274         last = venueid[-4:]
    275         if last.isdigit():
    276             # conf2042 => conf
    277             venue_prev = venueid[:-4]
    278             if venue_prev in venuesz.keys():
    279                 if 'fullname' in venuesz[venue_prev].keys():
    280                     fullname = venuesz[venue_prev]['fullname'] + " (" + venue_no_year + ")"
    281 
    282     return (venue, fullname, typ, url, publi.get('issue', ''), oa in ['yes', 'n/a'], keywords)
    283 
    284 def parse(fname):
    285     with open(fname, 'r') as f:
    286         current = {}
    287         for l in f.readlines():
    288             if l.strip().startswith('#'):
    289                 continue
    290             if len(l.strip()) == 0:
    291                 if len(current.keys()) > 0:
    292                     yield current
    293                 current = {}
    294                 continue
    295             fields = l.strip().split(':')
    296             fname = fields[0].lower()
    297             fval = ':'.join(fields[1:])
    298             current[fname.strip().lower()] = fval.strip()
    299         if len(current.keys()) > 0:
    300             yield current
    301