commit d93688d9c735ca09c94e586e13e96f95ae0b3c72
parent 8907130fa23dd551ad8d64fe5e09b6e0cb3173a6
Author: Antoine Amarilli <a3nm@a3nm.net>
Date: Tue, 18 Jan 2022 18:12:27 +0100
adddoi
Diffstat:
adddoi | | | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 96 insertions(+), 0 deletions(-)
diff --git a/adddoi b/adddoi
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# source : https://tex.stackexchange.com/a/300474 and https://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
+# users: XachaB, thando, JohnM
+import sys, re
+from unidecode import unidecode
+import bibtexparser
+from bibtexparser.bwriter import BibTexWriter
+import http.client as httplib
+import requests
+import urllib
+
+# Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
+# Credit to user13348, slight modifications
+# http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
+def searchdoi(title, author):
+ params = urllib.parse.urlencode({"titlesearch":"titlesearch", "auth2" : author, "atitle2" : title, "multi_hit" : "on", "article_title_search" : "Search", "queryType" : "author-title"})
+ headers = {"User-Agent": "Mozilla/5.0" , "Accept": "text/html", "Content-Type" : "application/x-www-form-urlencoded", "Host" : "www.crossref.org"}
+ # conn = httplib.HTTPConnection("www.crossref.org:80") # Not working any more, HTTPS required
+ conn = httplib.HTTPSConnection("www.crossref.org")
+ conn.request("POST", "/guestquery/", params, headers)
+ response = conn.getresponse()
+ #print(response.status, response.reason)
+ data = response.read()
+ conn.close()
+ url = "https://www.crossref.org/guestquery/#bibsearch"
+
+ r = requests.post(url, headers=headers, data=params)
+
+ data = r.text
+
+ return re.search(r'doi\.org/([^"^<^>]+)', str(data))
+
+def normalize(string):
+ """Normalize strings to ascii, without latex."""
+ string = re.sub(r'[{}\\\'"^]',"", string)
+ string = re.sub(r"\$.*?\$","",string) # better remove all math expressions
+ return unidecode(string)
+
+def get_authors(entry):
+ """Get a list of authors' or editors' last names."""
+ def get_last_name(authors):
+ for author in authors :
+ author = author.strip(" ")
+ if "," in author:
+ yield author.split(",")[0]
+ elif " " in author:
+ yield author.split(" ")[-1]
+ else:
+ yield author
+
+ try:
+ authors = entry["author"]
+ except KeyError:
+ authors = entry["editor"]
+
+ authors = normalize(authors).split("and")
+ return list(get_last_name(authors))
+
+
+print("Reading Bibliography...")
+with open(sys.argv[1]) as bibtex_file:
+ bibliography = bibtexparser.load(bibtex_file)
+
+
+print("Looking for Dois...")
+before = 0
+new = 0
+total = len(bibliography.entries)
+for i,entry in enumerate(bibliography.entries):
+ print("\r{i}/{total} entries processed, please wait...".format(i=i,total=total),flush=True,end="")
+ try:
+ if "doi" not in entry or entry["doi"].isspace():
+ title = normalize(entry["title"]
+ authors = get_authors(entry)
+ for author in authors:
+ doi_match = searchdoi(title,author)
+ if doi_match:
+ doi = doi_match.groups()[0]
+ entry["doi"] = doi
+ new += 1
+ break
+ else:
+ before += 1
+ except:
+ pass
+print("")
+
+template="We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
+
+print(template.format(new=new,before=before,after=before+new,total=total))
+outfile = sys.argv[1]+"_doi.bib"
+print("Writing result to ",outfile)
+writer = BibTexWriter()
+writer.indent = ' ' # indent entries with 4 spaces instead of one
+with open(outfile, 'w') as bibfile:
+ bibfile.write(writer.write(bibliography))