From 7b19a9dcdd09d7d51a2c51ccc8e5b1d0c77cd4ce Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Thu, 13 Jul 2017 21:16:26 -0400 Subject: [PATCH] Forces utf8 when no encoding in header for DOI request. By default, the requests library falls back to the encoding guessed by chardet, which is not always reliable, whereas doi.org seems to always return utf8-encoded data. It's unlikely that this will change without the header also being updated. --- pubs/apis.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pubs/apis.py b/pubs/apis.py index 8c95126..0a0e449 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -3,15 +3,19 @@ import requests from bs4 import BeautifulSoup + def doi2bibtex(doi): """Return a bibtex string of metadata from a DOI""" url = 'http://dx.doi.org/{}'.format(doi) headers = {'accept': 'application/x-bibtex'} r = requests.get(url, headers=headers) + if r.encoding is None: + r.encoding = 'utf8' # Do not rely on guessing from request return r.text + def isbn2bibtex(isbn): """Return a bibtex string of metadata from a DOI"""