Merge pull request #155 from pubs/jma/arxiv

Handle arXiv errors and other improvements.

- handles old/new style arxiv references
- normalize arXiv citekeys
- improves error handling of doi/isbn (raises an exception if an HTTP error status is detected).
- fix the isbn parse test that was incorrectly using doi2bibtex.
- add feedparser to the requirements.txt file.
- allows to run the tests using pytest.
main
Olivier Mangin 7 years ago committed by GitHub
commit 81aa1cddf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -16,6 +16,11 @@ matrix:
- os: linux - os: linux
language: python language: python
python: 3.6 python: 3.6
- os: linux
language: python
python: 3.7
dist: xenial
sudo: true
- os: osx - os: osx
language: generic language: generic
python: 2.7 python: 2.7
@ -36,8 +41,9 @@ matrix:
# command to install dependencies # command to install dependencies
install: install:
- python --version - python --version
- pip install -r tests/requirements.txt - export PUBS_TESTS_MODE=ONLINE
- python setup.py install
# command to run tests # command to run tests
script: python -m unittest discover script:
- PUBS_TESTS_MODE=MOCK python setup.py test
- PUBS_TESTS_MODE=COLLECT python setup.py test

@ -8,6 +8,10 @@
### Implemented enhancements ### Implemented enhancements
- Support for downloading arXiv reference from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini))
- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155)
- Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142) - Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142)
- Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden)) - Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden))
@ -26,6 +30,7 @@
- Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102) - Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102)
- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155)
### Fixed bugs ### Fixed bugs

@ -0,0 +1,24 @@
# if you want to set up your environment for development of the pubs code,
# doing `pip install -r dev_requirements.txt` is the single thing you have to do.
# Alternatively, and perhaps more conveniently, running `python setup.py test`
# will do the same *and* run the tests, but without installing the packages on
# the system.
# Note that if you introduce a new dependency, you need to add it here and, more
# importantly, to the setup.py script so that it is taken into account when
# installing from PyPi.
-e .
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4
feedparser
six
# those are the additional packages required to run the tests
pyfakefs
ddt
mock
pytest # optional (python setup.py test works without it), but possible nonetheless

@ -1,27 +1,198 @@
"""Interface for Remote Bibliographic APIs""" """Interface for Remote Bibliographic APIs"""
import re
import datetime
import requests import requests
import bibtexparser
from bibtexparser.bibdatabase import BibDatabase
import feedparser
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def doi2bibtex(doi): class ReferenceNotFoundError(Exception):
pass
def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None):
    """Return a decoded bibentry from various ID methods.

    This is a wrapper around functions that will return a bibtex string given
    one of:

    * DOI
    * ISBN
    * arXiv ID

    The bibtex string is then decoded with `EnDecoder.decode_bibdata`.

    Args:
        id_str: A string with the ID.
        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
        try_doi: Forwarded to the retrieval function; only `arxiv2bibtex`
            makes use of it (prefer the DOI record when arXiv references one).
        ui: A UI object, forwarded to the retrieval function (may be None).

    Returns:
        The decoded bibentry, as returned by `EnDecoder.decode_bibdata`.

    Raises:
        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
        ReferenceNotFoundError: if no valid reference could be found.
    """
    # Imported here because the module-level imports do not provide it.
    from . import endecoder

    id_fns = {
        'doi': doi2bibtex,
        'isbn': isbn2bibtex,
        'arxiv': arxiv2bibtex,
    }

    if id_type not in id_fns:
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

    bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui)
    # Keep the decoded result: it is both what we validate and what we return.
    bibentry = endecoder.EnDecoder().decode_bibdata(bibentry_raw)
    if bibentry is None:
        raise ReferenceNotFoundError(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
    return bibentry
def _get_request(url, headers=None):
    """GET requests to a url. Return the `requests` response object.

    :param url: the url to query.
    :param headers: optional dict of HTTP headers, passed as-is to
                    `requests.get`.
    :raise ReferenceNotFoundError: if anything goes bad (connection refused,
                                   timeout, http status error (401, 404, etc)).
    """
    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        return r
    except requests.exceptions.RequestException as e:
        # Pass a readable message rather than the raw `args` tuple, so that
        # `str()` of the raised error can be displayed to the user directly.
        raise ReferenceNotFoundError(str(e))
## DOI support
def doi2bibtex(doi, **kwargs):
"""Return a bibtex string of metadata from a DOI""" """Return a bibtex string of metadata from a DOI"""
url = 'http://dx.doi.org/{}'.format(doi) url = 'https://dx.doi.org/{}'.format(doi)
headers = {'accept': 'application/x-bibtex'} headers = {'accept': 'application/x-bibtex'}
r = requests.get(url, headers=headers) r = _get_request(url, headers=headers)
if r.encoding is None: if r.encoding is None:
r.encoding = 'utf8' # Do not rely on guessing from request r.encoding = 'utf8' # Do not rely on guessing from request
return r.text return r.text
def isbn2bibtex(isbn): ## ISBN support
def isbn2bibtex(isbn, **kwargs):
"""Return a bibtex string of metadata from an ISBN""" """Return a bibtex string of metadata from an ISBN"""
url = 'http://www.ottobib.com/isbn/{}/bibtex'.format(isbn) url = 'https://www.ottobib.com/isbn/{}/bibtex'.format(isbn)
r = requests.get(url) r = _get_request(url)
soup = BeautifulSoup(r.text, "html.parser") soup = BeautifulSoup(r.text, "html.parser")
citation = soup.find("textarea").text citation = soup.find("textarea").text
return citation return citation
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them?
## arXiv support
# Three-letter lowercase month names, indexed by (month number - 1); used by
# `arxiv2bibtex` to fill the `month` field from the parsed publication date.
_months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
           'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
def _is_arxiv_oldstyle(arxiv_id):
return re.match(r"(arXiv\:)?[a-z\-]+\/[0-9]+(v[0-9]+)?", arxiv_id) is not None
def _extract_arxiv_id(entry):
pattern = r"http[s]?://arxiv.org/abs/(?P<entry_id>.+)"
return re.search(pattern, entry['id']).groupdict()['entry_id']
def arxiv2bibtex(arxiv_id, try_doi=True, ui=None):
    """Return a bibtex string of metadata from an arXiv ID

    :param arxiv_id: arXiv id, with or without the `arXiv:` prefix and version
                     suffix (e.g. `v1`). Old and new style are accepted. Here
                     are examples of accepted identifiers: `1510.00322`,
                     `arXiv:1510.00322`, `0901.0512`, `arXiv:0901.0512`,
                     `hep-ph/9409201` or `arXiv:hep-ph/9409201`.
                     Note that the `ID` and `eprint` fields of the result are
                     taken from the entry id returned by arXiv, not from
                     `arxiv_id` as given (the original notes state that this
                     removes the `arXiv:` prefix and adds the version suffix
                     if missing).
    :param try_doi:  if a DOI is referenced in the arXiv metadata,
                     try to download it instead. If that fails for any reason,
                     falls back to the arXiv, with a warning message, if the
                     UI is provided.
    :param ui:       if not None, will display a warning if the doi request
                     fails.
    :return: a bibtex string, either retrieved from the DOI or generated from
             the arXiv metadata.
    :raise ReferenceNotFoundError: if the request fails, or if arXiv returns
                                   zero or more than one entry.
    """
    ## query the arXiv API; any failure becomes a ReferenceNotFoundError
    url = 'https://export.arxiv.org/api/query?id_list={}'.format(arxiv_id)
    try:
        r = requests.get(url)
        if r.status_code == 400:  # bad request
            msg = ("the arXiv server returned a bad request error. The "
                   "arXiv id {} is possibly invalid or malformed.".format(arxiv_id))
            raise ReferenceNotFoundError(msg)
        r.raise_for_status()  # raise an exception for other HTTP errors
                              # (401, 404, etc.)
    except requests.exceptions.RequestException as e:
        msg = ("connection error while retrieving arXiv data for "
               "'{}': {}".format(arxiv_id, e))
        raise ReferenceNotFoundError(msg)

    feed = feedparser.parse(r.text)
    if len(feed.entries) == 0:  # no results.
        msg = "no results for arXiv id {}".format(arxiv_id)
        raise ReferenceNotFoundError(msg)
    if len(feed.entries) > 1:  # I don't know how that could happen, but let's
                               # be ready for it.
        results = '\n'.join('{}. {}'.format(i, result['title'])
                            for i, result in enumerate(feed.entries, 1))
        msg = ("multiple results for arXiv id {}:\n{}\nThis is unexpected. "
               "Please submit an issue at "
               "https://github.com/pubs/pubs/issues").format(arxiv_id, results)
        raise ReferenceNotFoundError(msg)

    entry = feed.entries[0]

    ## try to return a doi instead of the arXiv reference
    if try_doi and 'arxiv_doi' in entry:
        try:
            return doi2bibtex(entry['arxiv_doi'])
        except ReferenceNotFoundError as e:
            # best effort: warn if we can, then fall back to the arXiv data.
            if ui is not None:
                ui.warning(str(e))

    ## create a bibentry from the arXiv response.
    db = BibDatabase()
    entry_id = _extract_arxiv_id(entry)
    author_str = ' and '.join(
        [author['name'] for author in entry['authors']])
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': entry_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'month': _months[entry['published_parsed'].tm_mon-1],
        'eprint': entry_id,
        'eprinttype': 'arxiv',
        'date': entry['published'],  # not really standard, but a resolution more
                                     # granular than months is increasingly relevant.
        'url': entry['link'],
        'urldate': datetime.datetime.utcnow().isoformat() + 'Z'  # can't hurt.
    }]
    # we don't add eprintclass for old-style ids, as it is in the id already.
    if not _is_arxiv_oldstyle(entry_id):
        db.entries[0]['eprintclass'] = entry['arxiv_primary_category']['term']
    if 'arxiv_doi' in entry:
        db.entries[0]['arxiv_doi'] = entry['arxiv_doi']

    bibtex = bibtexparser.dumps(db)
    return bibtex

@ -26,8 +26,10 @@ def parser(subparsers, conf):
parser = subparsers.add_parser('add', help='add a paper to the repository') parser = subparsers.add_parser('add', help='add a paper to the repository')
parser.add_argument('bibfile', nargs='?', default=None, parser.add_argument('bibfile', nargs='?', default=None,
help='bibtex file') help='bibtex file')
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) id_arg = parser.add_mutually_exclusive_group()
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None) parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas', parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
default=None default=None
@ -41,22 +43,25 @@ def parser(subparsers, conf):
return parser return parser
def bibentry_from_editor(conf, ui, rp): def bibentry_from_editor(conf, ui):
again = True again = True
bibstr = templates.add_bib bibentry_raw = templates.add_bib
decoder = endecoder.EnDecoder()
while again: while again:
try: try:
bibstr = ui.editor_input(initial=bibstr, suffix='.bib') bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib')
if bibstr == templates.add_bib: if bibentry_raw == templates.add_bib:
again = ui.input_yn( again = ui.input_yn(
question='Bibfile not edited. Edit again ?', question='Bibfile not edited. Edit again ?',
default='y') default='y')
if not again: if not again:
ui.exit(0) ui.exit(0)
else: else:
bibentry = rp.databroker.verify(bibstr) bibentry = decoder.decode_bibdata(bibentry_raw)
bibstruct.verify_bibdata(bibentry) bibstruct.verify_bibdata(bibentry)
# REFACTOR Generate citykey # REFACTOR Generate citekey
again = False again = False
except endecoder.EnDecoder.BibDecodingError: except endecoder.EnDecoder.BibDecodingError:
@ -82,30 +87,29 @@ def command(conf, args):
citekey = args.citekey citekey = args.citekey
rp = repo.Repository(conf) rp = repo.Repository(conf)
decoder = endecoder.EnDecoder()
# get bibtex entry # get bibtex entry
if bibfile is None: if bibfile is None:
if args.doi is None and args.isbn is None: if args.doi is None and args.isbn is None and args.arxiv is None:
bibentry = bibentry_from_editor(conf, ui, rp) bibentry = bibentry_from_editor(conf, ui)
else: else:
if args.doi is not None: bibentry = None
bibentry_raw = apis.doi2bibtex(args.doi) try:
bibentry = rp.databroker.verify(bibentry_raw) if args.doi is not None:
if bibentry is None: bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui)
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) elif args.isbn is not None:
if args.isbn is None: bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui)
ui.exit(1) # TODO distinguish between cases, offer to open the error page in a webbrowser.
if args.isbn is not None: # TODO offer to confirm/change citekey
bibentry_raw = apis.isbn2bibtex(args.isbn) elif args.arxiv is not None:
bibentry = rp.databroker.verify(bibentry_raw) bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui)
if bibentry is None: except apis.ReferenceNotFoundException as e:
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) ui.error(e.message)
ui.exit(1) ui.exit(1)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
else: else:
bibentry_raw = content.get_content(bibfile, ui=ui) bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw) bibentry = decoder.decode_bibdata(bibentry_raw)
if bibentry is None: if bibentry is None:
ui.error('invalid bibfile {}.'.format(bibfile)) ui.error('invalid bibfile {}.'.format(bibfile))

@ -79,16 +79,6 @@ class DataBroker(object):
def listing(self, filestats=True): def listing(self, filestats=True):
return self.filebroker.listing(filestats=filestats) return self.filebroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
"""Will return None if bibdata_raw can't be decoded"""
try:
if bibdata_raw.startswith('\ufeff'):
# remove BOM, because bibtexparser does not support it.
bibdata_raw = bibdata_raw[1:]
return self.endecoder.decode_bibdata(bibdata_raw)
except ValueError as e:
return None
# docbroker # docbroker
def in_docsdir(self, docpath): def in_docsdir(self, docpath):

@ -163,9 +163,6 @@ class DataCache(object):
def listing(self, filestats=True): def listing(self, filestats=True):
return self.databroker.listing(filestats=filestats) return self.databroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
return self.databroker.verify(bibdata_raw)
# docbroker # docbroker
def in_docsdir(self, docpath): def in_docsdir(self, docpath):

@ -1,9 +1,16 @@
from __future__ import absolute_import, unicode_literals from __future__ import absolute_import, unicode_literals
import copy import copy
import logging
# both needed to intercept exceptions.
import pyparsing
import bibtexparser
try: try:
import bibtexparser as bp import bibtexparser as bp
# don't let bibtexparser display stuff
bp.bparser.logger.setLevel(level=logging.CRITICAL)
except ImportError: except ImportError:
print("error: you need to install bibterxparser; try running 'pip install " print("error: you need to install bibterxparser; try running 'pip install "
"bibtexparser'.") "bibtexparser'.")
@ -68,14 +75,14 @@ class EnDecoder(object):
class BibDecodingError(Exception): class BibDecodingError(Exception):
message = "Could not parse provided bibdata:\n---\n{}\n---" def __init__(self, error_msg, bibdata):
"""
def __init__(self, bibdata): :param error_msg: specific message about what went wrong
:param bibdata: the data that was unsuccessfully decoded.
"""
super(Exception, self).__init__(error_msg) # make `str(self)` work.
self.data = bibdata self.data = bibdata
def __str__(self):
return self.message.format(self.data)
bwriter = bp.bwriter.BibTexWriter() bwriter = bp.bwriter.BibTexWriter()
bwriter.display_order = BIBFIELD_ORDER bwriter.display_order = BIBFIELD_ORDER
@ -117,10 +124,12 @@ class EnDecoder(object):
If the decoding fails, returns a BibParseError. If the decoding fails, returns a BibParseError.
""" """
if len(bibdata) == 0:
error_msg = 'parsing error: the provided string has length zero.'
raise self.BibDecodingError(error_msg, bibdata)
try: try:
entries = bp.bparser.BibTexParser( entries = bp.bparser.BibTexParser(
bibdata, common_strings=True, bibdata, common_strings=True, customization=customizations,
customization=customizations,
homogenize_fields=True).get_entry_dict() homogenize_fields=True).get_entry_dict()
# Remove id from bibtexparser attribute which is stored as citekey # Remove id from bibtexparser attribute which is stored as citekey
@ -131,8 +140,18 @@ class EnDecoder(object):
entries[e][TYPE_KEY] = t entries[e][TYPE_KEY] = t
if len(entries) > 0: if len(entries) > 0:
return entries return entries
except Exception: except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
import traceback error_msg = self._format_parsing_error(e)
traceback.print_exc() raise self.BibDecodingError(error_msg, bibdata)
raise self.BibDecodingError(bibdata) except bibtexparser.bibdatabase.UndefinedString as e:
# TODO: filter exceptions from pyparsing and pass reason upstream error_msg = 'parsing error: undefined string in provided data: {}'.format(e)
raise self.BibDecodingError(error_msg, bibdata)
@classmethod
def _format_parsing_error(cls, e):
"""Transform a pyparsing exception into an error message
Does a best effort to be useful, but might need to be improved.
"""
return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)

@ -56,6 +56,10 @@ or an ISBN (dashes are ignored):
pubs add -I 978-0822324669 -d article.pdf pubs add -I 978-0822324669 -d article.pdf
or an arXiv id (automatically downloading arXiv article is in the works):
pubs add -X math/9501234 -d article.pdf
## References always up-to-date ## References always up-to-date

@ -1,6 +0,0 @@
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4

@ -1,10 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
import unittest
from setuptools import setup from setuptools import setup
with open('pubs/version.py') as f: with open('pubs/version.py') as f:
exec(f.read()) # defines __version__ exec(f.read()) # defines __version__
def pubs_test_suite():
    """Return the suite of all `test_*.py` tests found in the `tests` directory."""
    return unittest.TestLoader().discover('tests', pattern='test_*.py')
setup( setup(
name='pubs', name='pubs',
version=__version__, version=__version__,
@ -26,9 +32,8 @@ setup(
], ],
}, },
install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil', install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil', 'six',
'requests', 'configobj', 'beautifulsoup4'], 'requests', 'configobj', 'beautifulsoup4', 'feedparser'],
tests_require=['pyfakefs>=2.7', 'mock'],
extras_require={'autocompletion': ['argcomplete'], extras_require={'autocompletion': ['argcomplete'],
}, },
@ -41,6 +46,9 @@ setup(
'Intended Audience :: Science/Research', 'Intended Audience :: Science/Research',
], ],
test_suite= 'tests',
tests_require=['pyfakefs>=3.4', 'mock', 'ddt'],
# in order to avoid 'zipimport.ZipImportError: bad local file header' # in order to avoid 'zipimport.ZipImportError: bad local file header'
zip_safe=False, zip_safe=False,

@ -0,0 +1,106 @@
"""
Mock the `requests.get` function, and handle collecting data to do so.
Three modes are available, and controlled via the `PUBS_TESTS_MODE` environment
variable. To modify the variable, under linux or macos, do one of:
$ export PUBS_TESTS_MODE=MOCK
$ export PUBS_TESTS_MODE=COLLECT
$ export PUBS_TESTS_MODE=ONLINE
The MOCK mode is the default one, active even if PUBS_TESTS_MODE has not been
set. It uses saved data to run pubs unit tests relying on the `requests.get`
function without the need of an internet connection (it is also much faster).
The prefetched data is saved in the `test_apis_data.json` file.
The COLLECT mode does real GET requests, and updates the `test_apis_data.json`
file. It is needed if you add or modify the tests relying on `requests.get`.
The ONLINE mode bypasses all this and uses the original `requests.get` without
accessing or updating the `test_apis_data.json` data. It might be useful when
running tests on Travis for instance.
"""
import os
import json
import mock
import requests
_orgininal_requests_get = requests.get
_collected_responses = []
_data_filepath = os.path.join(os.path.dirname(__file__), 'test_apis_data.json')
class MockingResponse:
    """Minimal stand-in for the response object returned by `requests.get`.

    It only carries the attributes set below (`text`, `status_code`,
    `error_msg`, `encoding`) and a `raise_for_status` method.
    """

    def __init__(self, text, status_code=200, error_msg=None):
        self.encoding = 'utf8'
        self.text = text
        self.status_code = status_code
        self.error_msg = error_msg

    def raise_for_status(self):
        """Raise a `RequestException` carrying `error_msg` unless the status is 200."""
        if self.status_code == 200:
            return
        raise requests.exceptions.RequestException(self.error_msg)
def intercept_text(text):
    """Corrupt one specific DOI in a response payload, for testing purposes.

    If `text` contains the DOI `10.1103/PhysRevD.89.084044`, every occurrence
    of `PhysRevD` in it is replaced by `INVALIDDOI`, so that tests can exercise
    the handling of an invalid DOI. Any other payload is returned unchanged.

    :param text: the payload, as text or bytes. May be None (no response was
                 received, e.g. after a connection error), in which case None
                 is returned.
    :return: the possibly modified payload, of the same type as `text`.
    """
    if text is None:
        # nothing was received, hence nothing to alter.
        return text
    if isinstance(text, bytes):
        if b'10.1103/PhysRevD.89.084044' in text:
            # replace with wrong DOI
            text = text.replace(b'PhysRevD', b'INVALIDDOI')
    elif '10.1103/PhysRevD.89.084044' in text:
        # replace with wrong DOI
        text = text.replace('PhysRevD', 'INVALIDDOI')
    return text
# Select the replacement for `requests.get` according to the PUBS_TESTS_MODE
# environment variable (MOCK is the default).
mode = os.environ.get('PUBS_TESTS_MODE', 'MOCK')

if mode == 'MOCK':

    # load the previously collected responses once, at import time.
    with open(_data_filepath, 'r') as fd:
        _collected_responses = json.load(fd)

    def mock_requests_get(*args, **kwargs):
        """Return the saved response recorded for this exact call.

        :raise KeyError: if no response was recorded for these arguments.
        """
        for args2, kwargs2, text, status_code, error_msg in _collected_responses:
            # json stores tuples as lists, hence the conversion for comparing.
            if list(args) == list(args2) and kwargs == kwargs2:
                return MockingResponse(text, status_code, error_msg)
        raise KeyError(('No stub data found for requests.get({}, {}).\n You may'
                        ' need to update the mock data. Look at the '
                        'tests/mock_requests.py file for an explanation').format(args, kwargs))

elif mode == 'COLLECT':

    def mock_requests_get(*args, **kwargs):
        """Do the real request, record its outcome, and return a mock of it."""
        text, status_code, error_msg = None, None, None
        try:
            r = _orgininal_requests_get(*args, **kwargs)
            text, status_code = r.text, r.status_code
            r.raise_for_status()
        except requests.exceptions.RequestException as e:
            # the error is recorded as well, to be replayed in MOCK mode.
            error_msg = str(e)

        text = intercept_text(text)
        _collected_responses.append((args, kwargs, text, status_code, error_msg))
        _save_collected_responses()  # yes, we save everytime, because it's not
                                     # clear how to run once after all the tests
                                     # have run. If you figure it out...

        return MockingResponse(text, status_code, error_msg)

    def _save_collected_responses():
        """Write all the responses collected so far to the data file."""
        with open(_data_filepath, 'w') as fd:
            json.dump(sorted(_collected_responses), fd, indent=2)

elif mode == 'ONLINE':

    def mock_requests_get(*args, **kwargs):
        """Do the real request, only altering the payload via `intercept_text`."""
        r = _orgininal_requests_get(*args, **kwargs)
        # NOTE(review): overwrites the private `_content` of the response,
        # presumably because the public accessors are derived from it and
        # cannot be assigned; confirm against the `requests` version in use.
        r._content = intercept_text(r.content)
        return r

else:
    # fail at import time rather than leaving `mock_requests_get` undefined.
    raise ValueError("unknown PUBS_TESTS_MODE {!r}: expected one of 'MOCK', "
                     "'COLLECT' or 'ONLINE'.".format(mode))

@ -1,5 +0,0 @@
# those are the additional packages required to run the tests
six
pyfakefs
ddt
mock

@ -3,24 +3,36 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import unittest import unittest
import mock
import dotdot import dotdot
from pubs.p3 import ustr from pubs.p3 import ustr
from pubs.endecoder import EnDecoder from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex from pubs.apis import ReferenceNotFoundError, arxiv2bibtex, doi2bibtex, isbn2bibtex, _is_arxiv_oldstyle, _extract_arxiv_id
from pubs import apis
import mock_requests
class TestDOI2Bibtex(unittest.TestCase): class APITests(unittest.TestCase):
def setUp(self): def setUp(self):
self.endecoder = EnDecoder() self.endecoder = EnDecoder()
def test_unicode(self):
class TestDOI2Bibtex(APITests):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_unicode(self, reqget):
bib = doi2bibtex('10.1007/BF01700692') bib = doi2bibtex('10.1007/BF01700692')
self.assertIsInstance(bib, ustr) self.assertIsInstance(bib, ustr)
self.assertIn('Kurt Gödel', bib) self.assertIn('Kurt Gödel', bib)
def test_parses_to_bibtex(self): @mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex(self, reqget):
bib = doi2bibtex('10.1007/BF01700692') bib = doi2bibtex('10.1007/BF01700692')
b = self.endecoder.decode_bibdata(bib) b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1) self.assertEqual(len(b), 1)
@ -30,23 +42,22 @@ class TestDOI2Bibtex(unittest.TestCase):
'Über formal unentscheidbare Sätze der Principia ' 'Über formal unentscheidbare Sätze der Principia '
'Mathematica und verwandter Systeme I') 'Mathematica und verwandter Systeme I')
def test_parse_fails_on_incorrect_DOI(self): @mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
bib = doi2bibtex('999999') def test_retrieve_fails_on_incorrect_DOI(self, reqget):
with self.assertRaises(EnDecoder.BibDecodingError): with self.assertRaises(apis.ReferenceNotFoundError):
self.endecoder.decode_bibdata(bib) doi2bibtex('999999')
class TestISBN2Bibtex(unittest.TestCase):
def setUp(self): class TestISBN2Bibtex(APITests):
self.endecoder = EnDecoder()
def test_unicode(self): @mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_unicode(self, reqget):
bib = isbn2bibtex('9782081336742') bib = isbn2bibtex('9782081336742')
self.assertIsInstance(bib, ustr) self.assertIsInstance(bib, ustr)
self.assertIn('Poincaré, Henri', bib) self.assertIn('Poincaré, Henri', bib)
def test_parses_to_bibtex(self): @mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex(self, reqget):
bib = isbn2bibtex('9782081336742') bib = isbn2bibtex('9782081336742')
b = self.endecoder.decode_bibdata(bib) b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1) self.assertEqual(len(b), 1)
@ -54,11 +65,97 @@ class TestISBN2Bibtex(unittest.TestCase):
self.assertEqual(entry['author'][0], 'Poincaré, Henri') self.assertEqual(entry['author'][0], 'Poincaré, Henri')
self.assertEqual(entry['title'], 'La science et l\'hypothèse') self.assertEqual(entry['title'], 'La science et l\'hypothèse')
def test_parse_fails_on_incorrect_ISBN(self): @mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
bib = doi2bibtex('9' * 13) def test_retrieve_fails_on_incorrect_ISBN(self, reqget):
bib = isbn2bibtex('9' * 13)
with self.assertRaises(EnDecoder.BibDecodingError): with self.assertRaises(EnDecoder.BibDecodingError):
self.endecoder.decode_bibdata(bib) self.endecoder.decode_bibdata(bib)
# Note: apparently ottobib.com uses caracter modifiers for accents instead class TestArxiv2Bibtex(APITests):
# of the correct unicode characters. TODO: Should we convert them?
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_new_style(self, reqget):
bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
self.assertEqual(entry['year'], '1999')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex_with_doi(self, reqget):
bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
self.assertEqual(entry['year'], '1999')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex_without_doi(self, reqget):
bib = arxiv2bibtex('math/0211159')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perelman, Grisha')
self.assertEqual(entry['year'], '2002')
self.assertEqual(
entry['title'],
'The entropy formula for the Ricci flow and its geometric applications')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_wrong_id(self, reqget):
with self.assertRaises(ReferenceNotFoundError):
bib = arxiv2bibtex('INVALIDID')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_wrong_doi(self, reqget):
bib = arxiv2bibtex('1312.2021')
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertEqual(entry['arxiv_doi'], '10.1103/INVALIDDOI.89.084044')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_good_doi(self, reqget):
"""Get the DOI bibtex instead of the arXiv one if possible"""
bib = arxiv2bibtex('1710.08557')
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertTrue(not 'arxiv_doi' in entry)
self.assertEqual(entry['doi'], '10.1186/s12984-017-0305-3')
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological and robotic grasp and manipulation')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_good_doi_force_arxiv(self, reqget):
bib = arxiv2bibtex('1710.08557', try_doi=False)
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertEqual(entry['arxiv_doi'], '10.1186/s12984-017-0305-3')
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological grasp and\nmanipulation')
class TestArxiv2BibtexLocal(unittest.TestCase):
    """Tests of the arXiv id helpers; they do not require a connection"""

    def test_oldstyle_pattern(self):
        """Check that old-style and new-style arXiv ids are told apart."""
        oldstyle_ids = ('cs/9301113', 'math/9201277v3', 'astro-ph/9812133',
                        'cond-mat/0604612', 'hep-ph/0702007v10',
                        'arXiv:physics/9403001')
        newstyle_ids = ('1808.00954', 'arXiv:1808.00953', '1808.0953',
                        '1808.00954v1', 'arXiv:1808.00953v2', '1808.0953v42')

        for arxiv_id in oldstyle_ids:
            self.assertTrue(_is_arxiv_oldstyle(arxiv_id))
        for arxiv_id in newstyle_ids:
            self.assertFalse(_is_arxiv_oldstyle(arxiv_id))

    def test_extract_id(self):
        """Check that the id is correctly extracted from the entry url."""
        cases = (
            ("http://arxiv.org/abs/0704.0010v1", "0704.0010v1"),
            ("https://arxiv.org/abs/0704.0010v1", "0704.0010v1"),
            ("https://arxiv.org/abs/astro-ph/9812133v2", "astro-ph/9812133v2"),
        )
        for url, expected_id in cases:
            self.assertEqual(_extract_arxiv_id({'id': url}), expected_id)
if __name__ == '__main__':
unittest.main(verbosity=2)

File diff suppressed because one or more lines are too long

@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2):
class TestEnDecode(unittest.TestCase): class TestEnDecode(unittest.TestCase):
def test_decode_emptystring(self):
    """Decoding an empty string must raise a BibDecodingError."""
    decoder = endecoder.EnDecoder()
    with self.assertRaises(decoder.BibDecodingError):
        decoder.decode_bibdata('')
def test_encode_bibtex_is_unicode(self): def test_encode_bibtex_is_unicode(self):
decoder = endecoder.EnDecoder() decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibtex_raw0) entry = decoder.decode_bibdata(bibtex_raw0)
@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase):
self.assertEqual(bibraw1, bibraw2) self.assertEqual(bibraw1, bibraw2)
def test_endecode_bibtex_BOM(self):
    """Test that BOM-prefixed data is decoded like the same data without BOM"""
    decoder = endecoder.EnDecoder()

    # reference: a plain decode/encode roundtrip.
    reference = decoder.encode_bibdata(decoder.decode_bibdata(bibtex_raw0))

    # same roundtrip, starting from the BOM-prefixed reference.
    with_bom = '\ufeff' + reference
    roundtrip = decoder.encode_bibdata(decoder.decode_bibdata(with_bom))

    self.assertEqual(reference, roundtrip)
def test_endecode_bibtex_converts_month_string(self): def test_endecode_bibtex_converts_month_string(self):
"""Test if `month=dec` is correctly recognized and transformed into """Test if `month=dec` is correctly recognized and transformed into
`month={December}`""" `month={December}`"""

@ -7,7 +7,7 @@ from pubs.events import Event
_output = None _output = None
class TestEvent(Event): class StringEvent(Event):
def __init__(self, string): def __init__(self, string):
self.string = string self.string = string
@ -34,20 +34,20 @@ class SpecificInfo(Info):
self.specific = specific self.specific = specific
@TestEvent.listen(12, 15) @StringEvent.listen(12, 15)
def display(TestEventInstance, nb1, nb2): def display(StringEventInstance, nb1, nb2):
_output.append("%s %s %s" _output.append("%s %s %s"
% (TestEventInstance.string, nb1, nb2)) % (StringEventInstance.string, nb1, nb2))
@TestEvent.listen() @StringEvent.listen()
def hello_word(TestEventInstance): def hello_word(StringEventInstance):
_output.append('Helloword') _output.append('Helloword')
@TestEvent.listen() @StringEvent.listen()
def print_it(TestEventInstance): def print_it(StringEventInstance):
TestEventInstance.print_one() StringEventInstance.print_one()
@AddEvent.listen() @AddEvent.listen()
@ -56,7 +56,7 @@ def do_it(AddEventInstance):
@Info.listen() @Info.listen()
def test_info_instance(infoevent): def collect_info_instance(infoevent):
_output.append(infoevent.info) _output.append(infoevent.info)
if isinstance(infoevent, SpecificInfo): if isinstance(infoevent, SpecificInfo):
_output.append(infoevent.specific) _output.append(infoevent.specific)
@ -68,9 +68,9 @@ class TestEvents(unittest.TestCase):
global _output global _output
_output = [] _output = []
def test_listen_TestEvent(self): def test_listen_StringEvent(self):
# using the callback system # using the callback system
myevent = TestEvent('abcdefghijklmnopqrstuvwxyz') myevent = StringEvent('abcdefghijklmnopqrstuvwxyz')
myevent.send() # this one call three function myevent.send() # this one call three function
correct = ['abcdefghijklmnopqrstuvwxyz 12 15', correct = ['abcdefghijklmnopqrstuvwxyz 12 15',
'Helloword', 'Helloword',

Loading…
Cancel
Save