Merge pull request #155 from pubs/jma/arxiv

Handle arXiv errors and other improvements.

- handles old/new style arxiv references
- normalize arXiv citekeys
- improves error handling of doi/isbn (raise Exception if an HTTP status is detected).
- fix the isbn parse test that was incorrectly using doi2bibtex.
- add feedparser to the requirements.txt file.
- allows to run the tests using pytest.
main
Olivier Mangin 7 years ago committed by GitHub
commit 81aa1cddf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -16,6 +16,11 @@ matrix:
- os: linux
language: python
python: 3.6
- os: linux
language: python
python: 3.7
dist: xenial
sudo: true
- os: osx
language: generic
python: 2.7
@ -36,8 +41,9 @@ matrix:
# command to install dependencies
install:
- python --version
- pip install -r tests/requirements.txt
- python setup.py install
- export PUBS_TESTS_MODE=ONLINE
# command to run tests
script: python -m unittest discover
script:
- PUBS_TESTS_MODE=MOCK python setup.py test
- PUBS_TESTS_MODE=COLLECT python setup.py test

@ -8,6 +8,10 @@
### Implemented enhancements
- Support for downloading arXiv reference from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini))
- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155)
- Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142)
- Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden))
@ -26,6 +30,7 @@
- Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102)
- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155)
### Fixed bugs

@ -0,0 +1,24 @@
# if you want to set up your environment for development of the pubs code,
# doing `pip install -r dev_requirements.txt` is the single thing you have to do.
# Alternatively, and perhaps more conveniently, running `python setup.py test`
# will do the same *and* run the tests, but without installing the packages on
# the system.
# Note that if you introduce a new dependency, you need to add it here and, more
# importantly, to the setup.py script so that it is taken into account when
# installing from PyPi.
-e .
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4
feedparser
six
# those are the additional packages required to run the tests
pyfakefs
ddt
mock
pytest # optional (python setup.py test works without it), but possible nonetheless

@ -1,27 +1,198 @@
"""Interface for Remote Bibliographic APIs"""
import re
import datetime
import requests
import bibtexparser
from bibtexparser.bibdatabase import BibDatabase
import feedparser
from bs4 import BeautifulSoup
def doi2bibtex(doi):
class ReferenceNotFoundError(Exception):
    """Raised when a reference cannot be retrieved from a remote API."""
def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None):
    """Return a decoded bibentry retrieved from one of the remote APIs.

    This is a wrapper around the functions that download a bibtex string given
    one of:

        * DOI
        * ISBN
        * arXiv ID

    The downloaded bibtex string is then decoded with
    `endecoder.EnDecoder.decode_bibdata`.

    Args:
        id_str: A string with the ID.
        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
        try_doi: Forwarded as a keyword argument to the retrieval function.
            Only `arxiv2bibtex` names it explicitly; `doi2bibtex` and
            `isbn2bibtex` accept it through `**kwargs`.
        ui: A UI object, forwarded to the retrieval function in the same way
            as `try_doi`.

    Returns:
        The decoded bibentry, as returned by `EnDecoder.decode_bibdata`.

    Raises:
        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
        ReferenceNotFoundError: if no valid reference could be found.
            Errors raised by `decode_bibdata` itself are not caught here.
    """
    # Local import: this module does not import `endecoder` at the top level.
    from . import endecoder

    id_fns = {
        'doi': doi2bibtex,
        'isbn': isbn2bibtex,
        'arxiv': arxiv2bibtex,
    }

    if id_type not in id_fns:
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

    bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui)
    # Keep the decoded result: it is what gets checked and returned below.
    bibentry = endecoder.EnDecoder().decode_bibdata(bibentry_raw)
    if bibentry is None:
        raise ReferenceNotFoundError(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
    return bibentry
def _get_request(url, headers=None):
    """Perform a GET request on `url` and return the response.

    :raise ReferenceNotFoundError: if `requests` raises a `RequestException`,
        either while performing the request or from `raise_for_status()`
        (HTTP status errors such as 401, 404, etc.).
    """
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        raise ReferenceNotFoundError(err.args)
    return response
## DOI support
def doi2bibtex(doi, **kwargs):
"""Return a bibtex string of metadata from a DOI"""
url = 'http://dx.doi.org/{}'.format(doi)
url = 'https://dx.doi.org/{}'.format(doi)
headers = {'accept': 'application/x-bibtex'}
r = requests.get(url, headers=headers)
r = _get_request(url, headers=headers)
if r.encoding is None:
r.encoding = 'utf8' # Do not rely on guessing from request
return r.text
def isbn2bibtex(isbn):
## ISBN support
def isbn2bibtex(isbn, **kwargs):
"""Return a bibtex string of metadata from an ISBN"""
url = 'http://www.ottobib.com/isbn/{}/bibtex'.format(isbn)
r = requests.get(url)
url = 'https://www.ottobib.com/isbn/{}/bibtex'.format(isbn)
r = _get_request(url)
soup = BeautifulSoup(r.text, "html.parser")
citation = soup.find("textarea").text
return citation
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them?
## arXiv support
_months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
def _is_arxiv_oldstyle(arxiv_id):
return re.match(r"(arXiv\:)?[a-z\-]+\/[0-9]+(v[0-9]+)?", arxiv_id) is not None
def _extract_arxiv_id(entry):
pattern = r"http[s]?://arxiv.org/abs/(?P<entry_id>.+)"
return re.search(pattern, entry['id']).groupdict()['entry_id']
def arxiv2bibtex(arxiv_id, try_doi=True, ui=None):
    """Return a bibtex string of metadata from an arXiv ID

    :param arxiv_id: arXiv id, with or without the `arXiv:` prefix and version
                     suffix (e.g. `v1`). Old and new style are accepted. Here
                     are examples of accepted identifiers: `1510.00322`,
                     `arXiv:1510.00322`, `0901.0512`, `arXiv:0901.0512`,
                     `hep-ph/9409201` or `arXiv:hep-ph/9409201`.
                     Note that the `ID` and `eprint` fields of the generated
                     entry are taken from the identifier found in the arXiv
                     response (see `_extract_arxiv_id`), not from `arxiv_id`
                     as given.
    :param try_doi:  if a DOI is referenced in the arXiv metadata,
                     try to download it instead. If that fails with a
                     `ReferenceNotFoundError`, falls back to the arXiv, with
                     a warning message, if the UI is provided.
    :param ui:       if not None, will display a warning if the doi request
                     fails.
    :raise ReferenceNotFoundError: if the arXiv server answers with a bad
                     request error, if the request fails, or if the response
                     does not contain exactly one entry.
    """
    ## handle errors
    url = 'https://export.arxiv.org/api/query?id_list={}'.format(arxiv_id)
    try:
        r = requests.get(url)
        if r.status_code == 400:  # bad request
            msg = ("the arXiv server returned a bad request error. The "
                   "arXiv id {} is possibly invalid or malformed.".format(arxiv_id))
            raise ReferenceNotFoundError(msg)
        r.raise_for_status()  # raise an exception for the other HTTP errors
                              # (401, 404, etc.)
    except requests.exceptions.RequestException as e:
        msg = ("connection error while retrieving arXiv data for "
               "'{}': {}".format(arxiv_id, e))
        raise ReferenceNotFoundError(msg)

    feed = feedparser.parse(r.text)
    if not feed.entries:  # no results.
        msg = "no results for arXiv id {}".format(arxiv_id)
        raise ReferenceNotFoundError(msg)
    if len(feed.entries) > 1:  # I don't know how that could happen, but let's
                               # be ready for it.
        results = '\n'.join('{}. {}'.format(i, entry['title'])
                            for i, entry in enumerate(feed.entries, 1))
        msg = ("multiple results for arXiv id {}:\n{}\nThis is unexpected. "
               "Please submit an issue at "
               "https://github.com/pubs/pubs/issues").format(arxiv_id, results)
        raise ReferenceNotFoundError(msg)

    entry = feed.entries[0]

    ## try to return a doi instead of the arXiv reference
    if try_doi and 'arxiv_doi' in entry:
        try:
            return doi2bibtex(entry['arxiv_doi'])
        except ReferenceNotFoundError as e:
            # fall through to the arXiv metadata below.
            if ui is not None:
                ui.warning(str(e))

    ## create a bibentry from the arXiv response.
    db = BibDatabase()
    entry_id = _extract_arxiv_id(entry)
    author_str = ' and '.join(
        [author['name'] for author in entry['authors']])
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': entry_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'month': _months[entry['published_parsed'].tm_mon-1],
        'eprint': entry_id,
        'eprinttype': 'arxiv',
        'date': entry['published'],  # not really standard, but a resolution more
                                     # granular than months is increasingly relevant.
        'url': entry['link'],
        'urldate': datetime.datetime.utcnow().isoformat() + 'Z'  # can't hurt.
    }]
    # we don't add eprintclass for old-style ids, as it is in the id already.
    if not _is_arxiv_oldstyle(entry_id):
        db.entries[0]['eprintclass'] = entry['arxiv_primary_category']['term']
    if 'arxiv_doi' in entry:
        db.entries[0]['arxiv_doi'] = entry['arxiv_doi']

    bibtex = bibtexparser.dumps(db)
    return bibtex

@ -26,8 +26,10 @@ def parser(subparsers, conf):
parser = subparsers.add_parser('add', help='add a paper to the repository')
parser.add_argument('bibfile', nargs='?', default=None,
help='bibtex file')
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg = parser.add_mutually_exclusive_group()
id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
default=None
@ -41,22 +43,25 @@ def parser(subparsers, conf):
return parser
def bibentry_from_editor(conf, ui, rp):
def bibentry_from_editor(conf, ui):
again = True
bibstr = templates.add_bib
bibentry_raw = templates.add_bib
decoder = endecoder.EnDecoder()
while again:
try:
bibstr = ui.editor_input(initial=bibstr, suffix='.bib')
if bibstr == templates.add_bib:
bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib')
if bibentry_raw == templates.add_bib:
again = ui.input_yn(
question='Bibfile not edited. Edit again ?',
default='y')
if not again:
ui.exit(0)
else:
bibentry = rp.databroker.verify(bibstr)
bibentry = decoder.decode_bibdata(bibentry_raw)
bibstruct.verify_bibdata(bibentry)
# REFACTOR Generate citykey
# REFACTOR Generate citekey
again = False
except endecoder.EnDecoder.BibDecodingError:
@ -82,30 +87,29 @@ def command(conf, args):
citekey = args.citekey
rp = repo.Repository(conf)
decoder = endecoder.EnDecoder()
# get bibtex entry
if bibfile is None:
if args.doi is None and args.isbn is None:
bibentry = bibentry_from_editor(conf, ui, rp)
if args.doi is None and args.isbn is None and args.arxiv is None:
bibentry = bibentry_from_editor(conf, ui)
else:
if args.doi is not None:
bibentry_raw = apis.doi2bibtex(args.doi)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
if args.isbn is None:
ui.exit(1)
if args.isbn is not None:
bibentry_raw = apis.isbn2bibtex(args.isbn)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
ui.exit(1)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
bibentry = None
try:
if args.doi is not None:
bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui)
elif args.isbn is not None:
bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
elif args.arxiv is not None:
bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui)
except apis.ReferenceNotFoundException as e:
ui.error(e.message)
ui.exit(1)
else:
bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw)
bibentry = decoder.decode_bibdata(bibentry_raw)
if bibentry is None:
ui.error('invalid bibfile {}.'.format(bibfile))

@ -79,16 +79,6 @@ class DataBroker(object):
def listing(self, filestats=True):
return self.filebroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
"""Will return None if bibdata_raw can't be decoded"""
try:
if bibdata_raw.startswith('\ufeff'):
# remove BOM, because bibtexparser does not support it.
bibdata_raw = bibdata_raw[1:]
return self.endecoder.decode_bibdata(bibdata_raw)
except ValueError as e:
return None
# docbroker
def in_docsdir(self, docpath):

@ -163,9 +163,6 @@ class DataCache(object):
def listing(self, filestats=True):
return self.databroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
return self.databroker.verify(bibdata_raw)
# docbroker
def in_docsdir(self, docpath):

@ -1,9 +1,16 @@
from __future__ import absolute_import, unicode_literals
import copy
import logging
# both needed to intercept exceptions.
import pyparsing
import bibtexparser
try:
    import bibtexparser as bp
    # don't let bibtexparser display stuff
    bp.bparser.logger.setLevel(level=logging.CRITICAL)
except ImportError:
    # Tell the user how to install the missing dependency.
    print("error: you need to install bibtexparser; try running 'pip install "
          "bibtexparser'.")
@ -68,14 +75,14 @@ class EnDecoder(object):
class BibDecodingError(Exception):
message = "Could not parse provided bibdata:\n---\n{}\n---"
def __init__(self, bibdata):
def __init__(self, error_msg, bibdata):
"""
:param error_msg: specific message about what went wrong
:param bibdata: the data that was unsuccessfully decoded.
"""
super(Exception, self).__init__(error_msg) # make `str(self)` work.
self.data = bibdata
def __str__(self):
return self.message.format(self.data)
bwriter = bp.bwriter.BibTexWriter()
bwriter.display_order = BIBFIELD_ORDER
@ -117,10 +124,12 @@ class EnDecoder(object):
If the decoding fails, raises a BibDecodingError.
"""
if len(bibdata) == 0:
error_msg = 'parsing error: the provided string has length zero.'
raise self.BibDecodingError(error_msg, bibdata)
try:
entries = bp.bparser.BibTexParser(
bibdata, common_strings=True,
customization=customizations,
bibdata, common_strings=True, customization=customizations,
homogenize_fields=True).get_entry_dict()
# Remove id from bibtexparser attribute which is stored as citekey
@ -131,8 +140,18 @@ class EnDecoder(object):
entries[e][TYPE_KEY] = t
if len(entries) > 0:
return entries
except Exception:
import traceback
traceback.print_exc()
raise self.BibDecodingError(bibdata)
# TODO: filter exceptions from pyparsing and pass reason upstream
except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
error_msg = self._format_parsing_error(e)
raise self.BibDecodingError(error_msg, bibdata)
except bibtexparser.bibdatabase.UndefinedString as e:
error_msg = 'parsing error: undefined string in provided data: {}'.format(e)
raise self.BibDecodingError(error_msg, bibdata)
@classmethod
def _format_parsing_error(cls, e):
"""Transform a pyparsing exception into an error message
Does a best effort to be useful, but might need to be improved.
"""
return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)

@ -56,6 +56,10 @@ or an ISBN (dashes are ignored):
pubs add -I 978-0822324669 -d article.pdf
or an arXiv id (automatically downloading arXiv article is in the works):
pubs add -X math/9501234 -d article.pdf
## References always up-to-date

@ -1,6 +0,0 @@
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4

@ -1,10 +1,16 @@
#!/usr/bin/env python
import unittest
from setuptools import setup
with open('pubs/version.py') as f:
exec(f.read()) # defines __version__
def pubs_test_suite():
    """Return the suite of tests discovered in `tests` (files matching `test_*.py`)."""
    return unittest.TestLoader().discover('tests', pattern='test_*.py')
setup(
name='pubs',
version=__version__,
@ -26,9 +32,8 @@ setup(
],
},
install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil',
'requests', 'configobj', 'beautifulsoup4'],
tests_require=['pyfakefs>=2.7', 'mock'],
install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil', 'six',
'requests', 'configobj', 'beautifulsoup4', 'feedparser'],
extras_require={'autocompletion': ['argcomplete'],
},
@ -41,6 +46,9 @@ setup(
'Intended Audience :: Science/Research',
],
test_suite= 'tests',
tests_require=['pyfakefs>=3.4', 'mock', 'ddt'],
# in order to avoid 'zipimport.ZipImportError: bad local file header'
zip_safe=False,

@ -0,0 +1,106 @@
"""
Mock the `requests.get` function, and handle collecting data to do so.
Three modes are available, and controlled via the `PUBS_TESTS_MODE` environment
variable. To modify the variable, under linux or macos, do one of:
$ export PUBS_TESTS_MODE=MOCK
$ export PUBS_TESTS_MODE=COLLECT
$ export PUBS_TESTS_MODE=ONLINE
The MOCK mode is the default one, active even if PUBS_TESTS_MODE has not been
set. It uses saved data to run pubs unit tests relying on the `requests.get`
function without the need of an internet connection (it is also much faster).
The prefetched data is saved in the `test_apis_data.json` file.
The COLLECT mode does real GET requests, and updates the `test_apis_data.json`
file. It is needed if you add or modify the test relying on `requests.get`.
The ONLINE mode bypasses all this and uses the original `requests.get` without
accessing or updating the `test_apis_data.json` data. It might be useful when
running tests on Travis for instance.
"""
import os
import json
import mock
import requests
_orgininal_requests_get = requests.get
_collected_responses = []
_data_filepath = os.path.join(os.path.dirname(__file__), 'test_apis_data.json')
class MockingResponse:
    """Minimal stand-in for the response objects returned by `requests.get`.

    Only exposes what is set below: `text`, `status_code`, `error_msg`,
    `encoding` (always 'utf8') and `raise_for_status()`.
    """

    def __init__(self, text, status_code=200, error_msg=None):
        self.encoding = 'utf8'
        self.error_msg = error_msg
        self.status_code = status_code
        self.text = text

    def raise_for_status(self):
        """Raise a `RequestException` carrying `error_msg` unless the status is 200."""
        if self.status_code == 200:
            return
        raise requests.exceptions.RequestException(self.error_msg)
def intercept_text(text):
    """Replace a known valid DOI by an invalid one in a response body.

    If `text` contains the DOI `10.1103/PhysRevD.89.084044`, every occurrence
    of `PhysRevD` in it is replaced by `INVALIDDOI`; otherwise `text` is
    returned unchanged. Both text and bytes inputs are supported.

    `None` is returned as is: the COLLECT mode passes `None` when the request
    failed before any body was received.
    """
    if text is None:
        return None
    try:
        if '10.1103/PhysRevD.89.084044' in text:
            # replace with wrong DOI
            text = text.replace('PhysRevD', 'INVALIDDOI')
    except TypeError:
        # `text` is a bytes object: redo the check with bytes literals.
        if b'10.1103/PhysRevD.89.084044' in text:
            # replace with wrong DOI
            text = text.replace(b'PhysRevD', b'INVALIDDOI')
    return text
# Select the behavior of `mock_requests_get` from the environment; MOCK is
# used when PUBS_TESTS_MODE is not set.
mode = os.environ.get('PUBS_TESTS_MODE', 'MOCK')

if mode == 'MOCK':

    # Load the previously collected responses once, at import time.
    with open(_data_filepath, 'r') as fd:
        _collected_responses = json.load(fd)

    def mock_requests_get(*args, **kwargs):
        """Return the saved response recorded for the same arguments.

        Raises a KeyError if no saved response matches.
        """
        for saved_args, saved_kwargs, text, status_code, error_msg in _collected_responses:
            same_call = list(args) == list(saved_args) and kwargs == saved_kwargs
            if same_call:
                return MockingResponse(text, status_code, error_msg)
        raise KeyError(('No stub data found for requests.get({}, {}).\n You may'
                        ' need to update the mock data. Look at the '
                        'tests/mock_requests.py file for an explanation').format(args, kwargs))

elif mode == 'COLLECT':

    def _save_collected_responses():
        """Write all the responses collected so far to the data file."""
        with open(_data_filepath, 'w') as fd:
            json.dump(sorted(_collected_responses), fd, indent=2)

    def mock_requests_get(*args, **kwargs):
        """Do the real request, record its outcome and return a mocked response."""
        text = status_code = error_msg = None
        try:
            response = _orgininal_requests_get(*args, **kwargs)
            text, status_code = response.text, response.status_code
            response.raise_for_status()
        except requests.exceptions.RequestException as err:
            error_msg = str(err)
        text = intercept_text(text)
        _collected_responses.append((args, kwargs, text, status_code, error_msg))
        # Saved after every request, because it's not clear how to run once
        # after all the tests have run. If you figure it out...
        _save_collected_responses()
        return MockingResponse(text, status_code, error_msg)

elif mode == 'ONLINE':

    def mock_requests_get(*args, **kwargs):
        """Do the real request; only the response content goes through `intercept_text`."""
        response = _orgininal_requests_get(*args, **kwargs)
        response._content = intercept_text(response.content)
        return response

@ -1,5 +0,0 @@
# those are the additional packages required to run the tests
six
pyfakefs
ddt
mock

@ -3,24 +3,36 @@
from __future__ import unicode_literals
import unittest
import mock
import dotdot
from pubs.p3 import ustr
from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex
from pubs.apis import ReferenceNotFoundError, arxiv2bibtex, doi2bibtex, isbn2bibtex, _is_arxiv_oldstyle, _extract_arxiv_id
from pubs import apis
import mock_requests
class TestDOI2Bibtex(unittest.TestCase):
class APITests(unittest.TestCase):
def setUp(self):
self.endecoder = EnDecoder()
def test_unicode(self):
class TestDOI2Bibtex(APITests):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_unicode(self, reqget):
bib = doi2bibtex('10.1007/BF01700692')
self.assertIsInstance(bib, ustr)
self.assertIn('Kurt Gödel', bib)
def test_parses_to_bibtex(self):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex(self, reqget):
bib = doi2bibtex('10.1007/BF01700692')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
@ -30,23 +42,22 @@ class TestDOI2Bibtex(unittest.TestCase):
'Über formal unentscheidbare Sätze der Principia '
'Mathematica und verwandter Systeme I')
def test_parse_fails_on_incorrect_DOI(self):
bib = doi2bibtex('999999')
with self.assertRaises(EnDecoder.BibDecodingError):
self.endecoder.decode_bibdata(bib)
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_retrieve_fails_on_incorrect_DOI(self, reqget):
with self.assertRaises(apis.ReferenceNotFoundError):
doi2bibtex('999999')
class TestISBN2Bibtex(unittest.TestCase):
def setUp(self):
self.endecoder = EnDecoder()
class TestISBN2Bibtex(APITests):
def test_unicode(self):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_unicode(self, reqget):
bib = isbn2bibtex('9782081336742')
self.assertIsInstance(bib, ustr)
self.assertIn('Poincaré, Henri', bib)
def test_parses_to_bibtex(self):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex(self, reqget):
bib = isbn2bibtex('9782081336742')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
@ -54,11 +65,97 @@ class TestISBN2Bibtex(unittest.TestCase):
self.assertEqual(entry['author'][0], 'Poincaré, Henri')
self.assertEqual(entry['title'], 'La science et l\'hypothèse')
def test_parse_fails_on_incorrect_ISBN(self):
bib = doi2bibtex('9' * 13)
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_retrieve_fails_on_incorrect_ISBN(self, reqget):
bib = isbn2bibtex('9' * 13)
with self.assertRaises(EnDecoder.BibDecodingError):
self.endecoder.decode_bibdata(bib)
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them?
class TestArxiv2Bibtex(APITests):
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_new_style(self, reqget):
bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
self.assertEqual(entry['year'], '1999')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex_with_doi(self, reqget):
bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
self.assertEqual(entry['year'], '1999')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_parses_to_bibtex_without_doi(self, reqget):
bib = arxiv2bibtex('math/0211159')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perelman, Grisha')
self.assertEqual(entry['year'], '2002')
self.assertEqual(
entry['title'],
'The entropy formula for the Ricci flow and its geometric applications')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_wrong_id(self, reqget):
with self.assertRaises(ReferenceNotFoundError):
bib = arxiv2bibtex('INVALIDID')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_wrong_doi(self, reqget):
bib = arxiv2bibtex('1312.2021')
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertEqual(entry['arxiv_doi'], '10.1103/INVALIDDOI.89.084044')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_good_doi(self, reqget):
"""Get the DOI bibtex instead of the arXiv one if possible"""
bib = arxiv2bibtex('1710.08557')
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertTrue(not 'arxiv_doi' in entry)
self.assertEqual(entry['doi'], '10.1186/s12984-017-0305-3')
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological and robotic grasp and manipulation')
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
def test_arxiv_good_doi_force_arxiv(self, reqget):
bib = arxiv2bibtex('1710.08557', try_doi=False)
b = self.endecoder.decode_bibdata(bib)
entry = b[list(b)[0]]
self.assertEqual(entry['arxiv_doi'], '10.1186/s12984-017-0305-3')
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological grasp and\nmanipulation')
class TestArxiv2BibtexLocal(unittest.TestCase):
"""Test arXiv 2 Bibtex connection; those tests don't require a connection"""
def test_oldstyle_pattern(self):
"""Test that we can accurately differentiate between old and new style arXiv ids."""
# old-style arXiv ids
for arxiv_id in ['cs/9301113', 'math/9201277v3', 'astro-ph/9812133',
'cond-mat/0604612', 'hep-ph/0702007v10', 'arXiv:physics/9403001'
]:
self.assertTrue(_is_arxiv_oldstyle(arxiv_id))
# new-style arXiv ids
for arxiv_id in ['1808.00954', 'arXiv:1808.00953', '1808.0953',
'1808.00954v1', 'arXiv:1808.00953v2', '1808.0953v42']:
self.assertFalse(_is_arxiv_oldstyle(arxiv_id))
def test_extract_id(self):
"""Test that ids are correctly extracted"""
self.assertEqual(_extract_arxiv_id({'id': "http://arxiv.org/abs/0704.0010v1"}), "0704.0010v1")
self.assertEqual(_extract_arxiv_id({'id': "https://arxiv.org/abs/0704.0010v1"}), "0704.0010v1")
self.assertEqual(_extract_arxiv_id({'id': "https://arxiv.org/abs/astro-ph/9812133v2"}), "astro-ph/9812133v2")
if __name__ == '__main__':
unittest.main(verbosity=2)

File diff suppressed because one or more lines are too long

@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2):
class TestEnDecode(unittest.TestCase):
def test_decode_emptystring(self):
decoder = endecoder.EnDecoder()
with self.assertRaises(decoder.BibDecodingError):
entry = decoder.decode_bibdata('')
def test_encode_bibtex_is_unicode(self):
decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibtex_raw0)
@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase):
self.assertEqual(bibraw1, bibraw2)
def test_endecode_bibtex_BOM(self):
"""Test that bibtexparser is fine with BOM-prefixed data"""
decoder = endecoder.EnDecoder()
bom_str = '\ufeff'
entry_1 = decoder.decode_bibdata(bibtex_raw0)
bibraw_1 = decoder.encode_bibdata(entry_1)
entry_2 = decoder.decode_bibdata(bom_str + bibraw_1)
bibraw_2 = decoder.encode_bibdata(entry_2)
self.assertEqual(bibraw_1, bibraw_2)
def test_endecode_bibtex_converts_month_string(self):
"""Test if `month=dec` is correctly recognized and transformed into
`month={December}`"""

@ -7,7 +7,7 @@ from pubs.events import Event
_output = None
class TestEvent(Event):
class StringEvent(Event):
def __init__(self, string):
self.string = string
@ -34,20 +34,20 @@ class SpecificInfo(Info):
self.specific = specific
@TestEvent.listen(12, 15)
def display(TestEventInstance, nb1, nb2):
@StringEvent.listen(12, 15)
def display(StringEventInstance, nb1, nb2):
_output.append("%s %s %s"
% (TestEventInstance.string, nb1, nb2))
% (StringEventInstance.string, nb1, nb2))
@TestEvent.listen()
def hello_word(TestEventInstance):
@StringEvent.listen()
def hello_word(StringEventInstance):
_output.append('Helloword')
@TestEvent.listen()
def print_it(TestEventInstance):
TestEventInstance.print_one()
@StringEvent.listen()
def print_it(StringEventInstance):
StringEventInstance.print_one()
@AddEvent.listen()
@ -56,7 +56,7 @@ def do_it(AddEventInstance):
@Info.listen()
def test_info_instance(infoevent):
def collect_info_instance(infoevent):
_output.append(infoevent.info)
if isinstance(infoevent, SpecificInfo):
_output.append(infoevent.specific)
@ -68,9 +68,9 @@ class TestEvents(unittest.TestCase):
global _output
_output = []
def test_listen_TestEvent(self):
def test_listen_StringEvent(self):
# using the callback system
myevent = TestEvent('abcdefghijklmnopqrstuvwxyz')
myevent = StringEvent('abcdefghijklmnopqrstuvwxyz')
myevent.send() # this one call three function
correct = ['abcdefghijklmnopqrstuvwxyz 12 15',
'Helloword',

Loading…
Cancel
Save