better error message when parsing of bibtex fails

Plus, slight refactoring: remove `databroker.verify()` method
main
Fabien C. Y. Benureau 7 years ago
parent 69f760cac6
commit be80e75cbb
No known key found for this signature in database
GPG Key ID: C3FB5E831A249A9A

@ -8,6 +8,10 @@
### Implemented enhancements ### Implemented enhancements
- Support for downloading arXiv references from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini))
- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155)
- Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142) - Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142)
- Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden)) - Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden))
@ -26,6 +30,7 @@
- Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102) - Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102)
- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155)
### Fixed bugs ### Fixed bugs

@ -13,7 +13,7 @@ class ReferenceNotFoundError(Exception):
pass pass
def get_bibentry_from_api(id_str, id_type, rp, ui=None): def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None):
"""Return a bibtex string from various ID methods. """Return a bibtex string from various ID methods.
This is a wrapper around functions that will return a bibtex string given This is a wrapper around functions that will return a bibtex string given
@ -46,8 +46,8 @@ def get_bibentry_from_api(id_str, id_type, rp, ui=None):
if id_type not in id_fns.keys(): if id_type not in id_fns.keys():
raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.') raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')
bibentry_raw = id_fns[id_type](id_str) bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw) endecoder.EnDecoder().decode_bibdata(bibentry_raw)
if bibentry is None: if bibentry is None:
raise ReferenceNotFoundException( raise ReferenceNotFoundException(
'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str)) 'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))

@ -43,22 +43,25 @@ def parser(subparsers, conf):
return parser return parser
def bibentry_from_editor(conf, ui, rp): def bibentry_from_editor(conf, ui):
again = True again = True
bibstr = templates.add_bib bibentry_raw = templates.add_bib
decoder = endecoder.EnDecoder()
while again: while again:
try: try:
bibstr = ui.editor_input(initial=bibstr, suffix='.bib') bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib')
if bibstr == templates.add_bib: if bibentry_raw == templates.add_bib:
again = ui.input_yn( again = ui.input_yn(
question='Bibfile not edited. Edit again ?', question='Bibfile not edited. Edit again ?',
default='y') default='y')
if not again: if not again:
ui.exit(0) ui.exit(0)
else: else:
bibentry = rp.databroker.verify(bibstr) bibentry = decoder.decode_bibdata(bibentry_raw)
bibstruct.verify_bibdata(bibentry) bibstruct.verify_bibdata(bibentry)
# REFACTOR Generate citykey # REFACTOR Generate citekey
again = False again = False
except endecoder.EnDecoder.BibDecodingError: except endecoder.EnDecoder.BibDecodingError:
@ -84,28 +87,29 @@ def command(conf, args):
citekey = args.citekey citekey = args.citekey
rp = repo.Repository(conf) rp = repo.Repository(conf)
decoder = endecoder.EnDecoder()
# get bibtex entry # get bibtex entry
if bibfile is None: if bibfile is None:
if args.doi is None and args.isbn is None and args.arxiv is None: if args.doi is None and args.isbn is None and args.arxiv is None:
bibentry = bibentry_from_editor(conf, ui, rp) bibentry = bibentry_from_editor(conf, ui)
else: else:
bibentry = None bibentry = None
try: try:
if args.doi is not None: if args.doi is not None:
bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp) bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui)
elif args.isbn is not None: elif args.isbn is not None:
bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp) bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui)
# TODO distinguish between cases, offer to open the error page in a webbrowser. # TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey # TODO offer to confirm/change citekey
elif args.arxiv is not None: elif args.arxiv is not None:
bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp) bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui)
except apis.ReferenceNotFoundException as e: except apis.ReferenceNotFoundException as e:
ui.error(e.message) ui.error(e.message)
ui.exit(1) ui.exit(1)
else: else:
bibentry_raw = content.get_content(bibfile, ui=ui) bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw) bibentry = decoder.decode_bibdata(bibentry_raw)
if bibentry is None: if bibentry is None:
ui.error('invalid bibfile {}.'.format(bibfile)) ui.error('invalid bibfile {}.'.format(bibfile))

@ -79,16 +79,6 @@ class DataBroker(object):
def listing(self, filestats=True): def listing(self, filestats=True):
return self.filebroker.listing(filestats=filestats) return self.filebroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
"""Will return None if bibdata_raw can't be decoded"""
try:
if bibdata_raw.startswith('\ufeff'):
# remove BOM, because bibtexparser does not support it.
bibdata_raw = bibdata_raw[1:]
return self.endecoder.decode_bibdata(bibdata_raw)
except ValueError as e:
return None
# docbroker # docbroker
def in_docsdir(self, docpath): def in_docsdir(self, docpath):

@ -163,9 +163,6 @@ class DataCache(object):
def listing(self, filestats=True): def listing(self, filestats=True):
return self.databroker.listing(filestats=filestats) return self.databroker.listing(filestats=filestats)
def verify(self, bibdata_raw):
return self.databroker.verify(bibdata_raw)
# docbroker # docbroker
def in_docsdir(self, docpath): def in_docsdir(self, docpath):

@ -1,9 +1,16 @@
from __future__ import absolute_import, unicode_literals from __future__ import absolute_import, unicode_literals
import copy import copy
import logging
# both needed to intercept exceptions.
import pyparsing
import bibtexparser
try: try:
import bibtexparser as bp import bibtexparser as bp
# don't let bibtexparser display stuff
# bp.bparser.logger.setLevel(level=logging.CRITICAL)
except ImportError: except ImportError:
print("error: you need to install bibterxparser; try running 'pip install " print("error: you need to install bibterxparser; try running 'pip install "
"bibtexparser'.") "bibtexparser'.")
@ -68,14 +75,16 @@ class EnDecoder(object):
class BibDecodingError(Exception): class BibDecodingError(Exception):
message = "Could not parse provided bibdata:\n---\n{}\n---" # message = "Could not parse provided bibdata:\n---\n{}\n---"
def __init__(self, bibdata): def __init__(self, error_msg, bibdata):
"""
:param error_msg: specific message about what went wrong
:param bibdata: the data that was unsuccessfully decoded.
"""
super(Exception, self).__init__(error_msg) # make `str(self)` work.
self.data = bibdata self.data = bibdata
def __str__(self):
return self.message.format(self.data)
bwriter = bp.bwriter.BibTexWriter() bwriter = bp.bwriter.BibTexWriter()
bwriter.display_order = BIBFIELD_ORDER bwriter.display_order = BIBFIELD_ORDER
@ -117,10 +126,12 @@ class EnDecoder(object):
If the decoding fails, raises a BibDecodingError. If the decoding fails, raises a BibDecodingError.
""" """
if len(bibdata) == 0:
error_msg = 'parsing error: the provided string has length zero.'
raise self.BibDecodingError(error_msg, bibdata)
try: try:
entries = bp.bparser.BibTexParser( entries = bp.bparser.BibTexParser(
bibdata, common_strings=True, bibdata, common_strings=True, customization=customizations,
customization=customizations,
homogenize_fields=True).get_entry_dict() homogenize_fields=True).get_entry_dict()
# Remove id from bibtexparser attribute which is stored as citekey # Remove id from bibtexparser attribute which is stored as citekey
@ -131,8 +142,18 @@ class EnDecoder(object):
entries[e][TYPE_KEY] = t entries[e][TYPE_KEY] = t
if len(entries) > 0: if len(entries) > 0:
return entries return entries
except Exception: except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
import traceback error_msg = self._format_parsing_error(e)
traceback.print_exc() raise self.BibDecodingError(error_msg, bibdata)
raise self.BibDecodingError(bibdata) except bibtexparser.bibdatabase.UndefinedString as e:
# TODO: filter exceptions from pyparsing and pass reason upstream error_msg = 'parsing error: undefined string in provided data: {}'.format(e)
raise self.BibDecodingError(error_msg, bibdata)
@classmethod
def _format_parsing_error(cls, e):
"""Transform a pyparsing exception into an error message
Makes a best effort to be useful, but might need to be improved.
"""
return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import unittest import unittest
import socket
import dotdot import dotdot
@ -20,9 +21,11 @@ def _is_connected():
try: try:
host = socket.gethostbyname('www.google.com') host = socket.gethostbyname('www.google.com')
s = socket.create_connection((host, 80), 2) s = socket.create_connection((host, 80), 2)
s.close()
return True return True
except: except socket.error:
return False pass
return False
class APITests(unittest.TestCase): class APITests(unittest.TestCase):
@ -78,6 +81,14 @@ class TestISBN2Bibtex(APITests):
class TestArxiv2Bibtex(APITests): class TestArxiv2Bibtex(APITests):
def test_new_style(self):
bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib)
self.assertEqual(len(b), 1)
entry = b[list(b)[0]]
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
self.assertEqual(entry['year'], '1999')
def test_parses_to_bibtex_with_doi(self): def test_parses_to_bibtex_with_doi(self):
bib = arxiv2bibtex('astro-ph/9812133') bib = arxiv2bibtex('astro-ph/9812133')
b = self.endecoder.decode_bibdata(bib) b = self.endecoder.decode_bibdata(bib)

@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2):
class TestEnDecode(unittest.TestCase): class TestEnDecode(unittest.TestCase):
def test_decode_emptystring(self):
decoder = endecoder.EnDecoder()
with self.assertRaises(decoder.BibDecodingError):
entry = decoder.decode_bibdata('')
def test_encode_bibtex_is_unicode(self): def test_encode_bibtex_is_unicode(self):
decoder = endecoder.EnDecoder() decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibtex_raw0) entry = decoder.decode_bibdata(bibtex_raw0)
@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase):
self.assertEqual(bibraw1, bibraw2) self.assertEqual(bibraw1, bibraw2)
def test_endecode_bibtex_BOM(self):
"""Test that bibtexparser is fine with BOM-prefixed data"""
decoder = endecoder.EnDecoder()
bom_str = '\ufeff'
entry_1 = decoder.decode_bibdata(bibtex_raw0)
bibraw_1 = decoder.encode_bibdata(entry_1)
entry_2 = decoder.decode_bibdata(bom_str + bibraw_1)
bibraw_2 = decoder.encode_bibdata(entry_2)
self.assertEqual(bibraw_1, bibraw_2)
def test_endecode_bibtex_converts_month_string(self): def test_endecode_bibtex_converts_month_string(self):
"""Test if `month=dec` is correctly recognized and transformed into """Test if `month=dec` is correctly recognized and transformed into
`month={December}`""" `month={December}`"""

Loading…
Cancel
Save