From be80e75cbb7fb531ae8c9d67d97ca08299d04e95 Mon Sep 17 00:00:00 2001 From: "Fabien C. Y. Benureau" Date: Mon, 6 Aug 2018 17:45:30 +0900 Subject: [PATCH] better error message when parsing of bibtex fails Plus, slight refactoring: remove `databroker.verify()` method --- changelog.md | 5 +++++ pubs/apis.py | 6 +++--- pubs/commands/add_cmd.py | 26 +++++++++++++---------- pubs/databroker.py | 10 --------- pubs/datacache.py | 3 --- pubs/endecoder.py | 45 +++++++++++++++++++++++++++++----------- tests/test_apis.py | 15 ++++++++++++-- tests/test_endecoder.py | 17 +++++++++++++++ 8 files changed, 86 insertions(+), 41 deletions(-) diff --git a/changelog.md b/changelog.md index 314ccbd..5e07c14 100644 --- a/changelog.md +++ b/changelog.md @@ -8,6 +8,10 @@ ### Implemented enhancements +- Support for downloading arXiv reference from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini)) + +- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155) + - Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142) - Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden)) @@ -26,6 +30,7 @@ - Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102) +- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155) ### Fixed bugs diff --git a/pubs/apis.py b/pubs/apis.py index b816abd..f6d84a8 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -13,7 +13,7 @@ class ReferenceNotFoundError(Exception): pass -def get_bibentry_from_api(id_str, id_type, rp, ui=None): +def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None): """Return a bibtex string from various ID methods. This is a wrapper around functions that will return a bibtex string given @@ -46,8 +46,8 @@ def get_bibentry_from_api(id_str, id_type, rp, ui=None): if id_type not in id_fns.keys(): raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.') - bibentry_raw = id_fns[id_type](id_str) - bibentry = rp.databroker.verify(bibentry_raw) + bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui) + endecoder.EnDecoder().decode_bibdata(bibentry_raw) if bibentry is None: raise ReferenceNotFoundException( 'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str)) diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index 3ae8f70..dead1a7 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -43,22 +43,25 @@ def parser(subparsers, conf): return parser -def bibentry_from_editor(conf, ui, rp): +def bibentry_from_editor(conf, ui): + again = True - bibstr = templates.add_bib + bibentry_raw = templates.add_bib + decoder = endecoder.EnDecoder() + while again: try: - bibstr = ui.editor_input(initial=bibstr, suffix='.bib') - if bibstr == templates.add_bib: + bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib') + if bibentry_raw == templates.add_bib: again = ui.input_yn( question='Bibfile not edited. Edit again ?', default='y') if not again: ui.exit(0) else: - bibentry = rp.databroker.verify(bibstr) + bibentry = decoder.decode_bibdata(bibentry_raw) bibstruct.verify_bibdata(bibentry) - # REFACTOR Generate citykey + # REFACTOR Generate citekey again = False except endecoder.EnDecoder.BibDecodingError: @@ -84,28 +87,29 @@ def command(conf, args): citekey = args.citekey rp = repo.Repository(conf) + decoder = endecoder.EnDecoder() # get bibtex entry if bibfile is None: if args.doi is None and args.isbn is None and args.arxiv is None: - bibentry = bibentry_from_editor(conf, ui, rp) + bibentry = bibentry_from_editor(conf, ui) else: bibentry = None try: if args.doi is not None: - bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp) + bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui) elif args.isbn is not None: - bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp) + bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui) # TODO distinguish between cases, offer to open the error page in a webbrowser. # TODO offer to confirm/change citekey elif args.arxiv is not None: - bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp) + bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui) except apis.ReferenceNotFoundException as e: ui.error(e.message) ui.exit(1) else: bibentry_raw = content.get_content(bibfile, ui=ui) - bibentry = rp.databroker.verify(bibentry_raw) + bibentry = decoder.decode_bibdata(bibentry_raw) if bibentry is None: ui.error('invalid bibfile {}.'.format(bibfile)) diff --git a/pubs/databroker.py b/pubs/databroker.py index 97c1398..23e8b91 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -79,16 +79,6 @@ class DataBroker(object): def listing(self, filestats=True): return self.filebroker.listing(filestats=filestats) - def verify(self, bibdata_raw): - """Will return None if bibdata_raw can't be decoded""" - try: - if bibdata_raw.startswith('\ufeff'): - # remove BOM, because bibtexparser does not support it. - bibdata_raw = bibdata_raw[1:] - return self.endecoder.decode_bibdata(bibdata_raw) - except ValueError as e: - return None - # docbroker def in_docsdir(self, docpath): diff --git a/pubs/datacache.py b/pubs/datacache.py index 2da4fe0..a02a05b 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -163,9 +163,6 @@ class DataCache(object): def listing(self, filestats=True): return self.databroker.listing(filestats=filestats) - def verify(self, bibdata_raw): - return self.databroker.verify(bibdata_raw) - # docbroker def in_docsdir(self, docpath): diff --git a/pubs/endecoder.py b/pubs/endecoder.py index 259b8d9..f265f54 100644 --- a/pubs/endecoder.py +++ b/pubs/endecoder.py @@ -1,9 +1,16 @@ from __future__ import absolute_import, unicode_literals import copy +import logging + +# both needed to intercept exceptions. +import pyparsing +import bibtexparser try: import bibtexparser as bp + # don't let bibtexparser display stuff +# bp.bparser.logger.setLevel(level=logging.CRITICAL) except ImportError: print("error: you need to install bibterxparser; try running 'pip install " "bibtexparser'.") @@ -68,14 +75,16 @@ class EnDecoder(object): class BibDecodingError(Exception): - message = "Could not parse provided bibdata:\n---\n{}\n---" +# message = "Could not parse provided bibdata:\n---\n{}\n---" - def __init__(self, bibdata): + def __init__(self, error_msg, bibdata): + """ + :param error_msg: specific message about what went wrong + :param bibdata: the data that was unsuccessfully decoded. + """ + super(Exception, self).__init__(error_msg) # make `str(self)` work. self.data = bibdata - def __str__(self): - return self.message.format(self.data) - bwriter = bp.bwriter.BibTexWriter() bwriter.display_order = BIBFIELD_ORDER @@ -117,10 +126,12 @@ class EnDecoder(object): If the decoding fails, returns a BibParseError. """ + if len(bibdata) == 0: + error_msg = 'parsing error: the provided string has length zero.' + raise self.BibDecodingError(error_msg, bibdata) try: entries = bp.bparser.BibTexParser( - bibdata, common_strings=True, - customization=customizations, + bibdata, common_strings=True, customization=customizations, homogenize_fields=True).get_entry_dict() # Remove id from bibtexparser attribute which is stored as citekey @@ -131,8 +142,18 @@ class EnDecoder(object): entries[e][TYPE_KEY] = t if len(entries) > 0: return entries - except Exception: - import traceback - traceback.print_exc() - raise self.BibDecodingError(bibdata) - # TODO: filter exceptions from pyparsing and pass reason upstream + except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e: + error_msg = self._format_parsing_error(e) + raise self.BibDecodingError(error_msg, bibdata) + except bibtexparser.bibdatabase.UndefinedString as e: + error_msg = 'parsing error: undefined string in provided data: {}'.format(e) + raise self.BibDecodingError(error_msg, bibdata) + + + @classmethod + def _format_parsing_error(cls, e): + """Transform a pyparsing exception into an error message + + Does a best effort to be useful, but might need to be improved. + """ + return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e) diff --git a/tests/test_apis.py b/tests/test_apis.py index 6886aa1..d42e110 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import unittest +import socket import dotdot @@ -20,9 +21,11 @@ def _is_connected(): try: host = socket.gethostbyname('www.google.com') s = socket.create_connection((host, 80), 2) + s.close() return True - except: - return False + except socket.error: + pass + return False class APITests(unittest.TestCase): @@ -78,6 +81,14 @@ class TestISBN2Bibtex(APITests): class TestArxiv2Bibtex(APITests): + def test_new_style(self): + bib = arxiv2bibtex('astro-ph/9812133') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Perlmutter, S.') + self.assertEqual(entry['year'], '1999') + def test_parses_to_bibtex_with_doi(self): bib = arxiv2bibtex('astro-ph/9812133') b = self.endecoder.decode_bibdata(bib) diff --git a/tests/test_endecoder.py b/tests/test_endecoder.py index a6e3043..11d5306 100644 --- a/tests/test_endecoder.py +++ b/tests/test_endecoder.py @@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2): class TestEnDecode(unittest.TestCase): + def test_decode_emptystring(self): + decoder = endecoder.EnDecoder() + with self.assertRaises(decoder.BibDecodingError): + entry = decoder.decode_bibdata('') + def test_encode_bibtex_is_unicode(self): decoder = endecoder.EnDecoder() entry = decoder.decode_bibdata(bibtex_raw0) @@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase): self.assertEqual(bibraw1, bibraw2) + def test_endecode_bibtex_BOM(self): + """Test that bibtexparser if fine with BOM-prefixed data""" + decoder = endecoder.EnDecoder() + bom_str = '\ufeff' + + entry_1 = decoder.decode_bibdata(bibtex_raw0) + bibraw_1 = decoder.encode_bibdata(entry_1) + entry_2 = decoder.decode_bibdata(bom_str + bibraw_1) + bibraw_2 = decoder.encode_bibdata(entry_2) + + self.assertEqual(bibraw_1, bibraw_2) + def test_endecode_bibtex_converts_month_string(self): """Test if `month=dec` is correctly recognized and transformed into `month={December}`"""