better error message when parsing of bibtex fails

Plus, slight refactoring: remove `databroker.verify()` method
2018-08-06 17:45:30 +09:00 · 2018-08-06 17:45:30 +09:00 · be80e75cbb
commit be80e75cbb
parent 69f760cac6
8 changed files with 86 additions and 41 deletions
--- a/changelog.md
+++ b/changelog.md
@ -8,6 +8,10 @@

 ### Implemented enhancements

+- Support for downloading arXiv reference from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini))
+
+- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155)
+
 - Better dialog after editing paper [(#142)](https://github.com/pubs/pubs/issues/142)

 - Add a command to open urls ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden))
@ -26,6 +30,7 @@

 - Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102)

+- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155)

 ### Fixed bugs

--- a/pubs/apis.py
+++ b/pubs/apis.py
@ -13,7 +13,7 @@ class ReferenceNotFoundError(Exception):
    pass


-def get_bibentry_from_api(id_str, id_type, rp, ui=None):
+def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None):
    """Return a bibtex string from various ID methods.

    This is a wrapper around functions that will return a bibtex string given
@ -46,8 +46,8 @@ def get_bibentry_from_api(id_str, id_type, rp, ui=None):
    if id_type not in id_fns.keys():
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

-    bibentry_raw = id_fns[id_type](id_str)
-    bibentry = rp.databroker.verify(bibentry_raw)
+    bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui)
+    endecoder.EnDecoder().decode_bibdata(bibentry_raw)
    if bibentry is None:
        raise ReferenceNotFoundException(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
--- a/pubs/commands/add_cmd.py
+++ b/pubs/commands/add_cmd.py
@ -43,22 +43,25 @@ def parser(subparsers, conf):
    return parser


-def bibentry_from_editor(conf, ui, rp):
+def bibentry_from_editor(conf, ui):
+
    again = True
-    bibstr = templates.add_bib
+    bibentry_raw = templates.add_bib
+    decoder = endecoder.EnDecoder()
+
    while again:
        try:
-            bibstr = ui.editor_input(initial=bibstr, suffix='.bib')
-            if bibstr == templates.add_bib:
+            bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib')
+            if bibentry_raw == templates.add_bib:
                again = ui.input_yn(
                    question='Bibfile not edited. Edit again ?',
                    default='y')
                if not again:
                    ui.exit(0)
            else:
-                bibentry = rp.databroker.verify(bibstr)
+                bibentry = decoder.decode_bibdata(bibentry_raw)
                bibstruct.verify_bibdata(bibentry)
-                # REFACTOR Generate citykey
+                # REFACTOR Generate citekey
                again = False

        except endecoder.EnDecoder.BibDecodingError:
@ -84,28 +87,29 @@ def command(conf, args):
    citekey = args.citekey

    rp = repo.Repository(conf)
+    decoder = endecoder.EnDecoder()

    # get bibtex entry
    if bibfile is None:
        if args.doi is None and args.isbn is None and args.arxiv is None:
-            bibentry = bibentry_from_editor(conf, ui, rp)
+            bibentry = bibentry_from_editor(conf, ui)
        else:
            bibentry = None
            try:
                if args.doi is not None:
-                    bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp)
+                    bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui)
                elif args.isbn is not None:
-                    bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp)
+                    bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui)
                    # TODO distinguish between cases, offer to open the error page in a webbrowser.
                    # TODO offer to confirm/change citekey
                elif args.arxiv is not None:
-                    bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp)
+                    bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui)
            except apis.ReferenceNotFoundException as e:
                ui.error(e.message)
                ui.exit(1)
    else:
        bibentry_raw = content.get_content(bibfile, ui=ui)
-        bibentry = rp.databroker.verify(bibentry_raw)
+        bibentry = decoder.decode_bibdata(bibentry_raw)
        if bibentry is None:
            ui.error('invalid bibfile {}.'.format(bibfile))

--- a/pubs/databroker.py
+++ b/pubs/databroker.py
@ -79,16 +79,6 @@ class DataBroker(object):
    def listing(self, filestats=True):
        return self.filebroker.listing(filestats=filestats)

-    def verify(self, bibdata_raw):
-        """Will return None if bibdata_raw can't be decoded"""
-        try:
-            if bibdata_raw.startswith('\ufeff'):
-                # remove BOM, because bibtexparser does not support it.
-                bibdata_raw = bibdata_raw[1:]
-            return self.endecoder.decode_bibdata(bibdata_raw)
-        except ValueError as e:
-            return None
-
    # docbroker

    def in_docsdir(self, docpath):
--- a/pubs/datacache.py
+++ b/pubs/datacache.py
@ -163,9 +163,6 @@ class DataCache(object):
    def listing(self, filestats=True):
        return self.databroker.listing(filestats=filestats)

-    def verify(self, bibdata_raw):
-        return self.databroker.verify(bibdata_raw)
-
    # docbroker

    def in_docsdir(self, docpath):
--- a/pubs/endecoder.py
+++ b/pubs/endecoder.py
@ -1,9 +1,16 @@
 from __future__ import absolute_import, unicode_literals

 import copy
+import logging
+
+# both needed to intercept exceptions.
+import pyparsing
+import bibtexparser

 try:
    import bibtexparser as bp
+    # don't let bibtexparser display stuff
+#    bp.bparser.logger.setLevel(level=logging.CRITICAL)
 except ImportError:
    print("error: you need to install bibterxparser; try running 'pip install "
          "bibtexparser'.")
@ -68,14 +75,16 @@ class EnDecoder(object):

    class BibDecodingError(Exception):

-        message = "Could not parse provided bibdata:\n---\n{}\n---"
+#        message = "Could not parse provided bibdata:\n---\n{}\n---"

-        def __init__(self, bibdata):
+        def __init__(self, error_msg, bibdata):
+            """
+            :param error_msg: specific message about what went wrong
+            :param bibdata:   the data that was unsuccessfully decoded.
+            """
+            super(Exception, self).__init__(error_msg) # make `str(self)` work.
            self.data = bibdata

-        def __str__(self):
-            return self.message.format(self.data)
-
    bwriter = bp.bwriter.BibTexWriter()
    bwriter.display_order = BIBFIELD_ORDER

@ -117,10 +126,12 @@ class EnDecoder(object):

        If the decoding fails, returns a BibParseError.
        """
+        if len(bibdata) == 0:
+            error_msg = 'parsing error: the provided string has length zero.'
+            raise self.BibDecodingError(error_msg, bibdata)
        try:
            entries = bp.bparser.BibTexParser(
-                bibdata, common_strings=True,
-                customization=customizations,
+                bibdata, common_strings=True, customization=customizations,
                homogenize_fields=True).get_entry_dict()

            # Remove id from bibtexparser attribute which is stored as citekey
@ -131,8 +142,18 @@ class EnDecoder(object):
                entries[e][TYPE_KEY] = t
            if len(entries) > 0:
                return entries
-        except Exception:
-            import traceback
-            traceback.print_exc()
-        raise self.BibDecodingError(bibdata)
-        # TODO: filter exceptions from pyparsing and pass reason upstream
+        except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
+            error_msg = self._format_parsing_error(e)
+            raise self.BibDecodingError(error_msg, bibdata)
+        except bibtexparser.bibdatabase.UndefinedString as e:
+            error_msg = 'parsing error: undefined string in provided data: {}'.format(e)
+            raise self.BibDecodingError(error_msg, bibdata)
+
+
+    @classmethod
+    def _format_parsing_error(cls, e):
+        """Transform a pyparsing exception into an error message
+
+        Does a best effort to be useful, but might need to be improved.
+        """
+        return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@ -2,6 +2,7 @@

 from __future__ import unicode_literals
 import unittest
+import socket

 import dotdot

@ -20,9 +21,11 @@ def _is_connected():
    try:
        host = socket.gethostbyname('www.google.com')
        s = socket.create_connection((host, 80), 2)
+        s.close()
        return True
-    except:
-        return False
+    except socket.error:
+        pass
+    return False

 class APITests(unittest.TestCase):

@ -78,6 +81,14 @@ class TestISBN2Bibtex(APITests):

 class TestArxiv2Bibtex(APITests):

+    def test_new_style(self):
+        bib = arxiv2bibtex('astro-ph/9812133')
+        b = self.endecoder.decode_bibdata(bib)
+        self.assertEqual(len(b), 1)
+        entry = b[list(b)[0]]
+        self.assertEqual(entry['author'][0], 'Perlmutter, S.')
+        self.assertEqual(entry['year'], '1999')
+
    def test_parses_to_bibtex_with_doi(self):
        bib = arxiv2bibtex('astro-ph/9812133')
        b = self.endecoder.decode_bibdata(bib)
--- a/tests/test_endecoder.py
+++ b/tests/test_endecoder.py
@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2):

 class TestEnDecode(unittest.TestCase):

+    def test_decode_emptystring(self):
+        decoder = endecoder.EnDecoder()
+        with self.assertRaises(decoder.BibDecodingError):
+            entry = decoder.decode_bibdata('')
+
    def test_encode_bibtex_is_unicode(self):
        decoder = endecoder.EnDecoder()
        entry = decoder.decode_bibdata(bibtex_raw0)
@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase):

        self.assertEqual(bibraw1, bibraw2)

+    def test_endecode_bibtex_BOM(self):
+        """Test that bibtexparser if fine with BOM-prefixed data"""
+        decoder = endecoder.EnDecoder()
+        bom_str = '\ufeff'
+
+        entry_1  = decoder.decode_bibdata(bibtex_raw0)
+        bibraw_1 = decoder.encode_bibdata(entry_1)
+        entry_2  = decoder.decode_bibdata(bom_str + bibraw_1)
+        bibraw_2 = decoder.encode_bibdata(entry_2)
+
+        self.assertEqual(bibraw_1, bibraw_2)
+
    def test_endecode_bibtex_converts_month_string(self):
        """Test if `month=dec` is correctly recognized and transformed into
        `month={December}`"""