Handle Unicode BOM in bibfile

In particular, a bib file created by xml2bib without the `-nb` option starts with a BOM, which would trip up bibtexparser. Fixes #23
2015-12-20 22:20:01 +01:00 · 2015-12-20 22:20:01 +01:00 · b2ebc67f5e
commit b2ebc67f5e
parent 220e5ad9a8
5 changed files with 29 additions and 3 deletions
--- a/pubs/bibstruct.py
+++ b/pubs/bibstruct.py
@ -40,7 +40,6 @@ def get_entry(bibdata):
        return e
 def extract_citekey(bibdata):
    verify_bibdata(bibdata)
    citekey, entry = get_entry(bibdata)
    return citekey
--- a/pubs/databroker.py
+++ b/pubs/databroker.py
@ -56,8 +56,11 @@ class DataBroker(object):
    def verify(self, bibdata_raw):
        """Will return None if bibdata_raw can't be decoded"""
        try:
            if bibdata_raw.startswith(u'\ufeff'):
                # remove BOM, because bibtexparser does not support it.
                bibdata_raw = bibdata_raw[1:]
            return self.endecoder.decode_bibdata(bibdata_raw)
-        except ValueError:
+        except ValueError as e:
            return None
    # docbroker
@ -90,4 +93,3 @@ class DataBroker(object):
    def rename_note(self, old_citekey, new_citekey):
        notepath = 'notesdir://{}.txt'.format(old_citekey)
        return self.notebroker.rename_doc(notepath, new_citekey)
--- a/tests/bibexamples/bibutils.bib
+++ b/tests/bibexamples/bibutils.bib
@ -0,0 +1,15 @@
@TechReport{Page99,
 author="Page, Lawrence
 and Brin, Sergey
 and Motwani, Rajeev
 and Winograd, Terry",
 title="The PageRank Citation Ranking: Bringing Order to the Web.",
 year="1999",
 month="November",
 publisher="Stanford InfoLab",
 number="1999-66",
 abstract="The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.",
 note="Previous number = SIDL-WP-1999-0120",
 url="http://ilpubs.stanford.edu:8090/422/"
 }
--- a/tests/bibexamples/notes.txt
+++ b/tests/bibexamples/notes.txt
@ -0,0 +1,2 @@
 + `bibutils.bib`: was created with: `bib2xml data/pagerank.bib | xml2bib >> bibexamples/bibutils.bib` (bibutils 5.6)  
 + `leadingspace.bib`: has a leading space in the citekey
--- a/tests/test_usecase.py
+++ b/tests/test_usecase.py
@ -155,6 +155,14 @@ class TestAdd(DataCommandTestCase):
        self.assertEqual(set(self.fs['os'].listdir(meta_dir)), {'Page99.yaml'})
        self.assertEqual(set(self.fs['os'].listdir(doc_dir)), {'Page99.pdf'})
    def test_add_bibutils(self):
        cmds = ['pubs init',
                'pubs add /bibexamples/bibutils.bib',
                ]
        self.execute_cmds(cmds)
        bib_dir = self.fs['os'].path.join(self.default_pubs_dir, 'bib')
        self.assertEqual(set(self.fs['os'].listdir(bib_dir)), {'Page99.bib'})
    def test_add2(self):
        cmds = ['pubs init -p /not_default',
                'pubs add /data/pagerank.bib -d /data/pagerank.pdf',
		`@ -0,0 +1,2 @@`
							+ `xml2bib.bib`: was created with: `bib2xml data/pagerank.bib \| xml2bib >> bibexamples/bibutils.bib` (bibutils 5.6)
							+ `leadingspace.bib`: has a leading space in the citekey