Handle Unicode BOM in bibfile

In particular, files created by xml2bib without the `-nb` option would trip up bibtexparser.

Fixes #23
This commit is contained in:
Fabien Benureau 2015-12-20 22:20:01 +01:00
parent 220e5ad9a8
commit b2ebc67f5e
5 changed files with 29 additions and 3 deletions

View File

@ -40,7 +40,6 @@ def get_entry(bibdata):
return e
def extract_citekey(bibdata):
    """Return the citekey of the entry held in `bibdata`.

    `bibdata` is first checked with `verify_bibdata`; the citekey is the
    first element of the pair returned by `get_entry`.
    """
    verify_bibdata(bibdata)
    key, _entry = get_entry(bibdata)
    return key

View File

@ -56,8 +56,11 @@ class DataBroker(object):
def verify(self, bibdata_raw):
    """Decode raw bibliographic data, or return None if it can't be decoded.

    A leading Unicode byte order mark (U+FEFF) is stripped before decoding,
    because bibtexparser does not support it.

    :param bibdata_raw: raw bibliography data; must support ``startswith``
        and slicing.
    :return: the result of ``self.endecoder.decode_bibdata``, or None when a
        ValueError is raised while checking for the BOM or decoding.
    """
    try:
        # The BOM check is deliberately kept inside the try block so that a
        # ValueError raised by it is reported as None as well.
        if bibdata_raw.startswith(u'\ufeff'):
            # remove BOM, because bibtexparser does not support it.
            bibdata_raw = bibdata_raw[1:]
        return self.endecoder.decode_bibdata(bibdata_raw)
    except ValueError:
        return None
# docbroker
@ -90,4 +93,3 @@ class DataBroker(object):
def rename_note(self, old_citekey, new_citekey):
    """Rename the note stored for `old_citekey` to `new_citekey`.

    The note is addressed as ``notesdir://<old_citekey>.txt``; the actual
    renaming is delegated to ``self.notebroker.rename_doc``, whose return
    value is passed through.
    """
    return self.notebroker.rename_doc(
        'notesdir://{}.txt'.format(old_citekey),
        new_citekey,
    )

View File

@ -0,0 +1,15 @@
@TechReport{Page99,
author="Page, Lawrence
and Brin, Sergey
and Motwani, Rajeev
and Winograd, Terry",
title="The PageRank Citation Ranking: Bringing Order to the Web.",
year="1999",
month="November",
publisher="Stanford InfoLab",
number="1999-66",
abstract="The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.",
note="Previous number = SIDL-WP-1999-0120",
url="http://ilpubs.stanford.edu:8090/422/"
}

View File

@ -0,0 +1,2 @@
+ `bibutils.bib`: created with `bib2xml data/pagerank.bib | xml2bib >> bibexamples/bibutils.bib` (bibutils 5.6)
+ `leadingspace.bib`: has a leading space in the citekey

View File

@ -155,6 +155,14 @@ class TestAdd(DataCommandTestCase):
self.assertEqual(set(self.fs['os'].listdir(meta_dir)), {'Page99.yaml'})
self.assertEqual(set(self.fs['os'].listdir(doc_dir)), {'Page99.pdf'})
def test_add_bibutils(self):
    """Adding the bibutils-generated bibfile must store it as Page99.bib."""
    self.execute_cmds(['pubs init',
                       'pubs add /bibexamples/bibutils.bib',
                       ])
    fake_os = self.fs['os']
    bib_dir = fake_os.path.join(self.default_pubs_dir, 'bib')
    stored = set(fake_os.listdir(bib_dir))
    self.assertEqual(stored, {'Page99.bib'})
def test_add2(self):
cmds = ['pubs init -p /not_default',
'pubs add /data/pagerank.bib -d /data/pagerank.pdf',