Handle Unicode BOM in bibfile

In particular, bib files created by xml2bib without the `-nb` option start with a Unicode BOM, which would trip up bibtexparser.

Fixes #23
main
Fabien Benureau 9 years ago
parent 220e5ad9a8
commit b2ebc67f5e

@ -40,7 +40,6 @@ def get_entry(bibdata):
return e return e
def extract_citekey(bibdata): def extract_citekey(bibdata):
verify_bibdata(bibdata)
citekey, entry = get_entry(bibdata) citekey, entry = get_entry(bibdata)
return citekey return citekey

@ -56,8 +56,11 @@ class DataBroker(object):
def verify(self, bibdata_raw): def verify(self, bibdata_raw):
"""Will return None if bibdata_raw can't be decoded""" """Will return None if bibdata_raw can't be decoded"""
try: try:
if bibdata_raw.startswith(u'\ufeff'):
# remove BOM, because bibtexparser does not support it.
bibdata_raw = bibdata_raw[1:]
return self.endecoder.decode_bibdata(bibdata_raw) return self.endecoder.decode_bibdata(bibdata_raw)
except ValueError: except ValueError as e:
return None return None
# docbroker # docbroker
@ -90,4 +93,3 @@ class DataBroker(object):
def rename_note(self, old_citekey, new_citekey): def rename_note(self, old_citekey, new_citekey):
notepath = 'notesdir://{}.txt'.format(old_citekey) notepath = 'notesdir://{}.txt'.format(old_citekey)
return self.notebroker.rename_doc(notepath, new_citekey) return self.notebroker.rename_doc(notepath, new_citekey)

@ -0,0 +1,15 @@
@TechReport{Page99,
author="Page, Lawrence
and Brin, Sergey
and Motwani, Rajeev
and Winograd, Terry",
title="The PageRank Citation Ranking: Bringing Order to the Web.",
year="1999",
month="November",
publisher="Stanford InfoLab",
number="1999-66",
abstract="The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.",
note="Previous number = SIDL-WP-1999-0120",
url="http://ilpubs.stanford.edu:8090/422/"
}

@ -0,0 +1,2 @@
+ `bibutils.bib`: created with `bib2xml data/pagerank.bib | xml2bib >> bibexamples/bibutils.bib` (bibutils 5.6)
+ `leadingspace.bib`: has a leading space in the citekey

@ -155,6 +155,14 @@ class TestAdd(DataCommandTestCase):
self.assertEqual(set(self.fs['os'].listdir(meta_dir)), {'Page99.yaml'}) self.assertEqual(set(self.fs['os'].listdir(meta_dir)), {'Page99.yaml'})
self.assertEqual(set(self.fs['os'].listdir(doc_dir)), {'Page99.pdf'}) self.assertEqual(set(self.fs['os'].listdir(doc_dir)), {'Page99.pdf'})
def test_add_bibutils(self):
cmds = ['pubs init',
'pubs add /bibexamples/bibutils.bib',
]
self.execute_cmds(cmds)
bib_dir = self.fs['os'].path.join(self.default_pubs_dir, 'bib')
self.assertEqual(set(self.fs['os'].listdir(bib_dir)), {'Page99.bib'})
def test_add2(self): def test_add2(self):
cmds = ['pubs init -p /not_default', cmds = ['pubs init -p /not_default',
'pubs add /data/pagerank.bib -d /data/pagerank.pdf', 'pubs add /data/pagerank.bib -d /data/pagerank.pdf',

Loading…
Cancel
Save