From 07be993d0273d77b00f583687a59940f9c09af59 Mon Sep 17 00:00:00 2001
From: Olivier Mangin
Date: Sun, 7 Jan 2018 12:47:06 -0500
Subject: [PATCH] Moves to using bibtexparser's writer for bibdata.

[Fixes #33]
---
Review notes (not part of the commit message):
  * ignore_fields now defaults to an immutable () instead of a shared
    mutable [] in encode_bibdata and _entry_to_bp_entry; the value is
    only iterated, so callers passing a list are unaffected.
  * test_endecode_bibtex_ignores_fields asserted on the misspelled key
    'abtract', which can never be present; it now checks 'abstract'.

 pubs/endecoder.py       | 70 ++++++++++++++++++++---------------------
 tests/test_endecoder.py | 30 +++++++++++++++++-
 2 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/pubs/endecoder.py b/pubs/endecoder.py
index 3456fc3..2c479fb 100644
--- a/pubs/endecoder.py
+++ b/pubs/endecoder.py
@@ -53,9 +53,10 @@ def customizations(record):
 
     return record
 
+
 bibfield_order = ['author', 'title', 'journal', 'institution', 'publisher',
-    'year', 'month', 'number', 'volume', 'pages', 'link', 'doi', 'note',
-    'abstract']
+                  'year', 'month', 'number', 'volume', 'pages', 'link', 'doi',
+                  'note', 'abstract']
 
 
 class EnDecoder(object):
@@ -69,6 +70,9 @@ class EnDecoder(object):
     * encode_bibdata will try to recognize exceptions
     """
 
+    bwriter = bp.bwriter.BibTexWriter()
+    bwriter.display_order = bibfield_order
+
     def encode_metadata(self, metadata):
         return yaml.safe_dump(metadata, allow_unicode=True,
                               encoding=None, indent=4)
@@ -76,41 +80,35 @@ class EnDecoder(object):
     def decode_metadata(self, metadata_raw):
         return yaml.safe_load(metadata_raw)
 
-    def encode_bibdata(self, bibdata):
+    def encode_bibdata(self, bibdata, ignore_fields=()):
         """Encode bibdata """
-        return '\n'.join(self._encode_bibentry(citekey, entry)
-                         for citekey, entry in bibdata.items())
-
-    @staticmethod
-    def _encode_field(key, value):
-        if key == 'link':
-            return ', '.join(link['url'] for link in value)
-        elif key == 'author':
-            return ' and '.join(author for author in value)
-        elif key == 'editor':
-            return ' and '.join(editor['name'] for editor in value)
-        elif key == 'journal':
-            return value['name']
-        elif key == 'keyword':
-            return ', '.join(keyword for keyword in value)
-        else:
-            return value
-
-    @staticmethod
-    def _encode_bibentry(citekey, bibentry):
-        bibraw = '@{}{{{},\n'.format(bibentry[TYPE_KEY], citekey)
-        bibentry = copy.copy(bibentry)
-        for key in bibfield_order:
-            if key in bibentry:
-                value = bibentry.pop(key)
-                bibraw += '    {} = {{{}}},\n'.format(
-                    key, EnDecoder._encode_field(key, value))
-        for key, value in bibentry.items():
-            if key != TYPE_KEY:
-                bibraw += '    {} = {{{}}},\n'.format(
-                    key, EnDecoder._encode_field(key, value))
-        bibraw += '}\n'
-        return bibraw
+        bpdata = bp.bibdatabase.BibDatabase()
+        bpdata.entries = [self._entry_to_bp_entry(k, copy.copy(bibdata[k]),
+                                                  ignore_fields=ignore_fields)
+                          for k in bibdata]
+        return self.bwriter.write(bpdata)
+
+    def _entry_to_bp_entry(self, key, entry, ignore_fields=()):
+        """Convert back entries to the format expected by bibtexparser."""
+        entry[BP_ID_KEY] = key
+        # Convert internal 'type' to bibtexparser entrytype key
+        entry[BP_ENTRYTYPE_KEY] = entry.pop(TYPE_KEY)
+        for f in ignore_fields:
+            entry.pop(f, None)
+        if 'link' in entry:
+            entry['link'] = ', '.join(link['url'] for link in entry['link'])
+        if 'author' in entry:
+            entry['author'] = ' and '.join(
+                author for author in entry['author'])
+        if 'editor' in entry:
+            entry['editor'] = ' and '.join(
+                editor['name'] for editor in entry['editor'])
+        if 'journal' in entry:
+            entry['journal'] = entry['journal']['name']
+        if 'keyword' in entry:
+            entry['keyword'] = ', '.join(
+                keyword for keyword in entry['keyword'])
+        return entry
 
     def decode_bibdata(self, bibdata):
         """"""
diff --git a/tests/test_endecoder.py b/tests/test_endecoder.py
index 4bf7a5f..68387d2 100644
--- a/tests/test_endecoder.py
+++ b/tests/test_endecoder.py
@@ -91,13 +91,41 @@ class TestEnDecode(unittest.TestCase):
         self.assertIn(u'keyword', entry)
         self.assertEqual(set(keywords), set(entry[u'keyword']))
 
-
     def test_endecode_metadata(self):
         decoder = endecoder.EnDecoder()
         entry = decoder.decode_metadata(metadata_raw0)
         metadata_output0 = decoder.encode_metadata(entry)
         self.assertEqual(set(metadata_raw0.split('\n')), set(metadata_output0.split('\n')))
 
+    def test_endecode_bibtex_field_order(self):
+        decoder = endecoder.EnDecoder()
+        entry = decoder.decode_bibdata(bibtex_raw0)
+        lines = decoder.encode_bibdata(entry).splitlines()
+        self.assertEqual(lines[1].split('=')[0].strip(), u'author')
+        self.assertEqual(lines[2].split('=')[0].strip(), u'title')
+        self.assertEqual(lines[3].split('=')[0].strip(), u'institution')
+        self.assertEqual(lines[4].split('=')[0].strip(), u'publisher')
+        self.assertEqual(lines[5].split('=')[0].strip(), u'year')
+        self.assertEqual(lines[6].split('=')[0].strip(), u'month')
+        self.assertEqual(lines[7].split('=')[0].strip(), u'number')
+        self.assertEqual(lines[8].split('=')[0].strip(), u'link')
+        self.assertEqual(lines[9].split('=')[0].strip(), u'note')
+        self.assertEqual(lines[10].split('=')[0].strip(), u'abstract')
+
+    def test_endecode_bibtex_ignores_fields(self):
+        decoder = endecoder.EnDecoder()
+        entry = decoder.decode_bibdata(bibtex_raw0)
+
+        bibraw1 = decoder.encode_bibdata(
+            entry, ignore_fields=['title', 'note', 'abstract', 'journal'])
+        entry1 = list(decoder.decode_bibdata(bibraw1).values())[0]
+
+        self.assertNotIn('title', entry1)
+        self.assertNotIn('note', entry1)
+        self.assertNotIn('abstract', entry1)
+        self.assertIn('author', entry1)
+        self.assertIn('institution', entry1)
+
 
 if __name__ == '__main__':
     unittest.main()