You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
165 lines
5.8 KiB
165 lines
5.8 KiB
from __future__ import absolute_import, unicode_literals
|
|
|
|
import copy
|
|
import logging
|
|
|
|
# pyparsing and bibtexparser are both needed to intercept parsing exceptions.
import pyparsing

try:
    # Both spellings are imported inside the guard so that a missing package
    # actually reaches the friendly message below (an import placed before
    # the `try` would fail first with a raw traceback). The module is
    # referenced both as `bibtexparser` and as `bp` in this file.
    import bibtexparser
    import bibtexparser as bp
    # don't let bibtexparser display stuff
    bp.bparser.logger.setLevel(level=logging.CRITICAL)
except ImportError:
    print("error: you need to install bibtexparser; try running 'pip install "
          "bibtexparser'.")
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module and is not guaranteed to exist in every interpreter setup.
    raise SystemExit(-1)
|
|
|
|
import yaml
|
|
|
|
from .bibstruct import TYPE_KEY
|
|
|
|
"""Important notice:
|
|
All functions and methods in this file assume and produce unicode data.
|
|
"""
|
|
|
|
|
|
def _version_tuple(version):
    """Return the leading numeric components of a dotted version string.

    Parsing stops at the first component that does not start with a digit, so
    '1.4.1' gives (1, 4, 1) and '0.6.0rc1' gives (0, 6, 0).

    :param version: a version string such as '0.6.2'
    :returns: -- a tuple of ints, suitable for numeric comparison
    """
    numbers = []
    for piece in version.split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


# The keys bibtexparser uses for the citekey and the entry type depend on its
# version. Versions are compared numerically: a plain string comparison is
# lexicographic and would, for instance, rank '0.10.0' below '0.6.0'.
if _version_tuple(bp.__version__) > (0, 6, 0):
    BP_ID_KEY = 'ID'
    BP_ENTRYTYPE_KEY = 'ENTRYTYPE'
else:
    BP_ID_KEY = 'id'
    BP_ENTRYTYPE_KEY = 'type'
|
|
|
|
|
|
# Order in which fields are written out for each bibtex entry (assigned to
# the writer's `display_order`).
BIBFIELD_ORDER = [
    'author',
    'title',
    'journal',
    'institution',
    'publisher',
    'year',
    'month',
    'number',
    'volume',
    'pages',
    'url',
    'link',
    'doi',
    'note',
    'abstract',
]
|
|
|
|
|
|
def sanitize_citekey(record):
    """Strip leading and trailing newlines from the citekey of a record.

    The record is modified in place and also returned, so the function can be
    chained with the other record customizations.

    :param record: a record
    :returns: -- the same record, with a cleaned citekey
    """
    citekey = record[BP_ID_KEY]
    record[BP_ID_KEY] = citekey.strip('\n')
    return record
|
|
|
|
|
|
def customizations(record):
    """ Run a record through the customization steps, in a fixed order

    Most steps are functions delivered by the bibtexparser library; the last
    one is the local citekey clean-up.

    :param record: a record
    :returns: -- customized record
    """
    # bp.customization.convert_to_unicode is deliberately left out of the
    # pipeline: it transforms \& into & ones, messing-up latex.
    pipeline = (
        bp.customization.type,
        bp.customization.author,
        bp.customization.editor,
        bp.customization.keyword,
        bp.customization.page_double_hyphen,
        sanitize_citekey,
    )
    for step in pipeline:
        record = step(record)
    return record
|
|
|
|
|
|
class EnDecoder(object):
    """ Encode and decode content.

    Design choices:
    * Has no interaction with disk.
    * Incoming content is not trusted.
    * Returned content must be correctly formatted (no one else checks).
    * Bibtex decoding failures raise `EnDecoder.BibDecodingError`.
    * decode_bibdata recognizes the exceptions of the underlying parser and
      re-raises them as `BibDecodingError`.
    """

    class BibDecodingError(Exception):
        """Raised when a bibtex string cannot be decoded.

        The data that failed to decode is kept in the `data` attribute.
        """

        def __init__(self, error_msg, bibdata):
            """
            :param error_msg: specific message about what went wrong
            :param bibdata: the data that was unsuccessfully decoded.
            """
            # Initialize through Exception itself (`super(Exception, self)`
            # would skip Exception in the MRO); makes `str(self)` work.
            Exception.__init__(self, error_msg)
            self.data = bibdata

    # A single writer instance, shared by all EnDecoder instances, configured
    # to output fields in the BIBFIELD_ORDER order.
    bwriter = bp.bwriter.BibTexWriter()
    bwriter.display_order = BIBFIELD_ORDER

    def encode_metadata(self, metadata):
        """Serialize metadata to a YAML string.

        :param metadata: the metadata to serialize
        :returns: -- the YAML representation (`encoding=None` asks PyYAML for
                     text rather than encoded bytes)
        """
        return yaml.safe_dump(metadata, allow_unicode=True,
                              encoding=None, indent=4)

    def decode_metadata(self, metadata_raw):
        """Parse a YAML string into metadata, using the safe loader.

        :param metadata_raw: the YAML content to parse
        :returns: -- the decoded metadata
        """
        return yaml.safe_load(metadata_raw)

    def encode_bibdata(self, bibdata, ignore_fields=()):
        """Encode bibdata into a bibtex string.

        :param bibdata: entries, indexed by citekey
        :param ignore_fields: names of the fields to leave out of the output
        :returns: -- the bibtex representation of the entries
        """
        bpdata = bp.bibdatabase.BibDatabase()
        # Each entry is shallow-copied so that the conversion, which adds,
        # removes and reassigns keys, leaves the caller's dict untouched.
        bpdata.entries = [self._entry_to_bp_entry(k, copy.copy(bibdata[k]),
                                                  ignore_fields=ignore_fields)
                          for k in bibdata]
        return self.bwriter.write(bpdata)

    def _entry_to_bp_entry(self, key, entry, ignore_fields=()):
        """Convert back entries to the format expected by bibtexparser.

        The entry is modified in place and returned.

        :param key: the citekey of the entry
        :param entry: the entry to convert
        :param ignore_fields: names of the fields to remove from the entry
        :returns: -- the converted entry
        """
        entry[BP_ID_KEY] = key
        # Convert internal 'type' to bibtexparser entrytype key
        entry[BP_ENTRYTYPE_KEY] = entry.pop(TYPE_KEY)
        for f in ignore_fields:
            entry.pop(f, None)
        # Multi-valued fields are flattened into their bibtex string form.
        if 'author' in entry:
            entry['author'] = ' and '.join(entry['author'])
        if 'editor' in entry:
            entry['editor'] = ' and '.join(entry['editor'])
        if 'keyword' in entry:
            entry['keyword'] = ', '.join(entry['keyword'])
        return entry

    def decode_bibdata(self, bibstr):
        """Decodes bibdata from string.

        :param bibstr: the bibtex content to decode
        :returns: -- the decoded entries, indexed by citekey
        :raises BibDecodingError: if the string is empty, cannot be parsed,
            uses an undefined string, or contains no valid entry.
        """
        if len(bibstr) == 0:
            error_msg = 'parsing error: the provided string has length zero.'
            raise self.BibDecodingError(error_msg, bibstr)
        try:
            entries = bp.bparser.BibTexParser(
                bibstr, common_strings=True, customization=customizations,
                homogenize_fields=True, ignore_nonstandard_types=False).get_entry_dict()
            for entry in entries.values():
                # Remove id from bibtexparser attribute which is stored as citekey
                entry.pop(BP_ID_KEY)
                # Convert bibtexparser entrytype key to internal 'type'
                entry[TYPE_KEY] = entry.pop(BP_ENTRYTYPE_KEY)
                # Temporary fix to #188 (to be fully fixed when the upstream
                # issue: sciunto-org/python-bibtexparser/#229 is fixed too)
                if 'editor' in entry:
                    entry['editor'] = [
                        editor['name'] if isinstance(editor, dict) else editor
                        for editor in entry['editor']]
        except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as exc:
            error_msg = self._format_parsing_error(exc)
            raise self.BibDecodingError(error_msg, bibstr)
        except bibtexparser.bibdatabase.UndefinedString as exc:
            error_msg = 'parsing error: undefined string in provided data: {}'.format(exc)
            raise self.BibDecodingError(error_msg, bibstr)

        if len(entries) == 0:
            raise self.BibDecodingError(('no valid entry found in the provided data: '
                                         ' {}').format(bibstr), bibstr)
        return entries

    @classmethod
    def _format_parsing_error(cls, e):
        """Transform a pyparsing exception into an error message

        The message shows the offending line, a caret under the reported
        column, and the text of the exception.

        Does a best effort to be useful, but might need to be improved.
        """
        return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)
|