Paper class
This commit is contained in:
parent
d1a4dd584f
commit
161be4f994
308
papers/paper.py
308
papers/paper.py
@ -1,260 +1,55 @@
|
||||
import os
|
||||
import copy
|
||||
import collections
|
||||
|
||||
import unicodedata
|
||||
import re
|
||||
from cStringIO import StringIO
|
||||
import yaml
|
||||
|
||||
from pybtex.database import Entry, BibliographyData, FieldDict, Person
|
||||
|
||||
import files
|
||||
|
||||
|
||||
# Entry type given to the pybtex Entry created when a Paper is built without
# an explicit bibentry.
DEFAULT_TYPE = 'article'

# All characters with code points 0-31 and 127-159, joined into one string.
CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden
# here since we transform citekeys into filenames
# Character class matching any single control or forbidden character;
# str2citekey() uses it to strip those characters from a candidate key.
CITEKEY_EXCLUDE_RE = re.compile('[%s]'
        % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))

# Template for a paper's metadata. Paper.create_meta() hands out copies of
# this dictionary.
# NOTE(review): the 'tags' set and 'notes' list are mutable objects; any
# shallow copy of this dict still refers to these same two objects.
BASE_META = {
    'external-document': None,
    'tags': set(),
    'notes': [],
    }
|
||||
|
||||
|
||||
def str2citekey(s):
    """Turn `s` into a string that is usable as a citekey.

    The text is NFKD-normalized and encoded to ASCII (characters that cannot
    be encoded are dropped); every character matched by CITEKEY_EXCLUDE_RE
    is then removed from the result.
    """
    # Normalize chars and remove non-ascii
    decomposed = unicodedata.normalize('NFKD', unicode(s))
    ascii_only = decomposed.encode('ascii', 'ignore')
    # Strip control characters and characters forbidden in citekeys.
    return CITEKEY_EXCLUDE_RE.sub('', ascii_only)
|
||||
|
||||
|
||||
def get_bibentry_from_file(bibfile):
    """Return ``(citekey, entry)`` for the first entry found in `bibfile`.

    The file is supposed to hold exactly one entry; any further entries are
    ignored. An IndexError propagates when the file holds no entry at all.
    """
    entries = files.load_externalbibfile(bibfile).entries
    key = list(entries.keys())[0]
    return key, entries[key]
|
||||
|
||||
|
||||
def get_bibentry_from_string(content):
    """Return ``(citekey, entry)`` for the first entry parsed from `content`.

    `content` is bibliography text held in a string; it is supposed to hold
    exactly one entry and any further entries are ignored. An IndexError
    propagates when no entry is found.
    """
    entries = files.parse_bibdata(StringIO(content)).entries
    key = list(entries.keys())[0]
    return key, entries[key]
|
||||
|
||||
|
||||
def copy_person(p):
    """Build a new Person carrying the same name parts as `p`."""
    part_names = ('first', 'middle', 'prelast', 'last', 'lineage')
    parts = {}
    for part in part_names:
        parts[part] = p.get_part_as_text(part)
    return Person(**parts)
|
||||
|
||||
|
||||
def copy_bibentry(entry):
    """Return a new Entry with copied fields and copied persons.

    Fields go into a fresh FieldDict that keeps the original's parent; every
    Person is duplicated with copy_person(). The entry type is reused as is.
    """
    fields_copy = FieldDict(entry.fields.parent, entry.fields)
    persons_copy = {
        role: [copy_person(person) for person in people]
        for role, people in entry.persons.items()
    }
    return Entry(entry.type, fields=fields_copy, persons=persons_copy)
|
||||
|
||||
|
||||
def get_safe_metadata(meta):
    """Return complete metadata built from `meta` on top of the defaults.

    Starts from Paper.create_meta(), overlays `meta` when it is not None,
    and converts the 'tags' value to a new set.
    """
    safe_meta = Paper.create_meta()
    overrides = {} if meta is None else meta
    safe_meta.update(overrides)
    # Tags may come in as any iterable; always hand back a set.
    safe_meta['tags'] = set(safe_meta['tags'])
    return safe_meta
|
||||
|
||||
|
||||
def get_safe_metadata_from_content(content):
    """Parse YAML `content` and return it as safe metadata."""
    # NOTE(review): yaml.load can construct arbitrary Python objects; if
    # `content` may come from an untrusted source, yaml.safe_load should be
    # considered instead.
    parsed = yaml.load(content)
    return get_safe_metadata(parsed)
|
||||
|
||||
|
||||
def get_safe_metadata_from_path(metapath):
    """Return safe metadata read from the YAML file at `metapath`.

    When `metapath` is None no file is read and the default metadata is
    returned.
    """
    content = None if metapath is None else files.read_yamlfile(metapath)
    return get_safe_metadata(content)
|
||||
|
||||
|
||||
def check_citekey(citekey):
    """Raise ValueError unless `citekey` is left unchanged by str2citekey()."""
    # TODO This is not the right way to test that (17/12/2012)
    as_text = unicode(citekey)
    sanitized = str2citekey(citekey)
    if as_text == sanitized:
        return
    raise ValueError("Invalid citekey: %s" % citekey)
|
||||
|
||||
|
||||
class NoDocumentFile(Exception):
    """Raised when no document file can be found for a paper."""
|
||||
from . import bibstruct
|
||||
|
||||
DEFAULT_META = collections.OrderedDict([('docfile', None), ('tags', set()), ('notes', [])])
|
||||
DEFAULT_META = {'docfile': None, 'tags': set(), 'notes': []}
|
||||
|
||||
class Paper(object):
|
||||
"""Paper class. The object is responsible for the integrity of its own
|
||||
data, and for loading and writing it to disc.
|
||||
""" Paper class. The object is responsible for the integrity of its data
|
||||
|
||||
The object uses a pybtex.database.BibliographyData object to store
|
||||
bibliography data and an additional dictionary to store meta data.
|
||||
The object is not responsible for any disk I/O.
|
||||
self.bibdata is a pybtex.database.BibliographyData object
|
||||
self.metadata is a dictionary
|
||||
"""
|
||||
|
||||
def __init__(self, bibentry=None, metadata=None, citekey=None):
|
||||
if bibentry is None:
|
||||
bibentry = Entry(DEFAULT_TYPE)
|
||||
self.bibentry = bibentry
|
||||
if metadata is None:
|
||||
metadata = Paper.create_meta()
|
||||
def __init__(self, bibdata, citekey=None, metadata=None):
|
||||
self.citekey = citekey
|
||||
self.metadata = metadata
|
||||
check_citekey(citekey)
|
||||
self.citekey = citekey
|
||||
self.bibdata = bibdata
|
||||
|
||||
if self.metadata is None:
|
||||
self.metadata = copy.deepcopy(DEFAULT_META)
|
||||
if self.citekey is None:
|
||||
self.citekey = bibstruct.extract_citekey(self.bibdata)
|
||||
bibstruct.check_citekey(self.citekey)
|
||||
|
||||
def __eq__(self, other):
|
||||
return (isinstance(self, Paper) and type(other) is type(self)
|
||||
and self.bibentry == other.bibentry
|
||||
and self.bibdata == other.bibdata
|
||||
and self.metadata == other.metadata
|
||||
and self.citekey == other.citekey)
|
||||
and self.citekey == other.citekey)
|
||||
|
||||
def __repr__(self):
|
||||
return 'Paper(%s, %s, %s)' % (
|
||||
self.citekey, self.bibentry, self.metadata)
|
||||
|
||||
def __str__(self):
    """Return the same text as __repr__()."""
    return self.__repr__()
|
||||
def deepcopy(self):
    """Return a new Paper built from deep copies of metadata and bibdata.

    The citekey is passed to the new Paper as is.
    """
    metadata_copy = copy.deepcopy(self.metadata)
    bibdata_copy = copy.deepcopy(self.bibdata)
    return Paper(citekey=self.citekey,
                 metadata=metadata_copy,
                 bibdata=bibdata_copy)
|
||||
|
||||
# TODO add mechanism to verify keys (15/12/2012)
|
||||
@property
|
||||
def docpath(self):
|
||||
return self.metadata.get('docfile', '')
|
||||
|
||||
def get_external_document_path(self):
|
||||
if self.metadata['external-document'] is not None:
|
||||
return self.metadata['external-document']
|
||||
else:
|
||||
raise NoDocumentFile
|
||||
@docpath.setter
def docpath(self, path):
    """Store `path` under metadata['docfile'].

    Does not verify if the path exists.
    """
    self.metadata['docfile'] = path
|
||||
|
||||
def get_document_path(self):
    """Return the document path, i.e. the external document path.

    Delegates to get_external_document_path(), so whatever that method
    raises propagates to the caller.
    """
    return self.get_external_document_path()
|
||||
|
||||
def set_external_document(self, docpath):
    """Record the absolute form of `docpath` as the external document.

    The absolute path is passed to files.check_file(..., fail=True) before
    being stored under metadata['external-document'].
    """
    absolute_path = os.path.abspath(docpath)
    files.check_file(absolute_path, fail=True)
    self.metadata['external-document'] = absolute_path
|
||||
|
||||
def check_document_path(self):
    """Return the result of files.check_file() on the external document path."""
    return files.check_file(self.get_external_document_path())
|
||||
|
||||
def generate_citekey(self):
|
||||
"""Generate a citekey from bib_data.
|
||||
|
||||
Raises:
|
||||
KeyError if no author nor editor is defined.
|
||||
"""
|
||||
author_key = 'author'
|
||||
if not 'author' in self.bibentry.persons:
|
||||
author_key = 'editor'
|
||||
try:
|
||||
first_author = self.bibentry.persons[author_key][0]
|
||||
except KeyError:
|
||||
raise ValueError(
|
||||
'No author or editor defined: cannot generate a citekey.')
|
||||
try:
|
||||
year = self.bibentry.fields['year']
|
||||
except KeyError:
|
||||
year = ''
|
||||
citekey = u'{}{}'.format(u''.join(first_author.last()), year)
|
||||
return str2citekey(citekey)
|
||||
|
||||
def save(self, bib_filepath, meta_filepath):
|
||||
"""Creates a BibliographyData object containing a single entry and
|
||||
saves it to disc.
|
||||
"""
|
||||
if self.citekey is None:
|
||||
raise ValueError(
|
||||
'No valid citekey initialized. Cannot save paper')
|
||||
bibdata = BibliographyData(entries={self.citekey: self.bibentry})
|
||||
files.save_bibdata(bibdata, bib_filepath)
|
||||
files.save_meta(self.metadata, meta_filepath)
|
||||
|
||||
def update(self, key=None, bib=None, meta=None):
|
||||
if key is not None:
|
||||
check_citekey(key)
|
||||
self.citekey = key
|
||||
if bib is not None:
|
||||
self.bibentry = bib
|
||||
if meta is not None:
|
||||
self.metadata = meta
|
||||
|
||||
def get_document_file_from_bibdata(self, remove=False):
|
||||
"""Try extracting document file from bib data.
|
||||
Raises NoDocumentFile if not found.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
remove: default: False
|
||||
remove field after extracting information
|
||||
"""
|
||||
try:
|
||||
field = self.bibentry.fields['file']
|
||||
# Check if this is mendeley specific
|
||||
for f in field.split(':'):
|
||||
if len(f) > 0:
|
||||
break
|
||||
if remove:
|
||||
self.bibentry.fields.pop('file')
|
||||
# This is a hck for Mendeley. Make clean
|
||||
if f[0] != '/':
|
||||
f = '/' + f
|
||||
return f
|
||||
except (KeyError, IndexError):
|
||||
raise NoDocumentFile('No file found in bib data.')
|
||||
|
||||
def copy(self):
    """Return an independent copy of the paper.

    The bibentry is duplicated with copy_bibentry() and the metadata is
    deep-copied, so changing the tags or notes of the copy does not affect
    this paper. The citekey is reused as is.
    """
    # Imported locally under another name: this method is itself called
    # `copy`.
    from copy import deepcopy
    return Paper(bibentry=copy_bibentry(self.bibentry),
                 metadata=deepcopy(self.metadata),
                 citekey=self.citekey)
|
||||
|
||||
@classmethod
def load(cls, bibpath, metapath=None):
    """Build a Paper from a bibliography file and an optional metadata file.

    The first entry of `bibpath` provides the citekey and the bibentry;
    the metadata is read from `metapath`, or defaults are used when it is
    None.
    """
    citekey, bibentry = get_bibentry_from_file(bibpath)
    meta = get_safe_metadata_from_path(metapath)
    return Paper(bibentry=bibentry, metadata=meta, citekey=citekey)
|
||||
|
||||
@classmethod
def create_meta(cls):
    """Return a fresh, independent copy of BASE_META."""
    # Deep copy: BASE_META holds a set ('tags') and a list ('notes'). A
    # shallow copy would share those two objects between every paper and
    # the template itself.
    return copy.deepcopy(BASE_META)
|
||||
|
||||
@classmethod
def many_from_path(cls, bibpath):
    """Extract papers found in bibliographic files in path.

    `bibpath` is either a single file, or a directory in which every file
    whose extension is in files.BIB_EXTENSIONS is loaded.

    The behavior is to:
        - keep going on wrong entries (see the return value),
        - overwrite duplicated entries.

    :returns: dictionary of (key, paper | exception)
        if building the paper for an entry raised a ValueError, the
        exception is returned in the dictionary in place of the paper
    """
    bibpath = files.clean_path(bibpath)
    if os.path.isdir(bibpath):
        all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath)
                     if os.path.splitext(f)[-1] in files.BIB_EXTENSIONS]
    else:
        all_files = [bibpath]
    bib_data = [files.load_externalbibfile(f) for f in all_files]
    papers = {}
    for b in bib_data:
        for k in b.entries:
            try:
                papers[k] = Paper(bibentry=b.entries[k], citekey=k)
            except ValueError as e:
                # Report the failure under the entry's key and continue.
                papers[k] = e
    return papers
|
||||
|
||||
|
||||
# tags
|
||||
# tags
|
||||
|
||||
@property
|
||||
def tags(self):
|
||||
@ -272,42 +67,3 @@ class Paper(object):
|
||||
def remove_tag(self, tag):
    """Remove a tag from a paper if present."""
    # NOTE(review): relies on self.tags offering discard() (presumably a
    # set; the tags property is not visible here, confirm).
    self.tags.discard(tag)
|
||||
|
||||
|
||||
class PaperInRepo(Paper):
    """Paper that lives in a repository.

    Extends Paper with the operations that need access to the repository
    holding the paper.
    """

    def __init__(self, repo, *args, **kwargs):
        """Build the paper from `args`/`kwargs`, then attach `repo`."""
        Paper.__init__(self, *args, **kwargs)
        self.repo = repo

    def get_document_path_in_repo(self):
        """Return what the repository finds for this paper's citekey."""
        citekey = self.citekey
        return self.repo.find_document(citekey)

    def get_document_path(self):
        """Return the document path in the repository, else the external one.

        The external document path is used only when the repository lookup
        raises NoDocumentFile.
        """
        try:
            path = self.get_document_path_in_repo()
        except NoDocumentFile:
            path = self.get_external_document_path()
        return path

    def copy(self):
        """Return a copy of the paper attached to the same repository."""
        plain_copy = self.as_paper().copy()
        return PaperInRepo.from_paper(plain_copy, self.repo)

    def as_paper(self):
        """Return a plain Paper using this paper's bibentry, metadata and
        citekey (the objects are passed on, not copied).
        """
        return Paper(citekey=self.citekey,
                     metadata=self.metadata,
                     bibentry=self.bibentry)

    @classmethod
    def load(cls, repo, bibpath, metapath=None):
        """Build an instance for `repo` from a bibliography file and an
        optional metadata file.
        """
        citekey, bibentry = get_bibentry_from_file(bibpath)
        meta = get_safe_metadata_from_path(metapath)
        return cls(repo, bibentry=bibentry, metadata=meta, citekey=citekey)

    @classmethod
    def from_paper(cls, paper, repo):
        """Build an instance for `repo` from the bibentry, metadata and
        citekey of `paper` (the objects are passed on, not copied).
        """
        return cls(repo,
                   bibentry=paper.bibentry,
                   metadata=paper.metadata,
                   citekey=paper.citekey)
|
||||
|
Loading…
x
Reference in New Issue
Block a user