|
|
|
@ -1,260 +1,55 @@
|
|
|
|
|
import os
|
|
|
|
|
import copy
|
|
|
|
|
import collections
|
|
|
|
|
|
|
|
|
|
import unicodedata
|
|
|
|
|
import re
|
|
|
|
|
from cStringIO import StringIO
|
|
|
|
|
import yaml
|
|
|
|
|
|
|
|
|
|
from pybtex.database import Entry, BibliographyData, FieldDict, Person
|
|
|
|
|
|
|
|
|
|
import files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Entry type used when a Paper is created without a bibliography entry.
DEFAULT_TYPE = 'article'

# Code points 0-31 and 127-159, joined into a single string.
CONTROL_CHARS = ''.join(
    [unichr(code) for code in range(0, 32)]
    + [unichr(code) for code in range(127, 160)])

# '/' is OK for bibtex but forbidden here since we transform citekeys
# into filenames.
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/'

# Character class matching every character that may not appear in a citekey.
CITEKEY_EXCLUDE_RE = re.compile(
    '[%s]' % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))

# Template for the metadata dictionary attached to a paper.
BASE_META = {'external-document': None, 'tags': set(), 'notes': []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def str2citekey(s):
    """Turn *s* into a citekey candidate.

    The text is NFKD-normalized, characters that cannot be encoded as
    ASCII are dropped, and every character matched by
    ``CITEKEY_EXCLUDE_RE`` is removed.
    """
    # Normalize chars and remove non-ascii
    decomposed = unicodedata.normalize('NFKD', unicode(s))
    ascii_only = decomposed.encode('ascii', 'ignore')
    return CITEKEY_EXCLUDE_RE.sub('', ascii_only)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_bibentry_from_file(bibfile):
    """Return ``(key, entry)`` for the first entry of the given file.

    The file is loaded with ``files.load_externalbibfile`` and is supposed
    to contain a single entry.
    """
    entries = files.load_externalbibfile(bibfile).entries
    first_key = list(entries.keys())[0]
    return first_key, entries[first_key]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_bibentry_from_string(content):
    """Return ``(key, entry)`` for the first entry found in a string.

    *content* is wrapped in a ``StringIO`` and parsed with
    ``files.parse_bibdata``; it is supposed to contain a single entry.
    """
    entries = files.parse_bibdata(StringIO(content)).entries
    first_key = list(entries.keys())[0]
    return first_key, entries[first_key]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def copy_person(p):
    """Build a new ``Person`` from the textual name parts of *p*."""
    part_names = ('first', 'middle', 'prelast', 'last', 'lineage')
    name_parts = dict((part, p.get_part_as_text(part)) for part in part_names)
    return Person(**name_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def copy_bibentry(entry):
    """Return a new ``Entry`` with copied fields and copied persons.

    Fields are rebuilt as a ``FieldDict`` that keeps the same parent;
    every person is duplicated with ``copy_person``.
    """
    fields_copy = FieldDict(entry.fields.parent, entry.fields)
    persons_copy = {}
    for role, people in entry.persons.items():
        persons_copy[role] = [copy_person(person) for person in people]
    return Entry(entry.type, fields=fields_copy, persons=persons_copy)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_safe_metadata(meta):
    """Return a metadata dictionary built on ``Paper.create_meta()``.

    When *meta* is not None its entries override the defaults. The
    'tags' value is always converted to a new set.
    """
    safe_meta = Paper.create_meta()
    if meta is not None:
        safe_meta.update(meta)
    safe_meta['tags'] = set(safe_meta['tags'])
    return safe_meta
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_safe_metadata_from_content(content):
    """Parse *content* as YAML and pass the result to ``get_safe_metadata``."""
    # NOTE(review): yaml.load can construct arbitrary Python objects; if
    # content may come from an untrusted source, consider yaml.safe_load
    # after confirming the stored metadata only uses standard YAML tags.
    parsed = yaml.load(content)
    return get_safe_metadata(parsed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_safe_metadata_from_path(metapath):
    """Return safe metadata read from *metapath*.

    When *metapath* is None nothing is read and ``get_safe_metadata`` is
    called with None; otherwise it receives the result of
    ``files.read_yamlfile(metapath)``.
    """
    content = None if metapath is None else files.read_yamlfile(metapath)
    return get_safe_metadata(content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_citekey(citekey):
    """Raise ValueError unless *citekey* is left unchanged by ``str2citekey``."""
    # TODO This is not the right way to test that (17/12/2012)
    as_text = unicode(citekey)
    if as_text != str2citekey(citekey):
        raise ValueError("Invalid citekey: %s" % citekey)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class NoDocumentFile(Exception):
    """Raised when no document file can be found for a paper."""
|
|
|
|
|
from . import bibstruct
|
|
|
|
|
|
|
|
|
|
DEFAULT_META = collections.OrderedDict([('docfile', None), ('tags', set()), ('notes', [])])
|
|
|
|
|
DEFAULT_META = {'docfile': None, 'tags': set(), 'notes': []}
|
|
|
|
|
|
|
|
|
|
class Paper(object):
|
|
|
|
|
"""Paper class. The object is responsible for the integrity of its own
|
|
|
|
|
data, and for loading and writing it to disc.
|
|
|
|
|
""" Paper class. The object is responsible for the integrity of its data
|
|
|
|
|
|
|
|
|
|
The object uses a pybtex.database.BibliographyData object to store
|
|
|
|
|
bibliography data and an additional dictionary to store meta data.
|
|
|
|
|
The object is not responsible for any disk I/O.
|
|
|
|
|
self.bibdata is a pybtex.database.BibliographyData object
|
|
|
|
|
self.metadata is a dictionary
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self, bibentry=None, metadata=None, citekey=None):
|
|
|
|
|
if bibentry is None:
|
|
|
|
|
bibentry = Entry(DEFAULT_TYPE)
|
|
|
|
|
self.bibentry = bibentry
|
|
|
|
|
if metadata is None:
|
|
|
|
|
metadata = Paper.create_meta()
|
|
|
|
|
def __init__(self, bibdata, citekey=None, metadata=None):
|
|
|
|
|
self.citekey = citekey
|
|
|
|
|
self.metadata = metadata
|
|
|
|
|
check_citekey(citekey)
|
|
|
|
|
self.citekey = citekey
|
|
|
|
|
self.bibdata = bibdata
|
|
|
|
|
|
|
|
|
|
if self.metadata is None:
|
|
|
|
|
self.metadata = copy.deepcopy(DEFAULT_META)
|
|
|
|
|
if self.citekey is None:
|
|
|
|
|
self.citekey = bibstruct.extract_citekey(self.bibdata)
|
|
|
|
|
bibstruct.check_citekey(self.citekey)
|
|
|
|
|
|
|
|
|
|
def __eq__(self, other):
|
|
|
|
|
return (isinstance(self, Paper) and type(other) is type(self)
|
|
|
|
|
and self.bibentry == other.bibentry
|
|
|
|
|
and self.bibdata == other.bibdata
|
|
|
|
|
and self.metadata == other.metadata
|
|
|
|
|
and self.citekey == other.citekey)
|
|
|
|
|
and self.citekey == other.citekey)
|
|
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
|
return 'Paper(%s, %s, %s)' % (
|
|
|
|
|
self.citekey, self.bibentry, self.metadata)
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
|
return self.__repr__()
|
|
|
|
|
|
|
|
|
|
# TODO add mechanism to verify keys (15/12/2012)
|
|
|
|
|
|
|
|
|
|
def get_external_document_path(self):
|
|
|
|
|
if self.metadata['external-document'] is not None:
|
|
|
|
|
return self.metadata['external-document']
|
|
|
|
|
else:
|
|
|
|
|
raise NoDocumentFile
|
|
|
|
|
|
|
|
|
|
def get_document_path(self):
|
|
|
|
|
return self.get_external_document_path()
|
|
|
|
|
|
|
|
|
|
def set_external_document(self, docpath):
    """Store the absolute form of *docpath* as the external document.

    The absolute path is first passed to ``files.check_file`` with
    ``fail=True``; the metadata is only updated if that call returns.
    """
    absolute_path = os.path.abspath(docpath)
    files.check_file(absolute_path, fail=True)
    self.metadata['external-document'] = absolute_path
|
|
|
|
|
|
|
|
|
|
def check_document_path(self):
    """Return ``files.check_file`` applied to the external document path."""
    external_path = self.get_external_document_path()
    return files.check_file(external_path)
|
|
|
|
|
|
|
|
|
|
def generate_citekey(self):
|
|
|
|
|
"""Generate a citekey from bib_data.
|
|
|
|
|
|
|
|
|
|
Raises:
|
|
|
|
|
KeyError if no author nor editor is defined.
|
|
|
|
|
"""
|
|
|
|
|
author_key = 'author'
|
|
|
|
|
if not 'author' in self.bibentry.persons:
|
|
|
|
|
author_key = 'editor'
|
|
|
|
|
try:
|
|
|
|
|
first_author = self.bibentry.persons[author_key][0]
|
|
|
|
|
except KeyError:
|
|
|
|
|
raise ValueError(
|
|
|
|
|
'No author or editor defined: cannot generate a citekey.')
|
|
|
|
|
try:
|
|
|
|
|
year = self.bibentry.fields['year']
|
|
|
|
|
except KeyError:
|
|
|
|
|
year = ''
|
|
|
|
|
citekey = u'{}{}'.format(u''.join(first_author.last()), year)
|
|
|
|
|
return str2citekey(citekey)
|
|
|
|
|
|
|
|
|
|
def save(self, bib_filepath, meta_filepath):
    """Save the paper's bibliography entry and metadata.

    The entry is wrapped in a single-entry ``BibliographyData`` keyed by
    the citekey and handed to ``files.save_bibdata``; the metadata is
    handed to ``files.save_meta``.

    Raises ValueError when the citekey is None.
    """
    if self.citekey is None:
        raise ValueError(
            'No valid citekey initialized. Cannot save paper')
    single_entry = {self.citekey: self.bibentry}
    files.save_bibdata(BibliographyData(entries=single_entry), bib_filepath)
    files.save_meta(self.metadata, meta_filepath)
|
|
|
|
|
|
|
|
|
|
def update(self, key=None, bib=None, meta=None):
|
|
|
|
|
if key is not None:
|
|
|
|
|
check_citekey(key)
|
|
|
|
|
self.citekey = key
|
|
|
|
|
if bib is not None:
|
|
|
|
|
self.bibentry = bib
|
|
|
|
|
if meta is not None:
|
|
|
|
|
self.metadata = meta
|
|
|
|
|
|
|
|
|
|
def get_document_file_from_bibdata(self, remove=False):
|
|
|
|
|
"""Try extracting document file from bib data.
|
|
|
|
|
Raises NoDocumentFile if not found.
|
|
|
|
|
|
|
|
|
|
Parameters:
|
|
|
|
|
-----------
|
|
|
|
|
remove: default: False
|
|
|
|
|
remove field after extracting information
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
field = self.bibentry.fields['file']
|
|
|
|
|
# Check if this is mendeley specific
|
|
|
|
|
for f in field.split(':'):
|
|
|
|
|
if len(f) > 0:
|
|
|
|
|
break
|
|
|
|
|
if remove:
|
|
|
|
|
self.bibentry.fields.pop('file')
|
|
|
|
|
# This is a hck for Mendeley. Make clean
|
|
|
|
|
if f[0] != '/':
|
|
|
|
|
f = '/' + f
|
|
|
|
|
return f
|
|
|
|
|
except (KeyError, IndexError):
|
|
|
|
|
raise NoDocumentFile('No file found in bib data.')
|
|
|
|
|
|
|
|
|
|
def copy(self):
    """Return a new Paper with a copied bibentry and copied metadata.

    The entry is duplicated with ``copy_bibentry``; the metadata is
    duplicated with its own ``copy()`` method.
    """
    entry_copy = copy_bibentry(self.bibentry)
    meta_copy = self.metadata.copy()
    return Paper(bibentry=entry_copy, metadata=meta_copy,
                 citekey=self.citekey)
|
|
|
|
|
def deepcopy(self):
    """Return a new Paper whose metadata and bibdata are deep copies."""
    duplicate = {
        'citekey': self.citekey,
        'metadata': copy.deepcopy(self.metadata),
        'bibdata': copy.deepcopy(self.bibdata),
    }
    return Paper(**duplicate)
|
|
|
|
|
|
|
|
|
|
@classmethod
def load(cls, bibpath, metapath=None):
    """Build a Paper from a bibliography file and an optional metadata file.

    The first entry of *bibpath* provides the citekey and the entry;
    metadata comes from ``get_safe_metadata_from_path(metapath)``.
    """
    key, entry = get_bibentry_from_file(bibpath)
    safe_meta = get_safe_metadata_from_path(metapath)
    return Paper(bibentry=entry, metadata=safe_meta, citekey=key)
|
|
|
|
|
|
|
|
|
|
@classmethod
def create_meta(cls):
    """Return a fresh metadata dictionary initialised from BASE_META.

    A deep copy is returned so that the mutable 'tags' set and 'notes'
    list of BASE_META are never shared between callers; a shallow copy
    would hand the very same objects to every paper.
    """
    return copy.deepcopy(BASE_META)
|
|
|
|
|
|
|
|
|
|
@classmethod
def many_from_path(cls, bibpath):
    """Extract the papers found in bibliographic files in path.

    When *bibpath* is a directory, every file in it whose extension is in
    ``files.BIB_EXTENSIONS`` is loaded; otherwise *bibpath* itself is
    loaded.

    The behavior is to:
        - keep going when an entry is invalid,
        - overwrite duplicated entries.

    :returns: dictionary of (key, paper | exception)
        if building the paper for an entry raised ValueError, the
        exception is stored in the dictionary in place of the paper
    """
    bibpath = files.clean_path(bibpath)
    if os.path.isdir(bibpath):
        all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath)
                     if os.path.splitext(f)[-1] in files.BIB_EXTENSIONS]
    else:
        all_files = [bibpath]
    bib_data = [files.load_externalbibfile(f) for f in all_files]
    papers = {}
    for b in bib_data:
        for k in b.entries:
            try:
                papers[k] = Paper(bibentry=b.entries[k], citekey=k)
            except ValueError as e:
                # Report the failure to the caller instead of aborting
                # the whole import.
                papers[k] = e
    return papers
|
|
|
|
|
@property
|
|
|
|
|
def docpath(self):
|
|
|
|
|
return self.metadata.get('docfile', '')
|
|
|
|
|
|
|
|
|
|
@docpath.setter
|
|
|
|
|
def docpath(self, path):
|
|
|
|
|
"""Does not verify if the path exists."""
|
|
|
|
|
self.metadata['docfile'] = path
|
|
|
|
|
|
|
|
|
|
# tags
|
|
|
|
|
# tags
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
def tags(self):
|
|
|
|
@ -272,42 +67,3 @@ class Paper(object):
|
|
|
|
|
def remove_tag(self, tag):
    """Discard *tag* from the paper's tags; absent tags are ignored."""
    current_tags = self.tags
    current_tags.discard(tag)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PaperInRepo(Paper):
    """Paper that lives in a repository.

    Adds the repository-specific operations on top of Paper: the document
    is looked up in the repository first, then in the external location.
    """

    def __init__(self, repo, *args, **kwargs):
        """Initialise the Paper part, then remember *repo*."""
        Paper.__init__(self, *args, **kwargs)
        self.repo = repo

    def get_document_path_in_repo(self):
        """Return what ``repo.find_document`` gives for this citekey."""
        return self.repo.find_document(self.citekey)

    def get_document_path(self):
        """Return the in-repo document path, else the external one."""
        try:
            path = self.get_document_path_in_repo()
        except NoDocumentFile:
            path = self.get_external_document_path()
        return path

    def copy(self):
        """Return a PaperInRepo built from a copy of the plain paper."""
        plain_copy = self.as_paper().copy()
        return PaperInRepo.from_paper(plain_copy, self.repo)

    def as_paper(self):
        """Return a plain Paper holding the same bibentry and metadata objects."""
        shared = {
            'bibentry': self.bibentry,
            'metadata': self.metadata,
            'citekey': self.citekey,
        }
        return Paper(**shared)

    @classmethod
    def load(cls, repo, bibpath, metapath=None):
        """Build an instance for *repo* from a bib file and optional metadata."""
        key, entry = get_bibentry_from_file(bibpath)
        safe_meta = get_safe_metadata_from_path(metapath)
        return cls(repo, bibentry=entry, metadata=safe_meta, citekey=key)

    @classmethod
    def from_paper(cls, paper, repo):
        """Build an instance for *repo* reusing the attributes of *paper*."""
        return cls(repo,
                   bibentry=paper.bibentry,
                   metadata=paper.metadata,
                   citekey=paper.citekey)
|
|
|
|
|