From 161be4f994a873ab8ca4eb9e44ec5b40473b4280 Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Sun, 10 Nov 2013 02:03:03 +0100 Subject: [PATCH] Paper class --- papers/paper.py | 308 +++++------------------------------------------- 1 file changed, 32 insertions(+), 276 deletions(-) diff --git a/papers/paper.py b/papers/paper.py index 28b41c3..19a3dbd 100644 --- a/papers/paper.py +++ b/papers/paper.py @@ -1,260 +1,55 @@ -import os +import copy +import collections -import unicodedata -import re -from cStringIO import StringIO -import yaml - -from pybtex.database import Entry, BibliographyData, FieldDict, Person - -import files - - -DEFAULT_TYPE = 'article' - -CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160))) -CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden -# here since we transform citekeys into filenames -CITEKEY_EXCLUDE_RE = re.compile('[%s]' - % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS)) - -BASE_META = { - 'external-document': None, - 'tags': set(), - 'notes': [], - } - - -def str2citekey(s): - key = unicodedata.normalize('NFKD', unicode(s)).encode('ascii', 'ignore') - key = CITEKEY_EXCLUDE_RE.sub('', key) - # Normalize chars and remove non-ascii - return key - - -def get_bibentry_from_file(bibfile): - """Extract first entry (supposed to be the only one) from given file. - """ - bib_data = files.load_externalbibfile(bibfile) - first_key = list(bib_data.entries.keys())[0] - first_entry = bib_data.entries[first_key] - return first_key, first_entry - - -def get_bibentry_from_string(content): - """Extract first entry (supposed to be the only one) from given file. - """ - bib_data = files.parse_bibdata(StringIO(content)) - first_key = list(bib_data.entries.keys())[0] - first_entry = bib_data.entries[first_key] - return first_key, first_entry - - -def copy_person(p): - return Person(first=p.get_part_as_text('first'), - middle=p.get_part_as_text('middle'), - prelast=p.get_part_as_text('prelast'), - last=p.get_part_as_text('last'), - lineage=p.get_part_as_text('lineage')) - - -def copy_bibentry(entry): - fd = FieldDict(entry.fields.parent, entry.fields) - persons = dict([(k, [copy_person(p) for p in v]) - for k, v in entry.persons.items()]) - return Entry(entry.type, fields=fd, persons=persons) - - -def get_safe_metadata(meta): - base_meta = Paper.create_meta() - if meta is not None: - base_meta.update(meta) - base_meta['tags'] = set(base_meta['tags']) - return base_meta - - -def get_safe_metadata_from_content(content): - return get_safe_metadata(yaml.load(content)) - - -def get_safe_metadata_from_path(metapath): - if metapath is None: - content = None - else: - content = files.read_yamlfile(metapath) - return get_safe_metadata(content) - - -def check_citekey(citekey): - # TODO This is not the right way to test that (17/12/2012) - if unicode(citekey) != str2citekey(citekey): - raise ValueError("Invalid citekey: %s" % citekey) - - -class NoDocumentFile(Exception): - pass +from . import bibstruct +DEFAULT_META = collections.OrderedDict([('docfile', None), ('tags', set()), ('notes', [])]) +DEFAULT_META = {'docfile': None, 'tags': set(), 'notes': []} class Paper(object): - """Paper class. The object is responsible for the integrity of its own - data, and for loading and writing it to disc. + """ Paper class. The object is responsible for the integrity of its data - The object uses a pybtex.database.BibliographyData object to store - biblography data and an additional dictionary to store meta data. + The object is not responsible of any disk i/o. + self.bibdata is a pybtex.database.BibliographyData object + self.metadata is a dictionary """ - def __init__(self, bibentry=None, metadata=None, citekey=None): - if bibentry is None: - bibentry = Entry(DEFAULT_TYPE) - self.bibentry = bibentry - if metadata is None: - metadata = Paper.create_meta() + def __init__(self, bibdata, citekey=None, metadata=None): + self.citekey = citekey self.metadata = metadata - check_citekey(citekey) - self.citekey = citekey + self.bibdata = bibdata + + if self.metadata is None: + self.metadata = copy.deepcopy(DEFAULT_META) + if self.citekey is None: + self.citekey = bibstruct.extract_citekey(self.bibdata) + bibstruct.check_citekey(self.citekey) def __eq__(self, other): return (isinstance(self, Paper) and type(other) is type(self) - and self.bibentry == other.bibentry + and self.bibdata == other.bibdata and self.metadata == other.metadata - and self.citekey == other.citekey) + and self.citekey == other.citekey) def __repr__(self): return 'Paper(%s, %s, %s)' % ( self.citekey, self.bibentry, self.metadata) - def __str__(self): - return self.__repr__() - - # TODO add mechanism to verify keys (15/12/2012) - - def get_external_document_path(self): - if self.metadata['external-document'] is not None: - return self.metadata['external-document'] - else: - raise NoDocumentFile - - def get_document_path(self): - return self.get_external_document_path() - - def set_external_document(self, docpath): - fullpdfpath = os.path.abspath(docpath) - files.check_file(fullpdfpath, fail=True) - self.metadata['external-document'] = fullpdfpath - - def check_document_path(self): - return files.check_file(self.get_external_document_path()) - - def generate_citekey(self): - """Generate a citekey from bib_data. - - Raises: - KeyError if no author nor editor is defined. - """ - author_key = 'author' - if not 'author' in self.bibentry.persons: - author_key = 'editor' - try: - first_author = self.bibentry.persons[author_key][0] - except KeyError: - raise ValueError( - 'No author or editor defined: cannot generate a citekey.') - try: - year = self.bibentry.fields['year'] - except KeyError: - year = '' - citekey = u'{}{}'.format(u''.join(first_author.last()), year) - return str2citekey(citekey) - - def save(self, bib_filepath, meta_filepath): - """Creates a BibliographyData object containing a single entry and - saves it to disc. - """ - if self.citekey is None: - raise ValueError( - 'No valid citekey initialized. Cannot save paper') - bibdata = BibliographyData(entries={self.citekey: self.bibentry}) - files.save_bibdata(bibdata, bib_filepath) - files.save_meta(self.metadata, meta_filepath) - - def update(self, key=None, bib=None, meta=None): - if key is not None: - check_citekey(key) - self.citekey = key - if bib is not None: - self.bibentry = bib - if meta is not None: - self.metadata = meta - - def get_document_file_from_bibdata(self, remove=False): - """Try extracting document file from bib data. - Raises NoDocumentFile if not found. - - Parameters: - ----------- - remove: default: False - remove field after extracting information - """ - try: - field = self.bibentry.fields['file'] - # Check if this is mendeley specific - for f in field.split(':'): - if len(f) > 0: - break - if remove: - self.bibentry.fields.pop('file') - # This is a hck for Mendeley. Make clean - if f[0] != '/': - f = '/' + f - return f - except (KeyError, IndexError): - raise NoDocumentFile('No file found in bib data.') - - def copy(self): - return Paper(bibentry=copy_bibentry(self.bibentry), - metadata=self.metadata.copy(), - citekey=self.citekey) + def deepcopy(self): + return Paper(citekey =self.citekey, + metadata=copy.deepcopy(self.metadata), + bibdata=copy.deepcopy(self.bibdata)) - @classmethod - def load(cls, bibpath, metapath=None): - key, entry = get_bibentry_from_file(bibpath) - metadata = get_safe_metadata_from_path(metapath) - p = Paper(bibentry=entry, metadata=metadata, citekey=key) - return p - - @classmethod - def create_meta(cls): - return BASE_META.copy() - - @classmethod - def many_from_path(cls, bibpath): - """Extract list of papers found in bibliographic files in path. - - The behavior is to: - - ignore wrong entries, - - overwrite duplicated entries. - :returns: dictionary of (key, paper | exception) - if loading of entry failed, the excpetion is returned in the - dictionary in place of the paper - """ - bibpath = files.clean_path(bibpath) - if os.path.isdir(bibpath): - all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath) - if os.path.splitext(f)[-1] in files.BIB_EXTENSIONS] - else: - all_files = [bibpath] - bib_data = [files.load_externalbibfile(f) for f in all_files] - papers = {} - for b in bib_data: - for k in b.entries: - try: - papers[k] = Paper(bibentry=b.entries[k], citekey=k) - except ValueError, e: - papers[k] = e - return papers + @property + def docpath(self): + return self.metadata.get('docfile', '') + @docpath.setter + def docpath(self, path): + """Does not verify if the path exists.""" + self.metadata['docfile'] = path - # tags + # tags @property def tags(self): @@ -272,42 +67,3 @@ class Paper(object): def remove_tag(self, tag): """Remove a tag from a paper if present.""" self.tags.discard(tag) - - -class PaperInRepo(Paper): - """Extend paper class with command specific to the case where the paper - lives in a repository. - """ - - def __init__(self, repo, *args, **kwargs): - Paper.__init__(self, *args, **kwargs) - self.repo = repo - - def get_document_path_in_repo(self): - return self.repo.find_document(self.citekey) - - def get_document_path(self): - try: - return self.get_document_path_in_repo() - except NoDocumentFile: - return self.get_external_document_path() - - def copy(self): - return PaperInRepo.from_paper(self.as_paper().copy(), self.repo) - - def as_paper(self): - return Paper(bibentry=self.bibentry, - metadata=self.metadata, - citekey=self.citekey) - - @classmethod - def load(cls, repo, bibpath, metapath=None): - key, entry = get_bibentry_from_file(bibpath) - metadata = get_safe_metadata_from_path(metapath) - p = cls(repo, bibentry=entry, metadata=metadata, citekey=key) - return p - - @classmethod - def from_paper(cls, paper, repo): - return cls(repo, bibentry=paper.bibentry, metadata=paper.metadata, - citekey=paper.citekey)