import os
import unicodedata
import re

import files
import color
import pretty


# C0 control characters (0-31) plus DEL and the C1 controls (127-159).
CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
# '/' is OK for bibtex but forbidden here since we transform citekeys into
# filenames.
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/'
CITEKEY_EXCLUDE_RE = re.compile(
    '[%s]' % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))

# Suffix letters tried, in order, when a generated citekey is already taken.
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'


def str2citekey(s):
    """Return `s` with control and citekey-forbidden characters removed."""
    return CITEKEY_EXCLUDE_RE.sub('', s)


class NoDocumentFile(Exception):
    """Raised when a document path is requested from a paper without one."""
    pass


class Paper(object):
    """Paper class. The object is responsible for the integrity of its own
    data, and for loading and writing it to disc.
    """

    @classmethod
    def from_disc(cls, name, citekey=None):
        """Build a paper from the bibdata and metadata stored under `name`.

        `name` is only used to locate the stored data; the resulting paper
        carries `citekey` (possibly None) as its citekey.
        """
        bib_data = files.load_bibdata(name)
        metadata = files.load_meta(name)
        # `name` must not be passed positionally: the first positional
        # parameter of __init__ is `bib_data`, so doing so raises TypeError.
        return cls(bib_data=bib_data, metadata=metadata, citekey=citekey)

    @classmethod
    def from_bibpdffiles(cls, pdfpath, bibpath):
        """Build a paper from an external bibliography file and a document.

        `pdfpath` may be None, in which case the paper has no document file.
        """
        bib_data = cls.import_bibdata(bibpath)
        # The name computed by create_meta is also stored in meta['name'].
        _, meta = cls.create_meta(bib_data, pdfpath=pdfpath)
        return cls(bib_data=bib_data, metadata=meta)

    def __init__(self, bib_data=None, metadata=None, citekey=None):
        self.citekey = citekey
        self.bib_data = bib_data
        self.metadata = metadata

    def has_file(self):
        """Whether there exist a document file for this entry.
        """
        if self.metadata is None:
            # No metadata at all (the constructor default): no file either.
            return False
        return self.metadata['path'] is not None

    def get_file_path(self):
        """Return the document path stored in the metadata.

        Raises:
            NoDocumentFile if the paper has no document file.
        """
        if not self.has_file():
            raise NoDocumentFile
        return self.metadata['path']

    def check_file(self):
        """Run files.check_file on the document path and return its result.

        Raises:
            NoDocumentFile if the paper has no document file.
        """
        return files.check_file(self.get_file_path())

    def generate_citekey(self, citekeys=(), allowed=()):
        """Generate a citekey from bib_data.

        The key is made of the first six characters of the first author's
        (or, failing that, first editor's) last name followed by the year,
        stripped of forbidden and non-ascii characters. If that key is
        already in `citekeys` and not in `allowed`, a letter of ALPHABET is
        appended until a free key is found.

        Args:
            citekeys: collection of citekeys already in use.
            allowed: collection of citekeys that may be returned even though
                they appear in `citekeys`.

        Raises:
            ValueError if no author nor editor is defined, or if every
            single-letter suffix is already taken.
        """
        # NOTE(review): this assumes bib_data exposes `persons` and `fields`
        # (entry-like), whereas create_meta reads `bib_data.entries` --
        # confirm which object is stored in self.bib_data.
        author_key = 'author'
        if 'author' not in self.bib_data.persons:
            author_key = 'editor'
        try:
            first_author = self.bib_data.persons[author_key][0]
        except (KeyError, IndexError):
            raise ValueError(
                'No author or editor defined: cannot generate a citekey.')
        try:
            year = self.bib_data.fields['year']
        except KeyError:
            year = ''
        prefix = u'{}{}'.format(first_author.last()[0][:6], year)
        prefix = str2citekey(prefix)
        # Normalize chars and remove non-ascii
        prefix = unicodedata.normalize('NFKD', prefix
                ).encode('ascii', 'ignore')

        def is_free(key):
            return key not in citekeys or key in allowed

        if is_free(prefix):
            return prefix
        for letter in ALPHABET:
            candidate = prefix + letter
            if is_free(candidate):
                return candidate
        raise ValueError(
            'Cannot generate a unique citekey from {!r}.'.format(prefix))

    def save_to_disc(self):
        """Write the bibdata and the metadata to disc under the citekey."""
        files.save_bibdata(self.bib_data, self.citekey)
        files.save_meta(self.metadata, self.citekey)
        # TODO move to repo

    @classmethod
    def import_bibdata(cls, bibfile):
        """Import bibliographic data from a .bibyaml, .bib or .bibtex file"""
        fullbibpath = os.path.abspath(bibfile)
        bib_data = files.load_externalbibfile(fullbibpath)
        print('{}bibliographic data present in {}{}{}'.format(
               color.grey, color.cyan, bibfile, color.end))
        print(pretty.bib_desc(bib_data))
        return bib_data

    @classmethod
    def create_meta(cls, bib_data, pdfpath=None):
        """Create the metadata dictionary of a new paper.

        Without `pdfpath`, the name is derived from the first entry key of
        `bib_data`; otherwise name and extension come from
        files.name_from_path(pdfpath).

        Returns:
            A (name, meta) tuple, where meta has the keys 'name',
            'extension', 'path' and 'notes'.
        """
        if pdfpath is None:
            citekey = bib_data.entries.keys()[0]
            # The name is used to generate a file name that is written, so
            # strip the characters that are not safe in a file name.
            name = str2citekey(citekey)
            fullpdfpath, ext = None, None
        else:
            fullpdfpath = os.path.abspath(pdfpath)
            files.check_file(fullpdfpath)
            name, ext = files.name_from_path(pdfpath)

        meta = {}
        meta['name'] = name
        meta['extension'] = ext
        meta['path'] = fullpdfpath
        meta['notes'] = []

        return name, meta