diff --git a/README b/README index d383850..f4267b9 100644 --- a/README +++ b/README @@ -2,5 +2,3 @@ A paper correspond to 3 files : name.pdf a pdf or ps file, the paper itself, whose location is arbitrary bibdata/name.bibyaml a bibyaml file with all bibliographic data. meta/name.meta a metadata file for internal use, notes, citekeys, status, etc. - - \ No newline at end of file diff --git a/papers/commands/init_cmd.py b/papers/commands/init_cmd.py index 66fd3f9..b2a9178 100644 --- a/papers/commands/init_cmd.py +++ b/papers/commands/init_cmd.py @@ -22,14 +22,11 @@ def command(config): print('{}initializing papers in {}{}{}'.format( color.grey, color.cyan, papersdir, color.end)) - os.makedirs(papersdir) - os.makedirs(papersdir+os.sep+'bibdata') - os.makedirs(papersdir+os.sep+'meta') + os.makedirs(os.path.join(papersdir, 'bibdata')) + os.makedirs(os.path.join(papersdir, 'meta')) papers = {} - papers['count'] = 0 - papers['citekeys'] = {} - papers['numbers'] = {} + papers['citekeys'] = [] files.save_papers(papers) else: diff --git a/papers/commands/list_cmd.py b/papers/commands/list_cmd.py index d84e7e9..364099f 100644 --- a/papers/commands/list_cmd.py +++ b/papers/commands/list_cmd.py @@ -13,10 +13,10 @@ def command(config): rp = repo.Repository() articles = [] - for n in sorted(rp.numbers.keys()): - paper = rp.paper_from_number(n, fatal = True) + for n in range(rp.size()): + paper = rp.paper_from_number(n, fatal=True) bibdesc = pretty.bib_oneliner(paper.bib_data) - articles.append(u'{:3d} {}{}{}{} {}'.format(int(paper.number), color.purple, paper.citekey, color.end, (8-len(paper.citekey))*' ', bibdesc)) + articles.append((u'{:3d} {}{}{}{} {}'.format(int(paper.number), color.purple, paper.citekey, color.end, (10 - len(paper.citekey))*' ', bibdesc)).encode('utf-8')) with tempfile.NamedTemporaryFile(suffix=".tmp", delete=True) as tmpf: tmpf.write('\n'.join(articles)) diff --git a/papers/files.py b/papers/files.py index ee98ca7..565d800 100644 --- a/papers/files.py +++ b/papers/files.py @@ -1,4 +1,3 @@ - import sys, os import subprocess import tempfile @@ -99,11 +98,11 @@ def load_papers(): return read_yamlfile(paperyaml) def save_meta(meta_data, filename): - filepath = find_papersdir() + os.sep + 'meta' + os.sep + filename + '.meta' + filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta') write_yamlfile(filepath, meta_data) def load_meta(filename): - filepath = find_papersdir() + os.sep + 'meta' + os.sep + filename + '.meta' + filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta') return read_yamlfile(filepath) # specific to bibliography data @@ -129,11 +128,11 @@ def load_externalbibfile(fullbibpath): return bib_data def load_bibdata(filename): - fullbibpath = find_papersdir() + os.sep + 'bibdata' + os.sep + filename + '.bibyaml' + fullbibpath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml') return load_externalbibfile(fullbibpath) def save_bibdata(bib_data, filename): - filepath = find_papersdir() + os.sep + 'bibdata' + os.sep + filename + '.bibyaml' + filepath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml') with open(filepath, 'w') as f: parser = pybtex.database.output.bibyaml.Writer() parser.write_stream(bib_data, f) diff --git a/papers/paper.py b/papers/paper.py index 8217bea..2c94b6c 100644 --- a/papers/paper.py +++ b/papers/paper.py @@ -1,10 +1,23 @@ import os - +import unicodedata +import re + import files import color import pretty +CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160))) +CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden +# here since we transform citekeys into filenames +CITEKEY_EXCLUDE_RE = re.compile('[%s]' + % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS)) + + +def str2citekey(s): + return CITEKEY_EXCLUDE_RE.sub('', s) + + class NoDocumentFile(Exception): pass @@ -15,11 +28,11 @@ class Paper(object): """ @classmethod - def from_disc(cls, name, citekey = None, number = None): + def from_disc(cls, name, citekey = None): bib_data = files.load_bibdata(name) metadata = files.load_meta(name) - p = Paper(name, bib_data = bib_data, metadata = metadata, - citekey = citekey, number = number) + p = Paper(name, bib_data = bib_data, metadata = metadata, + citekey = citekey) return p @classmethod @@ -27,16 +40,14 @@ class Paper(object): bib_data = cls.import_bibdata(bibpath) name, meta = cls.create_meta(bib_data, pdfpath=pdfpath) p = Paper(name, bib_data = bib_data, metadata = meta) - - return p - def __init__(self, name, bib_data = None, metadata = None, - citekey = None, number = None): - self.name = name + return p + + def __init__(self, bib_data = None, metadata = None, + citekey = None): + self.citekey = citekey self.bib_data = bib_data self.metadata = metadata - self.citekey = citekey - self.number = number def has_file(self): """Whether there exist a document file for this entry. @@ -51,28 +62,60 @@ class Paper(object): def check_file(self): return files.check_file(self.get_file_path()) - + + def generate_citekey(self): + """Generate a citekey from bib_data. + + Raises: + KeyError if no author nor editor is defined. + """ + author_key = 'author' + if not 'author' in self.bib_data.persons: + author_key = 'editor' + try: + first_author = self.bib_data.persons[author_key][0] + except KeyError: + raise(ValueError, + 'No author or editor defined: cannot generate a citekey.') + try: + year = entry.fields['year'] + except KeyError: + year = '' + prefix = u'{}{}'.format(first_author.last()[0][:6], year) + prefix = str2citekey(prefix) + # Normalize chars and remove non-ascii + prefix = unicodedata.normalize('NFKD', prefix + ).encode('ascii', 'ignore') + letter = 0 + citekey = prefix + while citekey in self.citekeys and citekey not in allowed: + citekey = prefix + ALPHABET[letter] + letter += 1 + return citekey + + def save_to_disc(self): - files.save_bibdata(self.bib_data, self.name) - files.save_meta(self.metadata, self.name) + files.save_bibdata(self.bib_data, self.citekey) + files.save_meta(self.metadata, self.citekey) + # TODO move to repo @classmethod - def import_bibdata(cls, bibfile): + def import_bibdata(cls, bibfile): """Import bibligraphic data from a .bibyaml, .bib or .bibtex file""" fullbibpath = os.path.abspath(bibfile) - + bib_data = files.load_externalbibfile(fullbibpath) print('{}bibliographic data present in {}{}{}'.format( color.grey, color.cyan, bibfile, color.end)) print(pretty.bib_desc(bib_data)) - + return bib_data @classmethod def create_meta(cls, bib_data, pdfpath=None): - + if pdfpath is None: - name = bib_data.entries.keys()[0] + citekey = bib_data.entries.keys()[0] # TODO this introduces a bug and a security issue since the name # is used to generate a file name that is written. It should be # escaped here. (22/10/2012) @@ -80,15 +123,15 @@ class Paper(object): else: fullpdfpath = os.path.abspath(pdfpath) files.check_file(fullpdfpath) - + name, ext = files.name_from_path(pdfpath) - + meta = {} - + meta['name'] = name meta['extension'] = ext meta['path'] = fullpdfpath - + meta['notes'] = [] - + return name, meta diff --git a/papers/papers b/papers/papers index 5825363..9990952 100755 --- a/papers/papers +++ b/papers/papers @@ -1,4 +1,5 @@ #!/usr/bin/env python2 +# -*- coding:utf-8 -*- import argparse import collections @@ -7,7 +8,7 @@ from papers import commands cmds = collections.OrderedDict([ ('init', commands.init_cmd), - ('add' , commands.add_cmd), + ('add', commands.add_cmd), ('add_library', commands.add_library_cmd), ('list', commands.list_cmd), ('open', commands.open_cmd), diff --git a/papers/pretty.py b/papers/pretty.py index 5e97581..5d6a637 100644 --- a/papers/pretty.py +++ b/papers/pretty.py @@ -3,11 +3,11 @@ import color def person_repr(p): - return u' '.join(s for s in [u' '.join(p.first(abbr = True)), - u' '.join(p.middle(abbr = True)), - u' '.join(p.prelast(abbr = False)), - u' '.join(p.last(abbr = False)), - u' '.join(p.lineage(abbr = True))] if s) + return ' '.join(s for s in [' '.join(p.first(abbr = True)), + ' '.join(p.middle(abbr = True)), + ' '.join(p.prelast(abbr = False)), + ' '.join(p.last(abbr = False)), + ' '.join(p.lineage(abbr = True))] if s) def bib_oneliner(bib_data): article = bib_data.entries[list(bib_data.entries.keys())[0]] @@ -26,7 +26,7 @@ def bib_oneliner(bib_data): def bib_desc(bib_data): article = bib_data.entries[list(bib_data.entries.keys())[0]] - s = u'\n'.join(u'author: {}'.format(person_repr(p)) for p in article.persons['author']) - s += u'\n' - s += u'\n'.join(u'{}: {}'.format(k, v) for k, v in article.fields.items()) + s = '\n'.join('author: {}'.format(person_repr(p)) for p in article.persons['author']) + s += '\n' + s += '\n'.join('{}: {}'.format(k, v) for k, v in article.fields.items()) return s diff --git a/papers/repo.py b/papers/repo.py index 9d575ba..90e54ef 100644 --- a/papers/repo.py +++ b/papers/repo.py @@ -3,24 +3,26 @@ import color from paper import Paper -alphabet = 'abcdefghijklmopqrstuvwxyz' + +ALPHABET = 'abcdefghijklmopqrstuvwxyz' class Repository(object): - def __init__(self): - self.paperdir = files.find_papersdir() + def __init__(self, paperdir=None): + if paperdir: + self.paperdir = paperdir + else: + self.paperdir = files.find_papersdir() self.papers_config = files.load_papers() self.citekeys = self.papers_config['citekeys'] - self.numbers = self.papers_config['numbers'] # loading existing papers def paper_from_number(self, number, fatal = True): try: - citekey = self.numbers[int(number)] + citekey = self.citekeys[int(number)] paper = self.paper_from_citekey(citekey) - paper.number = int(number) return paper except KeyError: if fatal: @@ -29,13 +31,10 @@ class Repository(object): exit(-1) raise IOError, 'file not found' - def paper_from_citekey(self, citekey, fatal = True): + def paper_from_citekey(self, citekey, fatal=True): """Load a paper by its citekey from disk, if necessary.""" try: - name = self.citekeys[citekey] - paper = Paper.from_disc(name, citekey = citekey) - paper.citekey = citekey - return paper + return Paper.from_disc(citekey) except KeyError: if fatal: print('{}error{}: no paper with citekey {}{}{}'.format( @@ -59,24 +58,20 @@ class Repository(object): # creating new papers def add_paper_from_paths(self, pdfpath, bibpath): - - p = Paper.from_bibpdffiles(pdfpath, bibpath) + p = Paper.from_bibpdffiles(pdfpath, bibpath) self.add_paper(p) def add_paper(self, p): # updating papersconfig - p.citekey = self.create_citekey(p.bib_data) - p.number = self.create_number() + bib_data_entry = p.bib_data.entries[list(p.bib_data.entries.keys())[0]] + p.citekey = self.get_valid_citekey(bib_data_entry) - self.papers_config['citekeys'][p.citekey] = p.name - self.papers_config['numbers'][p.number] = p.citekey + self.papers_config['citekeys'].append(p.citekey) + self.citekeys.append(p.citekey) - self.citekeys[p.citekey] = p.name - self.numbers[p.number] = p.citekey - # writing all to disk - files.save_papers(self.papers_config) p.save_to_disc() + files.save_papers(self.papers_config) print "Added: %s" % p.citekey return p @@ -88,38 +83,44 @@ class Repository(object): name, meta = Paper.create_meta(sub_bib, pdfpath=None) p = Paper(name, bib_data = sub_bib, metadata = meta) self.add_paper(p) - - def create_citekey(self, bib_data, allowed = tuple()): + + def get_valid_citekey(self, entry): + citekey = str2citekey(entry.key) + if citekey in self.citekeys: + raise(ValueError, "An entry with same citekey already exists.") + if len(citekey) == 0: + citekey = self.create_citekey(entry) + return citekey + + def create_citekey(self, entry, allowed = tuple()): """Create a cite key unique to a given bib_data. Raises: KeyError if no author is defined. """ - article = bib_data.entries[list(bib_data.entries.keys())[0]] author_key = 'author' - if not 'author' in article.persons: + if not 'author' in entry.persons: author_key = 'editor' try: - first_author = article.persons[author_key][0] + first_author = entry.persons[author_key][0] except KeyError: raise(ValueError, 'No author or editor defined: cannot generate a citekey.') try: - year = article.fields['year'] + year = entry.fields['year'] except KeyError: year = '' - prefix = '{}{}'.format(first_author.last()[0][:6], year) - + prefix = u'{}{}'.format(first_author.last()[0][:6], year) + prefix = str2citekey(prefix) + # Normalize chars and remove non-ascii + prefix = unicodedata.normalize('NFKD', prefix + ).encode('ascii', 'ignore') letter = 0 - citekey = prefix while citekey in self.citekeys and citekey not in allowed: - citekey = prefix + alphabet[letter] + citekey = prefix + ALPHABET[letter] letter += 1 - return citekey - - def create_number(self): - count = int(self.papers_config['count']) - self.papers_config['count'] = count + 1 - return count + + def size(self): + return len(self.citekeys)