diff --git a/papers/files.py b/papers/files.py index 565d800..da7eea5 100644 --- a/papers/files.py +++ b/papers/files.py @@ -1,14 +1,9 @@ -import sys, os +import os import subprocess import tempfile -try: - import ConfigParser as configparser -except ImportError: - import configparser - import yaml - + import color try: @@ -24,87 +19,116 @@ try: import pybtex.database.output.bibyaml except ImportError: - print '{}error{}: you need to install Pybtex; try running \'pip install pybtex\' or \'easy_install pybtex\''.format(color.red, color.end) + print '{}error{}: you need to install Pybtex; try running \'pip install' + 'pybtex\' or \'easy_install pybtex\''.format(color.red, color.end) _papersdir = None +try: + EDITOR = os.environ['EDITOR'] +except KeyError: + EDITOR = 'nano' + + def find_papersdir(): """Find .papers directory in this directory and the parent directories""" global _papersdir if _papersdir is None: curdir = os.path.abspath(os.getcwd()) while curdir != '': - if os.path.exists(curdir + '/.papers') and os.path.isdir(curdir + '/.papers'): + if (os.path.exists(curdir + '/.papers') + and os.path.isdir(curdir + '/.papers')): _papersdir = curdir + '/.papers' curdir = '' if curdir == '/': curdir = '' else: curdir = os.path.split(curdir)[0] - if _papersdir is None: - print '{}error{} : no papers repo found in this directory or in any parent directory.{}'.format( - color.red, color.grey, color.end) + print '{}error{} : no papers repo found in this directory or in' + 'any parent directory.{}'.format(color.red, color.grey, color.end) exit(-1) - return _papersdir -def name_from_path(fullpdfpath, verbose = False): + +def name_from_path(fullpdfpath, verbose=False): name, ext = os.path.splitext(os.path.split(fullpdfpath)[1]) if verbose: if ext != '.pdf' and ext != '.ps': print('{}warning{}: extension {}{}{} not recognized{}'.format( - color.yellow, color.grey, color.cyan, ext, color.grey, color.end)) - return name, ext + color.yellow, color.grey, color.cyan, ext, color.grey, + color.end)) + return name, ext + def check_file(filepath): if not os.path.exists(filepath): print '{}error{}: {}{}{} does not exists{}'.format( - color.red, color.grey, color.cyan, filepath, color.grey, color.end) + color.red, color.grey, color.cyan, filepath, color.grey, + color.end) exit(-1) if not os.path.isfile(filepath): print '{}error{}: {}{}{} is not a file{}'.format( - color.red, color.grey, color.cyan, filepath, color.grey, color.end) + color.red, color.grey, color.cyan, filepath, color.grey, + color.end) exit(-1) - + + # yaml I/O def write_yamlfile(filepath, datamap): try: with open(filepath, 'w') as f: yaml.dump(datamap, f) - except IOError as e: + except IOError: print '{}error{} : impossible to read file {}{:s}{}'.format( color.red, color.grey, color.cyan, filepath, color.end) exit(-1) + def read_yamlfile(filepath): check_file(filepath) try: with open(filepath, 'r') as f: return yaml.load(f) - except IOError as e: + except IOError: print '{}error{} : impossible to read file {}{:s}{}'.format( - color.red, color.grey, color.cyan, paperdir, color.end) + color.red, color.grey, color.cyan, filepath, color.end) exit(-1) + def save_papers(datamap): paperyaml = find_papersdir() + os.sep + 'papers.yaml' write_yamlfile(paperyaml, datamap) + def load_papers(): - paperyaml = find_papersdir() + os.sep + 'papers.yaml' + paperyaml = os.path.join(find_papersdir(), 'papers.yaml') return read_yamlfile(paperyaml) -def save_meta(meta_data, filename): - filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta') + +def path_to_paper_file(name, file_, path_to_repo=None): + if path_to_repo is None: + path_to_repo = find_papersdir() + if file_ == 'bib': + return os.path.join(path_to_repo, 'bibdata', name + '.bibyaml') + elif file_ == 'meta': + return os.path.join(path_to_repo, 'meta', name + '.meta') + else: + raise(ValueError, "%s is not a valid paper file." % file_) + + +def save_meta(meta_data, filename, path=None): + filepath = path_to_paper_file(filename, 'meta', path_to_repo=path) write_yamlfile(filepath, meta_data) -def load_meta(filename): - filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta') + +def load_meta(filename, path=None): + filepath = path_to_paper_file(filename, 'meta', path_to_repo=path) return read_yamlfile(filepath) + # specific to bibliography data def load_externalbibfile(fullbibpath): @@ -127,24 +151,22 @@ def load_externalbibfile(fullbibpath): return bib_data -def load_bibdata(filename): - fullbibpath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml') - return load_externalbibfile(fullbibpath) -def save_bibdata(bib_data, filename): - filepath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml') +def load_bibdata(filename, path=None): + filepath = path_to_paper_file(filename, 'bib', path_to_repo=path) + return load_externalbibfile(filepath) + + +def save_bibdata(bib_data, filename, path=None): + filepath = path_to_paper_file(filename, 'bib', path_to_repo=path) with open(filepath, 'w') as f: parser = pybtex.database.output.bibyaml.Writer() parser.write_stream(bib_data, f) -# vim input -try: - EDITOR = os.environ['EDITOR'] -except KeyError: - EDITOR = 'nano' +# vim input -def vim_input(initial = ""): +def vim_input(initial=""): """Use an editor to get input""" with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file: diff --git a/papers/paper.py b/papers/paper.py index 2c94b6c..f2c8a8d 100644 --- a/papers/paper.py +++ b/papers/paper.py @@ -2,11 +2,15 @@ import os import unicodedata import re +from pybtex.database import Entry, BibliographyData + import files import color import pretty +DEFAULT_TYPE = 'article' + CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160))) CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden # here since we transform citekeys into filenames @@ -15,7 +19,10 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]' def str2citekey(s): - return CITEKEY_EXCLUDE_RE.sub('', s) + key = unicodedata.normalize('NFKD', unicode(s)).encode('ascii', 'ignore') + key = CITEKEY_EXCLUDE_RE.sub('', key) + # Normalize chars and remove non-ascii + return key class NoDocumentFile(Exception): @@ -23,31 +30,54 @@ class NoDocumentFile(Exception): class Paper(object): - """Paper class. The object is responsible for the integrity of its own data, - and for loading and writing it to disc. + """Paper class. The object is responsible for the integrity of its own + data, and for loading and writing it to disc. + + The object uses a pybtex.database.BibliographyData object to store + biblography data and an additional dictionary to store meta data. """ @classmethod - def from_disc(cls, name, citekey = None): - bib_data = files.load_bibdata(name) - metadata = files.load_meta(name) - p = Paper(name, bib_data = bib_data, metadata = metadata, - citekey = citekey) + def load(cls, bibpath, metapath): + bib_data = files.load_externalbibfile(bibpath) + metadata = files.read_yamlfile(metapath) + # Extract first entry (supposed to be the only one) + first_key = bib_data.entries.keys()[0] + first_entry = bib_data.entries[first_key] + p = Paper(bibentry=first_entry, metadata=metadata, citekey=first_key) return p - @classmethod - def from_bibpdffiles(cls, pdfpath, bibpath): - bib_data = cls.import_bibdata(bibpath) - name, meta = cls.create_meta(bib_data, pdfpath=pdfpath) - p = Paper(name, bib_data = bib_data, metadata = meta) +# @classmethod +# def from_bibpdffiles(cls, pdfpath, bibpath): +# bib_data = cls.import_bibdata(bibpath) +# name, meta = cls.create_meta(bib_data, pdfpath=pdfpath) +# p = Paper(name, bib_data = bib_data, metadata = meta) +# +# return p + + def __init__(self, bibentry=None, metadata=None, citekey=None): + if not bibentry: + bibentry = Entry(DEFAULT_TYPE) + self.bibentry = bibentry + if not metadata: + metadata = Paper.create_meta() + self.metadata = metadata + self.citekey = citekey - return p + def __eq__(self, other): + return (type(other) is Paper + and self.bibentry == other.bibentry + and self.metadata == other.metadata + and self.citekey == other.citekey) - def __init__(self, bib_data = None, metadata = None, - citekey = None): - self.citekey = citekey - self.bib_data = bib_data - self.metadata = metadata + def __repr__(self): + return 'Paper(%s, %s, %s)' % ( + self.citekey, self.bibentry, self.metadata) + + def __str__(self): + return self.__repr__() + + # TODO add mechanism to verify keys (15/12/2012) def has_file(self): """Whether there exist a document file for this entry. @@ -65,73 +95,58 @@ class Paper(object): def generate_citekey(self): """Generate a citekey from bib_data. - + Raises: KeyError if no author nor editor is defined. """ author_key = 'author' - if not 'author' in self.bib_data.persons: + if not 'author' in self.bibentry.persons: author_key = 'editor' try: - first_author = self.bib_data.persons[author_key][0] + first_author = self.bibentry.persons[author_key][0] except KeyError: raise(ValueError, 'No author or editor defined: cannot generate a citekey.') try: - year = entry.fields['year'] + year = self.bibentry.fields['year'] except KeyError: year = '' - prefix = u'{}{}'.format(first_author.last()[0][:6], year) - prefix = str2citekey(prefix) - # Normalize chars and remove non-ascii - prefix = unicodedata.normalize('NFKD', prefix - ).encode('ascii', 'ignore') - letter = 0 - citekey = prefix - while citekey in self.citekeys and citekey not in allowed: - citekey = prefix + ALPHABET[letter] - letter += 1 - return citekey - - - def save_to_disc(self): - files.save_bibdata(self.bib_data, self.citekey) - files.save_meta(self.metadata, self.citekey) + citekey = u'{}{}'.format(u''.join(first_author.last()), year) + return str2citekey(citekey) + + def save_to_disc(self, path): + """Creates a BibliographyData object containing a single entry and + saves it to disc. + """ + if self.citekey is None: + raise(ValueError, + 'No valid citekey initialized. Cannot save paper') + bibdata = BibliographyData(entries={self.citekey: self.bibentry}) + files.save_bibdata(bibdata, self.citekey, path=path) + files.save_meta(self.metadata, self.citekey, path=path) # TODO move to repo @classmethod def import_bibdata(cls, bibfile): """Import bibligraphic data from a .bibyaml, .bib or .bibtex file""" fullbibpath = os.path.abspath(bibfile) - bib_data = files.load_externalbibfile(fullbibpath) print('{}bibliographic data present in {}{}{}'.format( color.grey, color.cyan, bibfile, color.end)) print(pretty.bib_desc(bib_data)) - return bib_data @classmethod - def create_meta(cls, bib_data, pdfpath=None): - + def create_meta(cls, pdfpath=None): if pdfpath is None: - citekey = bib_data.entries.keys()[0] - # TODO this introduces a bug and a security issue since the name - # is used to generate a file name that is written. It should be - # escaped here. (22/10/2012) - fullpdfpath, ext = None, None + name, fullpdfpath, ext = None, None, None else: fullpdfpath = os.path.abspath(pdfpath) files.check_file(fullpdfpath) - name, ext = files.name_from_path(pdfpath) - meta = {} - - meta['name'] = name + meta['filename'] = name # TODO remove ? meta['extension'] = ext meta['path'] = fullpdfpath - meta['notes'] = [] - - return name, meta + return meta diff --git a/papers/repo.py b/papers/repo.py index 90e54ef..6882a59 100644 --- a/papers/repo.py +++ b/papers/repo.py @@ -34,7 +34,7 @@ class Repository(object): def paper_from_citekey(self, citekey, fatal=True): """Load a paper by its citekey from disk, if necessary.""" try: - return Paper.from_disc(citekey) + return Paper.load(citekey) except KeyError: if fatal: print('{}error{}: no paper with citekey {}{}{}'.format( @@ -70,6 +70,7 @@ class Repository(object): self.citekeys.append(p.citekey) # writing all to disk + # TODO Update by giving filename (17/12/2012) p.save_to_disc() files.save_papers(self.papers_config) print "Added: %s" % p.citekey @@ -80,7 +81,8 @@ class Repository(object): for k in bib_data.entries: sub_bib = type(bib_data)(preamble=bib_data._preamble) sub_bib.add_entry(k, bib_data.entries[k]) - name, meta = Paper.create_meta(sub_bib, pdfpath=None) + meta = Paper.create_meta(pdfpath=None) + name = meta['filename'] p = Paper(name, bib_data = sub_bib, metadata = meta) self.add_paper(p) diff --git a/tests/test_paper.py b/tests/test_paper.py new file mode 100644 index 0000000..af32e86 --- /dev/null +++ b/tests/test_paper.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +import os +import unittest +import tempfile +import shutil + +import yaml +from pybtex.database import Person + +from papers.paper import Paper +from papers import files + + +BIB = """ +entries: + Turing1950: + author: + - first: 'Alan' + last: 'Turing' + title: 'Computing machinery and intelligence.' + type: article + year: '1950' +""" +META = """ +filename: null +extension: null +notes: [] +path: null +""" + + +class TestCreateCitekey(unittest.TestCase): + + def test_fails_on_empty_paper(self): + paper = Paper() + with self.assertRaises(ValueError): + paper.generate_citekey() + + def test_escapes_chars(self): + paper = Paper() + paper.bibentry.persons['author'] = [ + Person(last=u'Z ôu\\@/', first='Zde'), + Person(string='John Doe')] + key = paper.generate_citekey() + self.assertEqual(key, 'Zou') + + def test_simple(self): + paper = Paper() + paper.bibentry.persons['author'] = [Person(string='John Doe')] + paper.bibentry.fields['year'] = '2001' + key = paper.generate_citekey() + self.assertEqual(key, 'Doe2001') + + +class TestSaveLoad(unittest.TestCase): + + def setUp(self): + self.tmpdir = tempfile.mkdtemp() + os.makedirs(os.path.join(self.tmpdir, 'bibdata')) + os.makedirs(os.path.join(self.tmpdir, 'meta')) + self.bibfile = os.path.join(self.tmpdir, 'bib.bibyaml') + with open(self.bibfile, 'w') as f: + f.write(BIB) + self.metafile = os.path.join(self.tmpdir, 'meta.meta') + with open(self.metafile, 'w') as f: + f.write(META) + self.turing1950 = Paper() + self.turing1950.bibentry.fields['title'] = u'Computing machinery and intelligence.' + self.turing1950.bibentry.fields['year'] = u'1950' + self.turing1950.bibentry.persons['author'] = [Person(u'Alan Turing')] + self.turing1950.citekey = self.turing1950.generate_citekey() + + def test_load_valid(self): + p = Paper.load(self.bibfile, self.metafile) + self.assertEqual(self.turing1950, p) + + def test_save_fails_with_no_citekey(self): + p = Paper() + with self.assertRaises(ValueError): + p.save_to_disc(self.tmpdir) + + def test_save_creates_bib(self): + self.turing1950.save_to_disc(self.tmpdir) + bibfile = files.path_to_paper_file('Turing1950', 'bib', + path_to_repo=self.tmpdir) + self.assertTrue(os.path.exists(bibfile)) + + def test_save_creates_meta(self): + self.turing1950.save_to_disc(self.tmpdir) + metafile = files.path_to_paper_file('Turing1950', 'meta', + path_to_repo=self.tmpdir) + self.assertTrue(os.path.exists(metafile)) + + def test_save_right_bib(self): + self.turing1950.save_to_disc(self.tmpdir) + bibfile = files.path_to_paper_file('Turing1950', 'bib', + path_to_repo=self.tmpdir) + with open(bibfile, 'r') as f: + written = yaml.load(f) + ok = yaml.load(BIB) + self.assertEqual(written, ok) + + def test_save_right_meta(self): + self.turing1950.save_to_disc(self.tmpdir) + metafile = files.path_to_paper_file('Turing1950', 'meta', + path_to_repo=self.tmpdir) + with open(metafile, 'r') as f: + written = yaml.load(f) + ok = yaml.load(META) + self.assertEqual(written, ok) + + def teardown(self): + shutil.rmtree(self.tmpdir)