Clean up Paper class and add tests.

main
Olivier Mangin 12 years ago
parent f2e9625aac
commit 2d758ea16e

@ -1,12 +1,7 @@
import sys, os import os
import subprocess import subprocess
import tempfile import tempfile
try:
import ConfigParser as configparser
except ImportError:
import configparser
import yaml import yaml
import color import color
@ -24,87 +19,116 @@ try:
import pybtex.database.output.bibyaml import pybtex.database.output.bibyaml
except ImportError: except ImportError:
print '{}error{}: you need to install Pybtex; try running \'pip install pybtex\' or \'easy_install pybtex\''.format(color.red, color.end) print '{}error{}: you need to install Pybtex; try running \'pip install'
'pybtex\' or \'easy_install pybtex\''.format(color.red, color.end)
_papersdir = None _papersdir = None
try:
EDITOR = os.environ['EDITOR']
except KeyError:
EDITOR = 'nano'
def find_papersdir():
    """Find .papers directory in this directory and the parent directories.

    The search starts at the current working directory and walks up one
    parent at a time. The first hit is cached in the module-level
    ``_papersdir`` so later calls return it without touching the disk.

    Returns:
        The path of the ``.papers`` directory.

    If no such directory exists, an error is printed and the process exits
    with status -1.
    """
    global _papersdir
    if _papersdir is None:
        curdir = os.path.abspath(os.getcwd())
        while True:
            candidate = os.path.join(curdir, '.papers')
            # isdir() is already False for paths that do not exist.
            if os.path.isdir(candidate):
                _papersdir = candidate
                break
            parent = os.path.dirname(curdir)
            # The filesystem root is its own parent: nothing left to visit.
            if parent == curdir:
                break
            curdir = parent
        if _papersdir is None:
            # Keep the trailing space so the two literals join as "or in any".
            print('{}error{} : no papers repo found in this directory or in '
                  'any parent directory.{}'.format(
                      color.red, color.grey, color.end))
            exit(-1)
    return _papersdir
def name_from_path(fullpdfpath, verbose=False):
    """Return the ``(name, extension)`` pair of the file at *fullpdfpath*.

    The directory part is discarded; the extension keeps its leading dot.
    When *verbose* is true, a warning is printed for any extension other
    than ``.pdf`` or ``.ps``.
    """
    basename = os.path.basename(fullpdfpath)
    name, ext = os.path.splitext(basename)
    if verbose and ext not in ('.pdf', '.ps'):
        warning = '{}warning{}: extension {}{}{} not recognized{}'.format(
            color.yellow, color.grey, color.cyan, ext, color.grey,
            color.end)
        print(warning)
    return name, ext
def check_file(filepath):
    """Make sure *filepath* names an existing regular file.

    Returns None when the check passes. Otherwise an error is printed and
    the process exits with status -1.
    """
    if not os.path.exists(filepath):
        template = '{}error{}: {}{}{} does not exists{}'
    elif not os.path.isfile(filepath):
        template = '{}error{}: {}{}{} is not a file{}'
    else:
        return
    print(template.format(
        color.red, color.grey, color.cyan, filepath, color.grey,
        color.end))
    exit(-1)
# yaml I/O # yaml I/O
def write_yamlfile(filepath, datamap):
    """Serialize *datamap* as YAML into the file at *filepath*.

    The file is created or truncated. If it cannot be opened or written
    (IOError), an error is printed and the process exits with status -1.
    """
    try:
        with open(filepath, 'w') as f:
            yaml.dump(datamap, f)
    except IOError:
        # This is the write path: report a write failure, not a read one.
        print('{}error{} : impossible to write file {}{:s}{}'.format(
            color.red, color.grey, color.cyan, filepath, color.end))
        exit(-1)
def read_yamlfile(filepath):
    """Parse the YAML file at *filepath* and return the loaded object.

    The path is first validated with ``check_file``. If reading then fails
    with an IOError, an error is printed and the process exits with
    status -1.
    """
    check_file(filepath)
    try:
        with open(filepath, 'r') as stream:
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # only use on files this tool wrote itself -- confirm callers.
            content = yaml.load(stream)
    except IOError:
        print('{}error{} : impossible to read file {}{:s}{}'.format(
            color.red, color.grey, color.cyan, filepath, color.end))
        exit(-1)
    return content
def save_papers(datamap):
    """Write *datamap* to ``papers.yaml`` inside the papers directory."""
    write_yamlfile(os.path.join(find_papersdir(), 'papers.yaml'), datamap)
def load_papers():
    """Return the content of ``papers.yaml`` from the papers directory."""
    repo_dir = find_papersdir()
    return read_yamlfile(os.path.join(repo_dir, 'papers.yaml'))
def path_to_paper_file(name, file_, path_to_repo=None):
    """Return the path of one of the files that store the paper *name*.

    Args:
        name: base name of the paper's files (without extension).
        file_: which file is wanted, ``'bib'`` for the bibliography data
            (``bibdata/<name>.bibyaml``) or ``'meta'`` for the metadata
            (``meta/<name>.meta``).
        path_to_repo: directory holding the ``bibdata`` and ``meta``
            folders; defaults to the result of ``find_papersdir()``.

    Raises:
        ValueError: if *file_* is neither ``'bib'`` nor ``'meta'``.
    """
    if path_to_repo is None:
        path_to_repo = find_papersdir()
    if file_ == 'bib':
        return os.path.join(path_to_repo, 'bibdata', name + '.bibyaml')
    elif file_ == 'meta':
        return os.path.join(path_to_repo, 'meta', name + '.meta')
    else:
        # Instantiate the exception: raising a (class, message) tuple drops
        # the message on Python 2 and is a TypeError on Python 3.
        raise ValueError("%s is not a valid paper file." % file_)
def save_meta(meta_data, filename, path=None):
    """Write *meta_data* as YAML to the metadata file of paper *filename*.

    *path* is forwarded to ``path_to_paper_file`` as the repository
    location.
    """
    target = path_to_paper_file(filename, 'meta', path_to_repo=path)
    write_yamlfile(target, meta_data)
def load_meta(filename, path=None):
    """Read and return the metadata stored for paper *filename*.

    *path* is forwarded to ``path_to_paper_file`` as the repository
    location.
    """
    source = path_to_paper_file(filename, 'meta', path_to_repo=path)
    return read_yamlfile(source)
# specific to bibliography data # specific to bibliography data
def load_externalbibfile(fullbibpath): def load_externalbibfile(fullbibpath):
@ -127,24 +151,22 @@ def load_externalbibfile(fullbibpath):
return bib_data return bib_data
def load_bibdata(filename, path=None):
    """Load the bibliography data stored for paper *filename*.

    *path* is forwarded to ``path_to_paper_file`` as the repository
    location; the file itself is read by ``load_externalbibfile``.
    """
    source = path_to_paper_file(filename, 'bib', path_to_repo=path)
    return load_externalbibfile(source)
def save_bibdata(bib_data, filename, path=None):
    """Write *bib_data* in bibyaml format to the bib file of *filename*.

    *path* is forwarded to ``path_to_paper_file`` as the repository
    location.
    """
    target = path_to_paper_file(filename, 'bib', path_to_repo=path)
    writer = pybtex.database.output.bibyaml.Writer()
    with open(target, 'w') as stream:
        writer.write_stream(bib_data, stream)
# vim input
try: # vim input
EDITOR = os.environ['EDITOR']
except KeyError:
EDITOR = 'nano'
def vim_input(initial = ""): def vim_input(initial=""):
"""Use an editor to get input""" """Use an editor to get input"""
with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file: with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as temp_file:

@ -2,11 +2,15 @@ import os
import unicodedata import unicodedata
import re import re
from pybtex.database import Entry, BibliographyData
import files import files
import color import color
import pretty import pretty
DEFAULT_TYPE = 'article'
CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160))) CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden
# here since we transform citekeys into filenames # here since we transform citekeys into filenames
@ -15,7 +19,10 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
def str2citekey(s):
    """Turn *s* into a string that is usable as a citekey.

    The text is NFKD-normalized, characters that cannot be encoded as ASCII
    are dropped, and everything matched by ``CITEKEY_EXCLUDE_RE`` is then
    removed.
    """
    # Normalize chars and remove non-ascii (``unicode`` is the Python 2
    # builtin).
    normalized = unicodedata.normalize('NFKD', unicode(s))
    ascii_only = normalized.encode('ascii', 'ignore')
    return CITEKEY_EXCLUDE_RE.sub('', ascii_only)
class NoDocumentFile(Exception): class NoDocumentFile(Exception):
@ -23,31 +30,54 @@ class NoDocumentFile(Exception):
class Paper(object): class Paper(object):
"""Paper class. The object is responsible for the integrity of its own data, """Paper class. The object is responsible for the integrity of its own
and for loading and writing it to disc. data, and for loading and writing it to disc.
The object uses a pybtex.database.BibliographyData object to store
biblography data and an additional dictionary to store meta data.
""" """
@classmethod
def load(cls, bibpath, metapath):
    """Build a paper from a bibliography file and a metadata file.

    Args:
        bibpath: path given to ``files.load_externalbibfile``.
        metapath: path of the YAML metadata file.

    Returns:
        A new instance whose citekey is the key of the first entry found
        in the bibliography file.

    Raises:
        ValueError: if the bibliography file contains no entry.
    """
    bib_data = files.load_externalbibfile(bibpath)
    metadata = files.read_yamlfile(metapath)
    entries = bib_data.entries
    # Extract first entry (supposed to be the only one). Iterating avoids
    # indexing keys(), which is not a list on Python 3.
    first_key = next(iter(entries), None)
    if first_key is None:
        raise ValueError('No bibliography entry found in %s.' % bibpath)
    # Build through cls so that subclasses get instances of their own type.
    return cls(bibentry=entries[first_key], metadata=metadata,
               citekey=first_key)
@classmethod # @classmethod
def from_bibpdffiles(cls, pdfpath, bibpath): # def from_bibpdffiles(cls, pdfpath, bibpath):
bib_data = cls.import_bibdata(bibpath) # bib_data = cls.import_bibdata(bibpath)
name, meta = cls.create_meta(bib_data, pdfpath=pdfpath) # name, meta = cls.create_meta(bib_data, pdfpath=pdfpath)
p = Paper(name, bib_data = bib_data, metadata = meta) # p = Paper(name, bib_data = bib_data, metadata = meta)
#
# return p
def __init__(self, bibentry=None, metadata=None, citekey=None):
    """Create a paper.

    A falsy *bibentry* is replaced by an empty ``Entry`` of type
    ``DEFAULT_TYPE``; a falsy *metadata* is replaced by the result of
    ``Paper.create_meta()``. *citekey* is stored as given.
    """
    self.bibentry = bibentry or Entry(DEFAULT_TYPE)
    self.metadata = metadata or Paper.create_meta()
    self.citekey = citekey
return p def __eq__(self, other):
return (type(other) is Paper
and self.bibentry == other.bibentry
and self.metadata == other.metadata
and self.citekey == other.citekey)
def __init__(self, bib_data = None, metadata = None, def __repr__(self):
citekey = None): return 'Paper(%s, %s, %s)' % (
self.citekey = citekey self.citekey, self.bibentry, self.metadata)
self.bib_data = bib_data
def __str__(self):
    """Return the same text as ``__repr__``."""
    text = self.__repr__()
    return text
# TODO add mechanism to verify keys (15/12/2012)
def has_file(self): def has_file(self):
"""Whether there exist a document file for this entry. """Whether there exist a document file for this entry.
@ -70,68 +100,53 @@ class Paper(object):
KeyError if no author nor editor is defined. KeyError if no author nor editor is defined.
""" """
author_key = 'author' author_key = 'author'
if not 'author' in self.bib_data.persons: if not 'author' in self.bibentry.persons:
author_key = 'editor' author_key = 'editor'
try: try:
first_author = self.bib_data.persons[author_key][0] first_author = self.bibentry.persons[author_key][0]
except KeyError: except KeyError:
raise(ValueError, raise(ValueError,
'No author or editor defined: cannot generate a citekey.') 'No author or editor defined: cannot generate a citekey.')
try: try:
year = entry.fields['year'] year = self.bibentry.fields['year']
except KeyError: except KeyError:
year = '' year = ''
prefix = u'{}{}'.format(first_author.last()[0][:6], year) citekey = u'{}{}'.format(u''.join(first_author.last()), year)
prefix = str2citekey(prefix) return str2citekey(citekey)
# Normalize chars and remove non-ascii
prefix = unicodedata.normalize('NFKD', prefix def save_to_disc(self, path):
).encode('ascii', 'ignore') """Creates a BibliographyData object containing a single entry and
letter = 0 saves it to disc.
citekey = prefix """
while citekey in self.citekeys and citekey not in allowed: if self.citekey is None:
citekey = prefix + ALPHABET[letter] raise(ValueError,
letter += 1 'No valid citekey initialized. Cannot save paper')
return citekey bibdata = BibliographyData(entries={self.citekey: self.bibentry})
files.save_bibdata(bibdata, self.citekey, path=path)
files.save_meta(self.metadata, self.citekey, path=path)
def save_to_disc(self):
files.save_bibdata(self.bib_data, self.citekey)
files.save_meta(self.metadata, self.citekey)
# TODO move to repo # TODO move to repo
@classmethod
def import_bibdata(cls, bibfile):
    """Import bibliographic data from a .bibyaml, .bib or .bibtex file.

    The loaded data is also printed, preceded by a line naming *bibfile*,
    before being returned.
    """
    bib_data = files.load_externalbibfile(os.path.abspath(bibfile))
    header = '{}bibliographic data present in {}{}{}'.format(
        color.grey, color.cyan, bibfile, color.end)
    print(header)
    print(pretty.bib_desc(bib_data))
    return bib_data
@classmethod @classmethod
def create_meta(cls, bib_data, pdfpath=None): def create_meta(cls, pdfpath=None):
if pdfpath is None: if pdfpath is None:
citekey = bib_data.entries.keys()[0] name, fullpdfpath, ext = None, None, None
# TODO this introduces a bug and a security issue since the name
# is used to generate a file name that is written. It should be
# escaped here. (22/10/2012)
fullpdfpath, ext = None, None
else: else:
fullpdfpath = os.path.abspath(pdfpath) fullpdfpath = os.path.abspath(pdfpath)
files.check_file(fullpdfpath) files.check_file(fullpdfpath)
name, ext = files.name_from_path(pdfpath) name, ext = files.name_from_path(pdfpath)
meta = {} meta = {}
meta['filename'] = name # TODO remove ?
meta['name'] = name
meta['extension'] = ext meta['extension'] = ext
meta['path'] = fullpdfpath meta['path'] = fullpdfpath
meta['notes'] = [] meta['notes'] = []
return meta
return name, meta

@ -34,7 +34,7 @@ class Repository(object):
def paper_from_citekey(self, citekey, fatal=True): def paper_from_citekey(self, citekey, fatal=True):
"""Load a paper by its citekey from disk, if necessary.""" """Load a paper by its citekey from disk, if necessary."""
try: try:
return Paper.from_disc(citekey) return Paper.load(citekey)
except KeyError: except KeyError:
if fatal: if fatal:
print('{}error{}: no paper with citekey {}{}{}'.format( print('{}error{}: no paper with citekey {}{}{}'.format(
@ -70,6 +70,7 @@ class Repository(object):
self.citekeys.append(p.citekey) self.citekeys.append(p.citekey)
# writing all to disk # writing all to disk
# TODO Update by giving filename (17/12/2012)
p.save_to_disc() p.save_to_disc()
files.save_papers(self.papers_config) files.save_papers(self.papers_config)
print "Added: %s" % p.citekey print "Added: %s" % p.citekey
@ -80,7 +81,8 @@ class Repository(object):
for k in bib_data.entries: for k in bib_data.entries:
sub_bib = type(bib_data)(preamble=bib_data._preamble) sub_bib = type(bib_data)(preamble=bib_data._preamble)
sub_bib.add_entry(k, bib_data.entries[k]) sub_bib.add_entry(k, bib_data.entries[k])
name, meta = Paper.create_meta(sub_bib, pdfpath=None) meta = Paper.create_meta(pdfpath=None)
name = meta['filename']
p = Paper(name, bib_data = sub_bib, metadata = meta) p = Paper(name, bib_data = sub_bib, metadata = meta)
self.add_paper(p) self.add_paper(p)

@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
import os
import unittest
import tempfile
import shutil
import yaml
from pybtex.database import Person
from papers.paper import Paper
from papers import files
# Bibyaml fixture: one article entry stored under the key Turing1950.
# NOTE(review): the nested YAML indentation looks lost in this copy of the
# file -- confirm against the repository before relying on it.
BIB = """
entries:
Turing1950:
author:
- first: 'Alan'
last: 'Turing'
title: 'Computing machinery and intelligence.'
type: article
year: '1950'
"""
# Metadata fixture: a paper with no attached document and no notes.
META = """
filename: null
extension: null
notes: []
path: null
"""
class TestCreateCitekey(unittest.TestCase):
    """Checks on the citekey produced by ``Paper.generate_citekey``."""

    def test_fails_on_empty_paper(self):
        # A paper without author or editor cannot produce a citekey.
        empty = Paper()
        self.assertRaises(ValueError, empty.generate_citekey)

    def test_escapes_chars(self):
        # Only the first author's last name is used; accents and forbidden
        # characters must be stripped from it.
        authors = [
            Person(last=u'Z ôu\\@/', first='Zde'),
            Person(string='John Doe')]
        paper = Paper()
        paper.bibentry.persons['author'] = authors
        self.assertEqual(paper.generate_citekey(), 'Zou')

    def test_simple(self):
        paper = Paper()
        paper.bibentry.fields['year'] = '2001'
        paper.bibentry.persons['author'] = [Person(string='John Doe')]
        self.assertEqual(paper.generate_citekey(), 'Doe2001')
class TestSaveLoad(unittest.TestCase):
    """Tests for loading a Paper from files and saving it to disc.

    Every test runs against a scratch directory that holds the ``bibdata``
    and ``meta`` folders plus one bibliography file and one metadata file
    written from the ``BIB`` and ``META`` fixtures.
    """

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        os.makedirs(os.path.join(self.tmpdir, 'bibdata'))
        os.makedirs(os.path.join(self.tmpdir, 'meta'))
        self.bibfile = os.path.join(self.tmpdir, 'bib.bibyaml')
        with open(self.bibfile, 'w') as f:
            f.write(BIB)
        self.metafile = os.path.join(self.tmpdir, 'meta.meta')
        with open(self.metafile, 'w') as f:
            f.write(META)
        # In-memory paper expected to match the content of the fixtures.
        self.turing1950 = Paper()
        self.turing1950.bibentry.fields['title'] = (
            u'Computing machinery and intelligence.')
        self.turing1950.bibentry.fields['year'] = u'1950'
        self.turing1950.bibentry.persons['author'] = [Person(u'Alan Turing')]
        self.turing1950.citekey = self.turing1950.generate_citekey()

    def tearDown(self):
        # unittest only calls the camelCase hook; a lowercase ``teardown``
        # is never run and would leave the scratch directory behind.
        shutil.rmtree(self.tmpdir)

    def test_load_valid(self):
        p = Paper.load(self.bibfile, self.metafile)
        self.assertEqual(self.turing1950, p)

    def test_save_fails_with_no_citekey(self):
        p = Paper()
        with self.assertRaises(ValueError):
            p.save_to_disc(self.tmpdir)

    def test_save_creates_bib(self):
        self.turing1950.save_to_disc(self.tmpdir)
        bibfile = files.path_to_paper_file('Turing1950', 'bib',
                                           path_to_repo=self.tmpdir)
        self.assertTrue(os.path.exists(bibfile))

    def test_save_creates_meta(self):
        self.turing1950.save_to_disc(self.tmpdir)
        metafile = files.path_to_paper_file('Turing1950', 'meta',
                                            path_to_repo=self.tmpdir)
        self.assertTrue(os.path.exists(metafile))

    def test_save_right_bib(self):
        self.turing1950.save_to_disc(self.tmpdir)
        bibfile = files.path_to_paper_file('Turing1950', 'bib',
                                           path_to_repo=self.tmpdir)
        with open(bibfile, 'r') as f:
            written = yaml.load(f)
        ok = yaml.load(BIB)
        self.assertEqual(written, ok)

    def test_save_right_meta(self):
        self.turing1950.save_to_disc(self.tmpdir)
        metafile = files.path_to_paper_file('Turing1950', 'meta',
                                            path_to_repo=self.tmpdir)
        with open(metafile, 'r') as f:
            written = yaml.load(f)
        ok = yaml.load(META)
        self.assertEqual(written, ok)
Loading…
Cancel
Save