Clean up Paper class and add tests.

main
Olivier Mangin 12 years ago
parent f2e9625aac
commit 2d758ea16e

@ -1,12 +1,7 @@
import sys, os
import os
import subprocess
import tempfile
try:
import ConfigParser as configparser
except ImportError:
import configparser
import yaml
import color
@ -24,87 +19,116 @@ try:
import pybtex.database.output.bibyaml
except ImportError:
print '{}error{}: you need to install Pybtex; try running \'pip install pybtex\' or \'easy_install pybtex\''.format(color.red, color.end)
print '{}error{}: you need to install Pybtex; try running \'pip install'
'pybtex\' or \'easy_install pybtex\''.format(color.red, color.end)
# Cached path of the .papers directory; set on first use by find_papersdir().
_papersdir = None

# Value of the EDITOR environment variable, or 'nano' when it is not set.
EDITOR = os.environ.get('EDITOR', 'nano')
def find_papersdir():
    """Find the .papers directory in this directory or a parent directory.

    The search starts at the current working directory and moves up one
    level at a time until a directory named ``.papers`` is found or the
    filesystem root is reached. The result is cached in the module-level
    ``_papersdir`` so the search runs at most once per process.

    Returns:
        The path of the ``.papers`` directory.

    When no repository is found, an error is printed and the process exits
    with status -1.
    """
    global _papersdir
    if _papersdir is None:
        curdir = os.path.abspath(os.getcwd())
        while True:
            candidate = os.path.join(curdir, '.papers')
            # isdir() is already False for a missing path, so a separate
            # exists() check is not needed.
            if os.path.isdir(candidate):
                _papersdir = candidate
                break
            parent = os.path.dirname(curdir)
            if parent == curdir:
                # dirname() of the root is the root itself: nothing left
                # to search (works for '/' and for drive roots alike).
                break
            curdir = parent
        if _papersdir is None:
            # A single parenthesised expression: the two literals are joined
            # (note the trailing space) and then formatted as a whole.
            print('{}error{} : no papers repo found in this directory or in '
                  'any parent directory.{}'.format(
                      color.red, color.grey, color.end))
            exit(-1)
    return _papersdir
def name_from_path(fullpdfpath, verbose=False):
    """Split a document path into its base name and its extension.

    Args:
        fullpdfpath: path of the document file.
        verbose: when true, print a warning if the extension is neither
            ``.pdf`` nor ``.ps``.

    Returns:
        A ``(name, ext)`` tuple; ``ext`` keeps its leading dot and is empty
        when the file name has no extension.
    """
    basename = os.path.basename(fullpdfpath)
    name, ext = os.path.splitext(basename)
    if verbose and ext not in ('.pdf', '.ps'):
        print('{}warning{}: extension {}{}{} not recognized{}'.format(
            color.yellow, color.grey, color.cyan, ext, color.grey,
            color.end))
    return name, ext
def check_file(filepath):
    """Abort unless ``filepath`` is an existing regular file.

    Prints an error and exits with status -1 when the path does not exist
    or exists but is not a file. Returns ``None`` otherwise.
    """
    if not os.path.exists(filepath):
        problem = '{}error{}: {}{}{} does not exists{}'
    elif not os.path.isfile(filepath):
        problem = '{}error{}: {}{}{} is not a file{}'
    else:
        return
    print(problem.format(
        color.red, color.grey, color.cyan, filepath, color.grey,
        color.end))
    exit(-1)
# yaml I/O
def write_yamlfile(filepath, datamap):
    """Dump ``datamap`` as YAML into the file at ``filepath``.

    Args:
        filepath: path of the file to create or overwrite.
        datamap: object passed to ``yaml.dump``.

    If an ``IOError`` occurs, an error is printed and the process exits
    with status -1.
    """
    try:
        with open(filepath, 'w') as f:
            yaml.dump(datamap, f)
    except IOError:
        # This is the write path; the message previously said "read".
        print('{}error{} : impossible to write file {}{:s}{}'.format(
            color.red, color.grey, color.cyan, filepath, color.end))
        exit(-1)
def read_yamlfile(filepath):
    """Load the YAML document stored at ``filepath`` and return its content.

    The path is first validated with ``check_file``. If an ``IOError``
    occurs while reading, an error is printed and the process exits with
    status -1.
    """
    check_file(filepath)
    try:
        with open(filepath, 'r') as stream:
            # NOTE(review): yaml.load can build arbitrary Python objects;
            # consider yaml.safe_load if these files may be untrusted.
            content = yaml.load(stream)
    except IOError:
        print('{}error{} : impossible to read file {}{:s}{}'.format(
            color.red, color.grey, color.cyan, filepath, color.end))
        exit(-1)
    return content
def save_papers(datamap):
    """Write ``datamap`` to ``papers.yaml`` inside the .papers directory.

    The path is built with ``os.path.join``, the same way ``load_papers``
    builds it, so both functions always address the same file.
    """
    paperyaml = os.path.join(find_papersdir(), 'papers.yaml')
    write_yamlfile(paperyaml, datamap)
def load_papers():
    """Return the content of ``papers.yaml`` from the .papers directory."""
    return read_yamlfile(os.path.join(find_papersdir(), 'papers.yaml'))
def save_meta(meta_data, filename):
filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta')
def path_to_paper_file(name, file_, path_to_repo=None):
    """Return the path of one of the files that store a paper.

    Args:
        name: base name of the paper's files, without extension.
        file_: which file to locate, either ``'bib'`` (bibliographic data)
            or ``'meta'`` (metadata).
        path_to_repo: repository directory. When ``None`` it is looked up
            with ``find_papersdir()``.

    Returns:
        ``<repo>/bibdata/<name>.bibyaml`` for ``'bib'`` and
        ``<repo>/meta/<name>.meta`` for ``'meta'``.

    Raises:
        ValueError: if ``file_`` is not one of the supported kinds.
    """
    locations = {
        'bib': ('bibdata', '.bibyaml'),
        'meta': ('meta', '.meta'),
    }
    try:
        subdir, extension = locations[file_]
    except (KeyError, TypeError):
        # Raise an exception instance: ``raise(ValueError, msg)`` raises a
        # tuple, which loses the message on Python 2 and is a TypeError on
        # Python 3.
        raise ValueError("%s is not a valid paper file." % (file_,))
    if path_to_repo is None:
        path_to_repo = find_papersdir()
    return os.path.join(path_to_repo, subdir, name + extension)
def save_meta(meta_data, filename, path=None):
    """Write ``meta_data`` as YAML to the ``.meta`` file of paper ``filename``.

    ``path`` is the repository directory, forwarded to
    ``path_to_paper_file`` as ``path_to_repo``.
    """
    write_yamlfile(
        path_to_paper_file(filename, 'meta', path_to_repo=path), meta_data)
def load_meta(filename):
filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta')
def load_meta(filename, path=None):
    """Return the metadata read from the ``.meta`` file of paper ``filename``.

    ``path`` is the repository directory, forwarded to
    ``path_to_paper_file`` as ``path_to_repo``.
    """
    return read_yamlfile(
        path_to_paper_file(filename, 'meta', path_to_repo=path))
# specific to bibliography data
def load_externalbibfile(fullbibpath):
@ -127,22 +151,20 @@ def load_externalbibfile(fullbibpath):
return bib_data
def load_bibdata(filename):
fullbibpath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml')
return load_externalbibfile(fullbibpath)
def save_bibdata(bib_data, filename):
filepath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml')
def load_bibdata(filename, path=None):
    """Load the bibliographic data stored for paper ``filename``.

    ``path`` is the repository directory, forwarded to
    ``path_to_paper_file`` as ``path_to_repo``.
    """
    return load_externalbibfile(
        path_to_paper_file(filename, 'bib', path_to_repo=path))
def save_bibdata(bib_data, filename, path=None):
    """Write ``bib_data`` to the ``.bibyaml`` file of paper ``filename``.

    The data is serialized with pybtex's bibyaml ``Writer``. ``path`` is the
    repository directory, forwarded to ``path_to_paper_file``.
    """
    target = path_to_paper_file(filename, 'bib', path_to_repo=path)
    with open(target, 'w') as stream:
        writer = pybtex.database.output.bibyaml.Writer()
        writer.write_stream(bib_data, stream)
# vim input
try:
EDITOR = os.environ['EDITOR']
except KeyError:
EDITOR = 'nano'
# vim input
def vim_input(initial=""):
"""Use an editor to get input"""

@ -2,11 +2,15 @@ import os
import unicodedata
import re
from pybtex.database import Entry, BibliographyData
import files
import color
import pretty
# Entry type used when a Paper is created without a bibentry.
DEFAULT_TYPE = 'article'
# All characters with code points 0-31 and 127-159, joined into one string.
CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
# Printable characters that are not allowed in a citekey.
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/' # '/' is OK for bibtex but forbidden
# here since we transform citekeys into filenames
@ -15,7 +19,10 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
def str2citekey(s):
    """Reduce ``s`` to a string that can be used as a citekey.

    The text is NFKD-normalized and encoded to ASCII, dropping every
    character that has no ASCII form; characters matched by
    ``CITEKEY_EXCLUDE_RE`` are then removed.
    """
    # Normalize chars and remove non-ascii
    ascii_text = unicodedata.normalize(
        'NFKD', unicode(s)).encode('ascii', 'ignore')
    return CITEKEY_EXCLUDE_RE.sub('', ascii_text)
class NoDocumentFile(Exception):
@ -23,31 +30,54 @@ class NoDocumentFile(Exception):
class Paper(object):
"""Paper class. The object is responsible for the integrity of its own data,
and for loading and writing it to disc.
"""Paper class. The object is responsible for the integrity of its own
data, and for loading and writing it to disc.
The object uses a pybtex.database.BibliographyData object to store
bibliography data and an additional dictionary to store metadata.
"""
@classmethod
def load(cls, bibpath, metapath):
    """Build a Paper from a bibliography file and a metadata file.

    Args:
        bibpath: path of the bibliography file, read with
            ``files.load_externalbibfile``.
        metapath: path of the YAML metadata file.

    Returns:
        A Paper whose citekey and bibentry are the first entry of the
        bibliography file.
    """
    entries = files.load_externalbibfile(bibpath).entries
    metadata = files.read_yamlfile(metapath)
    # Extract first entry (supposed to be the only one)
    citekey = entries.keys()[0]
    return Paper(bibentry=entries[citekey], metadata=metadata,
                 citekey=citekey)
@classmethod
def from_bibpdffiles(cls, pdfpath, bibpath):
bib_data = cls.import_bibdata(bibpath)
name, meta = cls.create_meta(bib_data, pdfpath=pdfpath)
p = Paper(name, bib_data = bib_data, metadata = meta)
# @classmethod
# def from_bibpdffiles(cls, pdfpath, bibpath):
# bib_data = cls.import_bibdata(bibpath)
# name, meta = cls.create_meta(bib_data, pdfpath=pdfpath)
# p = Paper(name, bib_data = bib_data, metadata = meta)
#
# return p
def __init__(self, bibentry=None, metadata=None, citekey=None):
if not bibentry:
bibentry = Entry(DEFAULT_TYPE)
self.bibentry = bibentry
if not metadata:
metadata = Paper.create_meta()
self.metadata = metadata
self.citekey = citekey
return p
def __eq__(self, other):
return (type(other) is Paper
and self.bibentry == other.bibentry
and self.metadata == other.metadata
and self.citekey == other.citekey)
def __init__(self, bib_data = None, metadata = None,
citekey = None):
self.citekey = citekey
self.bib_data = bib_data
self.metadata = metadata
def __repr__(self):
return 'Paper(%s, %s, %s)' % (
self.citekey, self.bibentry, self.metadata)
def __str__(self):
return self.__repr__()
# TODO add mechanism to verify keys (15/12/2012)
def has_file(self):
"""Whether there exist a document file for this entry.
@ -70,68 +100,53 @@ class Paper(object):
KeyError if no author nor editor is defined.
"""
author_key = 'author'
if not 'author' in self.bib_data.persons:
if not 'author' in self.bibentry.persons:
author_key = 'editor'
try:
first_author = self.bib_data.persons[author_key][0]
first_author = self.bibentry.persons[author_key][0]
except KeyError:
raise(ValueError,
'No author or editor defined: cannot generate a citekey.')
try:
year = entry.fields['year']
year = self.bibentry.fields['year']
except KeyError:
year = ''
prefix = u'{}{}'.format(first_author.last()[0][:6], year)
prefix = str2citekey(prefix)
# Normalize chars and remove non-ascii
prefix = unicodedata.normalize('NFKD', prefix
).encode('ascii', 'ignore')
letter = 0
citekey = prefix
while citekey in self.citekeys and citekey not in allowed:
citekey = prefix + ALPHABET[letter]
letter += 1
return citekey
def save_to_disc(self):
files.save_bibdata(self.bib_data, self.citekey)
files.save_meta(self.metadata, self.citekey)
citekey = u'{}{}'.format(u''.join(first_author.last()), year)
return str2citekey(citekey)
def save_to_disc(self, path):
"""Creates a BibliographyData object containing a single entry and
saves it to disc.
"""
if self.citekey is None:
raise(ValueError,
'No valid citekey initialized. Cannot save paper')
bibdata = BibliographyData(entries={self.citekey: self.bibentry})
files.save_bibdata(bibdata, self.citekey, path=path)
files.save_meta(self.metadata, self.citekey, path=path)
# TODO move to repo
@classmethod
def import_bibdata(cls, bibfile):
    """Import bibliographic data from a .bibyaml, .bib or .bibtex file.

    The file is loaded through ``files.load_externalbibfile`` using its
    absolute path; a description of the loaded data is printed before it is
    returned.
    """
    bib_data = files.load_externalbibfile(os.path.abspath(bibfile))
    banner = '{}bibliographic data present in {}{}{}'.format(
        color.grey, color.cyan, bibfile, color.end)
    print(banner)
    print(pretty.bib_desc(bib_data))
    return bib_data
@classmethod
def create_meta(cls, bib_data, pdfpath=None):
def create_meta(cls, pdfpath=None):
if pdfpath is None:
citekey = bib_data.entries.keys()[0]
# TODO this introduces a bug and a security issue since the name
# is used to generate a file name that is written. It should be
# escaped here. (22/10/2012)
fullpdfpath, ext = None, None
name, fullpdfpath, ext = None, None, None
else:
fullpdfpath = os.path.abspath(pdfpath)
files.check_file(fullpdfpath)
name, ext = files.name_from_path(pdfpath)
meta = {}
meta['name'] = name
meta['filename'] = name # TODO remove ?
meta['extension'] = ext
meta['path'] = fullpdfpath
meta['notes'] = []
return name, meta
return meta

@ -34,7 +34,7 @@ class Repository(object):
def paper_from_citekey(self, citekey, fatal=True):
"""Load a paper by its citekey from disk, if necessary."""
try:
return Paper.from_disc(citekey)
return Paper.load(citekey)
except KeyError:
if fatal:
print('{}error{}: no paper with citekey {}{}{}'.format(
@ -70,6 +70,7 @@ class Repository(object):
self.citekeys.append(p.citekey)
# writing all to disk
# TODO Update by giving filename (17/12/2012)
p.save_to_disc()
files.save_papers(self.papers_config)
print "Added: %s" % p.citekey
@ -80,7 +81,8 @@ class Repository(object):
for k in bib_data.entries:
sub_bib = type(bib_data)(preamble=bib_data._preamble)
sub_bib.add_entry(k, bib_data.entries[k])
name, meta = Paper.create_meta(sub_bib, pdfpath=None)
meta = Paper.create_meta(pdfpath=None)
name = meta['filename']
p = Paper(name, bib_data = sub_bib, metadata = meta)
self.add_paper(p)

@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
import os
import unittest
import tempfile
import shutil
import yaml
from pybtex.database import Person
from papers.paper import Paper
from papers import files
# Bibliography fixture: one article entry keyed 'Turing1950'. It is written
# to a .bibyaml file for Paper.load and is also the expected content of the
# file produced by Paper.save_to_disc.
BIB = """
entries:
Turing1950:
author:
- first: 'Alan'
last: 'Turing'
title: 'Computing machinery and intelligence.'
type: article
year: '1950'
"""
# Metadata fixture: the metadata of a paper that has no document file
# attached (every field null, no notes).
META = """
filename: null
extension: null
notes: []
path: null
"""
class TestCreateCitekey(unittest.TestCase):
def test_fails_on_empty_paper(self):
paper = Paper()
with self.assertRaises(ValueError):
paper.generate_citekey()
def test_escapes_chars(self):
paper = Paper()
paper.bibentry.persons['author'] = [
Person(last=u'Z ôu\\@/', first='Zde'),
Person(string='John Doe')]
key = paper.generate_citekey()
self.assertEqual(key, 'Zou')
def test_simple(self):
paper = Paper()
paper.bibentry.persons['author'] = [Person(string='John Doe')]
paper.bibentry.fields['year'] = '2001'
key = paper.generate_citekey()
self.assertEqual(key, 'Doe2001')
class TestSaveLoad(unittest.TestCase):
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.tmpdir, 'bibdata'))
os.makedirs(os.path.join(self.tmpdir, 'meta'))
self.bibfile = os.path.join(self.tmpdir, 'bib.bibyaml')
with open(self.bibfile, 'w') as f:
f.write(BIB)
self.metafile = os.path.join(self.tmpdir, 'meta.meta')
with open(self.metafile, 'w') as f:
f.write(META)
self.turing1950 = Paper()
self.turing1950.bibentry.fields['title'] = u'Computing machinery and intelligence.'
self.turing1950.bibentry.fields['year'] = u'1950'
self.turing1950.bibentry.persons['author'] = [Person(u'Alan Turing')]
self.turing1950.citekey = self.turing1950.generate_citekey()
def test_load_valid(self):
p = Paper.load(self.bibfile, self.metafile)
self.assertEqual(self.turing1950, p)
def test_save_fails_with_no_citekey(self):
p = Paper()
with self.assertRaises(ValueError):
p.save_to_disc(self.tmpdir)
def test_save_creates_bib(self):
self.turing1950.save_to_disc(self.tmpdir)
bibfile = files.path_to_paper_file('Turing1950', 'bib',
path_to_repo=self.tmpdir)
self.assertTrue(os.path.exists(bibfile))
def test_save_creates_meta(self):
self.turing1950.save_to_disc(self.tmpdir)
metafile = files.path_to_paper_file('Turing1950', 'meta',
path_to_repo=self.tmpdir)
self.assertTrue(os.path.exists(metafile))
def test_save_right_bib(self):
self.turing1950.save_to_disc(self.tmpdir)
bibfile = files.path_to_paper_file('Turing1950', 'bib',
path_to_repo=self.tmpdir)
with open(bibfile, 'r') as f:
written = yaml.load(f)
ok = yaml.load(BIB)
self.assertEqual(written, ok)
def test_save_right_meta(self):
self.turing1950.save_to_disc(self.tmpdir)
metafile = files.path_to_paper_file('Turing1950', 'meta',
path_to_repo=self.tmpdir)
with open(metafile, 'r') as f:
written = yaml.load(f)
ok = yaml.load(META)
self.assertEqual(written, ok)
def teardown(self):
shutil.rmtree(self.tmpdir)
Loading…
Cancel
Save