Work in progress: improves UTF-8 support, cleans paper creation.

main
Olivier Mangin 12 years ago
parent a779fc57fa
commit f2e9625aac

@ -2,5 +2,3 @@ A paper corresponds to 3 files:
name.pdf a pdf or ps file, the paper itself, whose location is arbitrary
bibdata/name.bibyaml a bibyaml file with all bibliographic data.
meta/name.meta a metadata file for internal use, notes, citekeys, status, etc.

@ -22,14 +22,11 @@ def command(config):
print('{}initializing papers in {}{}{}'.format(
color.grey, color.cyan, papersdir, color.end))
os.makedirs(papersdir)
os.makedirs(papersdir+os.sep+'bibdata')
os.makedirs(papersdir+os.sep+'meta')
os.makedirs(os.path.join(papersdir, 'bibdata'))
os.makedirs(os.path.join(papersdir, 'meta'))
papers = {}
papers['count'] = 0
papers['citekeys'] = {}
papers['numbers'] = {}
papers['citekeys'] = []
files.save_papers(papers)
else:

@ -13,10 +13,10 @@ def command(config):
rp = repo.Repository()
articles = []
for n in sorted(rp.numbers.keys()):
paper = rp.paper_from_number(n, fatal = True)
for n in range(rp.size()):
paper = rp.paper_from_number(n, fatal=True)
bibdesc = pretty.bib_oneliner(paper.bib_data)
articles.append(u'{:3d} {}{}{}{} {}'.format(int(paper.number), color.purple, paper.citekey, color.end, (8-len(paper.citekey))*' ', bibdesc))
articles.append((u'{:3d} {}{}{}{} {}'.format(int(paper.number), color.purple, paper.citekey, color.end, (10 - len(paper.citekey))*' ', bibdesc)).encode('utf-8'))
with tempfile.NamedTemporaryFile(suffix=".tmp", delete=True) as tmpf:
tmpf.write('\n'.join(articles))

@ -1,4 +1,3 @@
import sys, os
import subprocess
import tempfile
@ -99,11 +98,11 @@ def load_papers():
return read_yamlfile(paperyaml)
def save_meta(meta_data, filename):
    """Write a paper's metadata to ``meta/<filename>.meta``.

    The file lives under the directory returned by ``find_papersdir()`` and
    is written with ``write_yamlfile``.

    Args:
        meta_data: the metadata to store.
        filename: base name of the metadata file, without the '.meta' suffix.
    """
    # The path is built once with os.path.join; the former os.sep
    # concatenation was a dead store that was immediately overwritten.
    filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta')
    write_yamlfile(filepath, meta_data)
def load_meta(filename):
    """Read a paper's metadata from ``meta/<filename>.meta``.

    Args:
        filename: base name of the metadata file, without the '.meta' suffix.

    Returns:
        Whatever ``read_yamlfile`` returns for that file.
    """
    # The path is built once with os.path.join; the former os.sep
    # concatenation was a dead store that was immediately overwritten.
    filepath = os.path.join(find_papersdir(), 'meta', filename + '.meta')
    return read_yamlfile(filepath)
# specific to bibliography data
@ -129,11 +128,11 @@ def load_externalbibfile(fullbibpath):
return bib_data
def load_bibdata(filename):
    """Load the bibliographic data stored in ``bibdata/<filename>.bibyaml``.

    Args:
        filename: base name of the file, without the '.bibyaml' suffix.

    Returns:
        The result of ``load_externalbibfile`` for that file.
    """
    # The path is built once with os.path.join; the former os.sep
    # concatenation was a dead store that was immediately overwritten.
    fullbibpath = os.path.join(find_papersdir(), 'bibdata',
                               filename + '.bibyaml')
    return load_externalbibfile(fullbibpath)
def save_bibdata(bib_data, filename):
filepath = find_papersdir() + os.sep + 'bibdata' + os.sep + filename + '.bibyaml'
filepath = os.path.join(find_papersdir(), 'bibdata', filename + '.bibyaml')
with open(filepath, 'w') as f:
parser = pybtex.database.output.bibyaml.Writer()
parser.write_stream(bib_data, f)

@ -1,10 +1,23 @@
import os
import unicodedata
import re
import files
import color
import pretty
try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already returns text

# Code points 0-31 and 127-159, as one text string.
CONTROL_CHARS = u''.join(
    _unichr(code) for code in list(range(0, 32)) + list(range(127, 160)))
# '/' is OK for bibtex but forbidden here since we transform citekeys into
# filenames.
CITEKEY_FORBIDDEN_CHARS = '@\'\\,#}{~%/'
# Character class matching any single character that must be dropped.
CITEKEY_EXCLUDE_RE = re.compile(
    '[%s]' % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))


def str2citekey(s):
    """Return ``s`` with every control or forbidden character removed.

    Args:
        s: the candidate citekey.

    Returns:
        ``s`` without any character from ``CONTROL_CHARS`` or
        ``CITEKEY_FORBIDDEN_CHARS``; possibly the empty string.
    """
    return CITEKEY_EXCLUDE_RE.sub('', s)
class NoDocumentFile(Exception):
    """Error type named after a missing document file.

    NOTE(review): the raising sites are not visible here; confirm usage.
    """
@ -15,11 +28,11 @@ class Paper(object):
"""
@classmethod
def from_disc(cls, name, citekey = None, number = None):
def from_disc(cls, name, citekey = None):
bib_data = files.load_bibdata(name)
metadata = files.load_meta(name)
p = Paper(name, bib_data = bib_data, metadata = metadata,
citekey = citekey, number = number)
p = Paper(name, bib_data = bib_data, metadata = metadata,
citekey = citekey)
return p
@classmethod
@ -27,16 +40,14 @@ class Paper(object):
bib_data = cls.import_bibdata(bibpath)
name, meta = cls.create_meta(bib_data, pdfpath=pdfpath)
p = Paper(name, bib_data = bib_data, metadata = meta)
return p
def __init__(self, name, bib_data = None, metadata = None,
citekey = None, number = None):
self.name = name
return p
def __init__(self, bib_data = None, metadata = None,
citekey = None):
self.citekey = citekey
self.bib_data = bib_data
self.metadata = metadata
self.citekey = citekey
self.number = number
def has_file(self):
"""Whether there exist a document file for this entry.
@ -51,28 +62,60 @@ class Paper(object):
def check_file(self):
    """Return ``files.check_file`` applied to this paper's file path."""
    document_path = self.get_file_path()
    return files.check_file(document_path)
def generate_citekey(self, existing=(), allowed=()):
    """Generate a citekey from the first entry of ``self.bib_data``.

    The key is made of up to six characters of the first author's (or,
    when there is no author, the first editor's) last name followed by the
    year, stripped of forbidden characters and reduced to ASCII.  While the
    candidate collides with ``existing``, a lowercase letter is appended.

    Args:
        existing: citekeys already in use.  A paper does not know the
            repository's keys, so the caller supplies them.
        allowed: citekeys that may be returned even if present in
            ``existing``.

    Returns:
        The generated citekey.

    Raises:
        ValueError: if no author nor editor is defined, or if every
            single-letter suffix is already taken.
    """
    import string  # local import: only needed for the suffix letters

    # bib_data is a database; persons and fields live on its first entry.
    entry = self.bib_data.entries[list(self.bib_data.entries.keys())[0]]
    author_key = 'author' if 'author' in entry.persons else 'editor'
    try:
        first_author = entry.persons[author_key][0]
    except (KeyError, IndexError):
        # IndexError covers a role that is present but empty.
        raise ValueError(
            'No author or editor defined: cannot generate a citekey.')
    try:
        year = entry.fields['year']
    except KeyError:
        year = ''
    prefix = u'{}{}'.format(first_author.last()[0][:6], year)
    prefix = str2citekey(prefix)
    # Normalize chars and remove non-ascii
    prefix = unicodedata.normalize('NFKD', prefix).encode('ascii', 'ignore')
    candidates = [prefix] + [prefix + letter
                             for letter in string.ascii_lowercase]
    for citekey in candidates:
        if citekey not in existing or citekey in allowed:
            return citekey
    raise ValueError(
        'Could not generate a unique citekey: all suffixes are taken.')
def save_to_disc(self):
    """Write the bibliographic data and the metadata of this paper.

    Both files are stored under ``self.citekey``, the same key that
    ``Paper.from_disc(citekey)`` is called with when a paper is loaded.
    The former writes under ``self.name`` are dropped: they duplicated
    every file under a second name.
    """
    files.save_bibdata(self.bib_data, self.citekey)
    files.save_meta(self.metadata, self.citekey)
# TODO move to repo
@classmethod
def import_bibdata(cls, bibfile):
    """Import bibliographic data from a .bibyaml, .bib or .bibtex file.

    Prints the source file name and a description of the loaded data.

    Args:
        bibfile: path to the bibliography file; made absolute before
            loading.

    Returns:
        The data returned by ``files.load_externalbibfile``.
    """
    fullbibpath = os.path.abspath(bibfile)
    bib_data = files.load_externalbibfile(fullbibpath)
    print('{}bibliographic data present in {}{}{}'.format(
        color.grey, color.cyan, bibfile, color.end))
    print(pretty.bib_desc(bib_data))
    return bib_data
@classmethod
def create_meta(cls, bib_data, pdfpath=None):
if pdfpath is None:
name = bib_data.entries.keys()[0]
citekey = bib_data.entries.keys()[0]
# TODO this introduces a bug and a security issue since the name
# is used to generate a file name that is written. It should be
# escaped here. (22/10/2012)
@ -80,15 +123,15 @@ class Paper(object):
else:
fullpdfpath = os.path.abspath(pdfpath)
files.check_file(fullpdfpath)
name, ext = files.name_from_path(pdfpath)
meta = {}
meta['name'] = name
meta['extension'] = ext
meta['path'] = fullpdfpath
meta['notes'] = []
return name, meta

@ -1,4 +1,5 @@
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
import argparse
import collections
@ -7,7 +8,7 @@ from papers import commands
cmds = collections.OrderedDict([
('init', commands.init_cmd),
('add' , commands.add_cmd),
('add', commands.add_cmd),
('add_library', commands.add_library_cmd),
('list', commands.list_cmd),
('open', commands.open_cmd),

@ -3,11 +3,11 @@
import color
def person_repr(p):
    """Return a one-line representation of a person's name.

    First, middle and lineage parts are requested abbreviated; prelast and
    last parts are requested in full.  Empty parts are skipped so that no
    double space appears.

    Args:
        p: object exposing ``first``, ``middle``, ``prelast``, ``last`` and
            ``lineage`` methods, each accepting ``abbr`` and returning a
            sequence of strings.

    Returns:
        The non-empty name parts joined by single spaces.
    """
    name_parts = [
        p.first(abbr=True),
        p.middle(abbr=True),
        p.prelast(abbr=False),
        p.last(abbr=False),
        p.lineage(abbr=True),
    ]
    # Unicode literals keep the result a text string even for non-ASCII
    # names; a second, unreachable byte-string return was removed.
    joined = (u' '.join(part) for part in name_parts)
    return u' '.join(s for s in joined if s)
def bib_oneliner(bib_data):
article = bib_data.entries[list(bib_data.entries.keys())[0]]
@ -26,7 +26,7 @@ def bib_oneliner(bib_data):
def bib_desc(bib_data):
    """Return a multi-line description of the first entry of ``bib_data``.

    The description has one ``author: <name>`` line per author, followed by
    one ``<field>: <value>`` line per field of the entry.

    Args:
        bib_data: bibliography object whose ``entries`` mapping holds at
            least one entry with ``persons['author']`` and ``fields``.

    Returns:
        The description as a text string.
    """
    article = bib_data.entries[list(bib_data.entries.keys())[0]]
    # Text templates are required: on Python 2 a byte-string template fails
    # with UnicodeEncodeError as soon as a value is non-ASCII.
    s = u'\n'.join(u'author: {}'.format(person_repr(p))
                   for p in article.persons['author'])
    s += u'\n'
    s += u'\n'.join(u'{}: {}'.format(k, v)
                    for k, v in article.fields.items())
    return s

@ -3,24 +3,26 @@ import color
from paper import Paper
# Letters appended to a citekey prefix to disambiguate collisions.  The
# previous literal skipped 'n'.
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
# Former lowercase spelling, kept as an alias.
alphabet = ALPHABET
class Repository(object):
def __init__(self, paperdir=None):
    """Load the repository configuration.

    Args:
        paperdir: directory of the repository; when omitted (or empty),
            ``files.find_papersdir()`` is used instead.
    """
    if paperdir:
        self.paperdir = paperdir
    else:
        self.paperdir = files.find_papersdir()
    # NOTE(review): load_papers() takes no argument here, so it may not
    # honour an explicit paperdir -- confirm against files.load_papers.
    self.papers_config = files.load_papers()
    self.citekeys = self.papers_config['citekeys']
    # The 'numbers' table may be absent from the configuration; fall back
    # to an empty mapping instead of raising KeyError.
    self.numbers = self.papers_config.get('numbers', {})
# loading existing papers
def paper_from_number(self, number, fatal = True):
try:
citekey = self.numbers[int(number)]
citekey = self.citekeys[int(number)]
paper = self.paper_from_citekey(citekey)
paper.number = int(number)
return paper
except KeyError:
if fatal:
@ -29,13 +31,10 @@ class Repository(object):
exit(-1)
raise IOError, 'file not found'
def paper_from_citekey(self, citekey, fatal = True):
def paper_from_citekey(self, citekey, fatal=True):
"""Load a paper by its citekey from disk, if necessary."""
try:
name = self.citekeys[citekey]
paper = Paper.from_disc(name, citekey = citekey)
paper.citekey = citekey
return paper
return Paper.from_disc(citekey)
except KeyError:
if fatal:
print('{}error{}: no paper with citekey {}{}{}'.format(
@ -59,24 +58,20 @@ class Repository(object):
# creating new papers
def add_paper_from_paths(self, pdfpath, bibpath):
    """Build a paper from a document file and a bibliography file, then
    add it to the repository.

    Args:
        pdfpath: path of the document file.
        bibpath: path of the bibliography file.
    """
    # Built once: the duplicated call loaded (and printed) the same
    # bibliography a second time.
    p = Paper.from_bibpdffiles(pdfpath, bibpath)
    self.add_paper(p)
def add_paper(self, p):
    """Register paper ``p`` in the repository and write it to disk.

    A valid citekey is derived from the first entry of ``p.bib_data``,
    recorded in the configuration, and both the paper and the
    configuration are saved.

    Args:
        p: the paper to add; its ``citekey`` attribute is overwritten.

    Returns:
        The paper ``p``.

    Raises:
        ValueError: propagated from ``get_valid_citekey``.
    """
    # updating papersconfig
    bib_data_entry = p.bib_data.entries[list(p.bib_data.entries.keys())[0]]
    p.citekey = self.get_valid_citekey(bib_data_entry)
    config_keys = self.papers_config['citekeys']
    config_keys.append(p.citekey)
    # self.citekeys normally *is* the configuration list; appending to
    # both would register the citekey twice.
    if self.citekeys is not config_keys:
        self.citekeys.append(p.citekey)
    # writing all to disk
    p.save_to_disc()
    files.save_papers(self.papers_config)
    print("Added: %s" % p.citekey)
    return p
@ -88,38 +83,44 @@ class Repository(object):
name, meta = Paper.create_meta(sub_bib, pdfpath=None)
p = Paper(name, bib_data = sub_bib, metadata = meta)
self.add_paper(p)
def get_valid_citekey(self, entry):
    """Return a citekey for ``entry`` that is valid in this repository.

    The entry's own key is cleaned with ``str2citekey``; if nothing is
    left, a new key is generated with ``create_citekey``.

    Args:
        entry: bibliography entry with a ``key`` attribute.

    Returns:
        The cleaned or generated citekey.

    Raises:
        ValueError: if the cleaned key is already used in the repository.
    """
    # NOTE(review): str2citekey is defined alongside Paper; confirm it is
    # imported in this module.
    citekey = str2citekey(entry.key)
    if citekey in self.citekeys:
        # Instantiate the exception: raising the (type, message) tuple
        # loses the message on Python 2 and is a TypeError on Python 3.
        raise ValueError("An entry with same citekey already exists.")
    if not citekey:
        citekey = self.create_citekey(entry)
    return citekey
def create_citekey(self, entry, allowed=tuple()):
    """Create a citekey for ``entry`` that is unused in this repository.

    The key is made of up to six characters of the first author's (or,
    when there is no author, the first editor's) last name followed by the
    year, stripped of forbidden characters and reduced to ASCII.  While the
    candidate is already in ``self.citekeys``, a letter of ``ALPHABET`` is
    appended instead.

    Args:
        entry: bibliography entry exposing ``persons`` and ``fields``.
        allowed: citekeys that may be returned even if already in use.

    Returns:
        The generated citekey.

    Raises:
        ValueError: if no author nor editor is defined, or if every
            single-letter suffix is already taken.
    """
    author_key = 'author' if 'author' in entry.persons else 'editor'
    try:
        first_author = entry.persons[author_key][0]
    except (KeyError, IndexError):
        # IndexError covers a role that is present but empty.  The
        # exception is instantiated: raising the (type, message) tuple
        # loses the message on Python 2 and is a TypeError on Python 3.
        raise ValueError(
            'No author or editor defined: cannot generate a citekey.')
    try:
        year = entry.fields['year']
    except KeyError:
        year = ''
    prefix = u'{}{}'.format(first_author.last()[0][:6], year)
    prefix = str2citekey(prefix)
    # Normalize chars and remove non-ascii
    prefix = unicodedata.normalize('NFKD', prefix).encode('ascii', 'ignore')
    letter = 0
    citekey = prefix
    while citekey in self.citekeys and citekey not in allowed:
        if letter >= len(ALPHABET):
            # Fail explicitly instead of running off the end of ALPHABET.
            raise ValueError(
                'Could not generate a unique citekey: all suffixes are '
                'taken.')
        citekey = prefix + ALPHABET[letter]
        letter += 1
    return citekey
def create_number(self):
    """Return the stored paper count and increment it in the configuration."""
    config = self.papers_config
    issued = int(config['count'])
    config['count'] = issued + 1
    return issued
def size(self):
    """Return the number of citekeys registered in the repository."""
    registered = self.citekeys
    return len(registered)

Loading…
Cancel
Save