Improves document handling.

- configuration is now referenced in repo object,
- introduces new class PaperInRepo,
- simplifies storage of documents in metadata,
- changes a few names.
main
Olivier Mangin 12 years ago
parent 49821eab51
commit 2d700073a8

@ -1,7 +1,3 @@
import os
import sys
import shutil
from .. import repo from .. import repo
from ..paper import Paper, NoDocumentFile from ..paper import Paper, NoDocumentFile
from .. import files from .. import files
@ -27,30 +23,23 @@ def command(config, ui, bibpath, copy):
if copy is None: if copy is None:
copy = config.get('papers', 'import-copy') copy = config.get('papers', 'import-copy')
rp = repo.Repository.from_directory() rp = repo.Repository.from_directory()
# Get directory for document
doc_path = files.clean_path(rp.get_document_directory(config))
if not (os.path.exists(doc_path) and os.path.isdir(doc_path)):
print "Document directory %s, does not exist." % doc_path
sys.exit(1)
# Extract papers from bib # Extract papers from bib
papers = Paper.many_from_path(bibpath, fatal=False) papers = Paper.many_from_path(bibpath, fatal=False)
for p in papers: for p in papers:
doc_file = None doc_file = None
try: try:
file_path = p.get_document_file_from_bibdata(remove=True) file_path = p.get_document_file_from_bibdata(remove=True)
if os.path.exists(file_path): if files.check_file(file_path):
doc_file = file_path doc_file = file_path
else: else:
print "File does not exist for %s." % p.citekey print("File does not exist for %s (%s)."
% (p.citekey, file_path))
except NoDocumentFile: except NoDocumentFile:
print "No file for %s." % p.citekey print "No file for %s." % p.citekey
rp.add_paper(p) rp.add_paper(p)
if doc_file: if doc_file:
if copy: if copy:
ext = os.path.splitext(doc_file)[1] rp.import_document(p.citekey, doc_file)
new_doc_file = os.path.join(doc_path, p.citekey + ext)
shutil.copy(doc_file, new_doc_file)
else: else:
new_doc_file = doc_file p.set_external_document(doc_file)
p.set_document(new_doc_file) rp.add_or_update(p)
rp.add_or_update(p)

@ -17,14 +17,10 @@ def command(config, ui, citekey):
rp = repo.Repository.from_directory() rp = repo.Repository.from_directory()
paper = rp.paper_from_ref(citekey, fatal=True) paper = rp.paper_from_ref(citekey, fatal=True)
try: try:
if paper.check_file(): filepath = paper.get_document_path()
filepath = paper.get_file_path() subprocess.Popen([config.get('papers', 'open-cmd'), filepath])
subprocess.Popen([config.get('papers', 'open-cmd'), print("%s opened." % colored(filepath, 'filepath'))
filepath])
print('{} opened.'.format(colored(filepath, 'filepath')))
else:
raise NoDocumentFile
except NoDocumentFile: except NoDocumentFile:
print('{}: No document associated to this entry {}{}{}'.format( print("%s: No document associated to this entry %s."
colored('error', 'error'), colored('citekey', 'citekey'))) % (colored('error', 'error'), colored(citekey, 'citekey')))
exit(-1) exit(-1)

@ -72,17 +72,15 @@ def name_from_path(fullpdfpath, verbose=False):
return name, ext return name, ext
def check_file(filepath): def check_file(path, fail=False):
if not os.path.exists(filepath): if fail:
print(colored('error', 'error') + if not os.path.exists(path):
': {} does not exists'.format( raise(IOError, "File does not exist: %s." % path)
colored(filepath, 'filepath'))) if not os.path.isfile(path):
exit(-1) raise(IOError, "%s is not a file." % path)
if not os.path.isfile(filepath): return True
print(colored('error', 'error') else:
+ ': {} is not a file'.format( return os.path.exists(path) and os.path.isfile(path)
colored(filepath, 'filepath')))
exit(-1)
# yaml I/O # yaml I/O
@ -99,7 +97,7 @@ def write_yamlfile(filepath, datamap):
def read_yamlfile(filepath): def read_yamlfile(filepath):
check_file(filepath) check_file(filepath, fail=True)
try: try:
with open(filepath, 'r') as f: with open(filepath, 'r') as f:
return yaml.load(f) return yaml.load(f)
@ -131,8 +129,7 @@ def load_meta(filepath):
# specific to bibliography data # specific to bibliography data
def load_externalbibfile(fullbibpath): def load_externalbibfile(fullbibpath):
check_file(fullbibpath) check_file(fullbibpath, fail=True)
filename, ext = os.path.splitext(os.path.split(fullbibpath)[1]) filename, ext = os.path.splitext(os.path.split(fullbibpath)[1])
if ext[1:] in FORMATS.keys(): if ext[1:] in FORMATS.keys():
with open(fullbibpath) as f: with open(fullbibpath) as f:

@ -1,6 +1,9 @@
import os import os
import unicodedata import unicodedata
import re import re
from cStringIO import StringIO
import glob
from pybtex.database import Entry, BibliographyData from pybtex.database import Entry, BibliographyData
@ -16,9 +19,7 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
% re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS)) % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))
BASE_META = { BASE_META = {
'filename': None, 'external-document': None,
'extension': None,
'path': None,
'notes': [] 'notes': []
} }
@ -30,6 +31,31 @@ def str2citekey(s):
return key return key
def get_bibentry_from_file(bibfile):
    """Extract first entry (supposed to be the only one) from given file.

    The file is loaded through ``files.load_externalbibfile``.

    Returns a ``(key, entry)`` pair for the first item of the loaded
    ``entries`` mapping.

    Raises IndexError when the loaded data contains no entry.
    """
    bib_data = files.load_externalbibfile(bibfile)
    entries = bib_data.entries
    # list() keeps the indexing valid when keys() hands back a view instead
    # of a list; on a plain list the result is the same as before.
    first_key = list(entries.keys())[0]
    return first_key, entries[first_key]
def get_bibentry_from_string(content):
    """Extract first entry (supposed to be the only one) from given string.

    ``content`` is wrapped in a ``StringIO`` and parsed through
    ``files.parse_bibdata``.

    Returns a ``(key, entry)`` pair for the first item of the parsed
    ``entries`` mapping.

    Raises IndexError when the parsed data contains no entry.
    """
    bib_data = files.parse_bibdata(StringIO(content))
    entries = bib_data.entries
    # list() keeps the indexing valid when keys() hands back a view instead
    # of a list; on a plain list the result is the same as before.
    first_key = list(entries.keys())[0]
    return first_key, entries[first_key]
def get_safe_metadata(metapath):
    """Read the yaml metadata stored at ``metapath``.

    When no path is supplied (``metapath`` is None) nothing is read and
    None is returned.
    """
    if metapath is not None:
        return files.read_yamlfile(metapath)
    return None
class NoDocumentFile(Exception): class NoDocumentFile(Exception):
pass pass
@ -69,20 +95,22 @@ class Paper(object):
# TODO add mechanism to verify keys (15/12/2012) # TODO add mechanism to verify keys (15/12/2012)
def has_file(self): def get_external_document_path(self):
"""Whether there exist a document file for this entry. if self.metadata['external-document'] is not None:
""" return self.metadata['external-document']
return self.metadata['path'] is not None
def get_file_path(self):
if self.has_file():
return self.metadata['path']
else: else:
raise NoDocumentFile raise NoDocumentFile
def check_file(self): def get_document_path(self):
path = self.get_file_path() return self.get_external_document_path()
return os.path.exists(path) and os.path.isfile(path)
def set_external_document(self, docpath):
fullpdfpath = os.path.abspath(docpath)
files.check_file(fullpdfpath, fail=True)
self.metadata['external-document'] = fullpdfpath
def check_document_path(self):
return files.check_file(self.get_external_document_path())
def generate_citekey(self): def generate_citekey(self):
"""Generate a citekey from bib_data. """Generate a citekey from bib_data.
@ -105,14 +133,6 @@ class Paper(object):
citekey = u'{}{}'.format(u''.join(first_author.last()), year) citekey = u'{}{}'.format(u''.join(first_author.last()), year)
return str2citekey(citekey) return str2citekey(citekey)
def set_document(self, docpath):
fullpdfpath = os.path.abspath(docpath)
files.check_file(fullpdfpath)
name, ext = files.name_from_path(docpath)
self.metadata['filename'] = name
self.metadata['extension'] = ext
self.metadata['path'] = fullpdfpath
def save_to_disc(self, bib_filepath, meta_filepath): def save_to_disc(self, bib_filepath, meta_filepath):
"""Creates a BibliographyData object containing a single entry and """Creates a BibliographyData object containing a single entry and
saves it to disc. saves it to disc.
@ -150,23 +170,11 @@ class Paper(object):
@classmethod @classmethod
def load(cls, bibpath, metapath=None): def load(cls, bibpath, metapath=None):
key, entry = cls.get_bibentry(bibpath) key, entry = get_bibentry_from_file(bibpath)
if metapath is None: metadata = get_safe_metadata(metapath)
metadata = None
else:
metadata = files.read_yamlfile(metapath)
p = Paper(bibentry=entry, metadata=metadata, citekey=key) p = Paper(bibentry=entry, metadata=metadata, citekey=key)
return p return p
@classmethod
def get_bibentry(cls, bibfile):
"""Extract first entry (supposed to be the only one) from given file.
"""
bib_data = files.load_externalbibfile(bibfile)
first_key = bib_data.entries.keys()[0]
first_entry = bib_data.entries[first_key]
return first_key, first_entry
@classmethod @classmethod
def create_meta(cls): def create_meta(cls):
return BASE_META.copy() return BASE_META.copy()
@ -194,3 +202,32 @@ class Paper(object):
except ValueError, e: except ValueError, e:
print "Warning, skipping paper (%s)." % e print "Warning, skipping paper (%s)." % e
return papers return papers
class PaperInRepo(Paper):
    """A paper bound to the repository it belongs to.

    Keeping a reference to the repository lets the paper look for its
    document in the repository document directory before falling back on
    the external document recorded in its metadata.
    """

    def __init__(self, repo, *args, **kwargs):
        """Build the paper as ``Paper`` does and remember ``repo``."""
        Paper.__init__(self, *args, **kwargs)
        self.repo = repo

    def get_document_path_in_repo(self):
        """Return the path of the document stored in the repository.

        The document is any file of the repository document directory
        named ``<citekey>.<something>``.

        Raises NoDocumentFile when no such file exists.
        """
        doc_dir = files.clean_path(self.repo.get_document_directory())
        pattern = os.path.join(doc_dir, "%s.*" % self.citekey)
        # glob gives no ordering guarantee: sort so that the same file is
        # picked on every call when several extensions coexist.
        found = sorted(glob.glob(pattern))
        if not found:
            raise NoDocumentFile
        return found[0]

    def get_document_path(self):
        """Return the in-repository document path, else the external one.

        Raises NoDocumentFile (from ``get_external_document_path``) when
        neither is available.
        """
        try:
            return self.get_document_path_in_repo()
        except NoDocumentFile:
            return self.get_external_document_path()

    @classmethod
    def load(cls, repo, bibpath, metapath=None):
        """Load a paper from its bib file and optional metadata file.

        The instance is built through ``cls`` so that subclasses get
        instances of their own type.
        """
        key, entry = get_bibentry_from_file(bibpath)
        metadata = get_safe_metadata(metapath)
        return cls(repo, bibentry=entry, metadata=metadata, citekey=key)

@ -14,12 +14,14 @@ def person_repr(p):
def short_authors(bibentry): def short_authors(bibentry):
authors = [person_repr(p) for p in bibentry.persons['author']] try:
if len(authors) < 3: authors = [person_repr(p) for p in bibentry.persons['author']]
return ', '.join(authors) if len(authors) < 3:
else: return ', '.join(authors)
return authors[0] + (' et al.' if len(authors) > 1 else '') else:
return authors[0] + (' et al.' if len(authors) > 1 else '')
except KeyError: # When no author is defined
return ''
def bib_oneliner(bibentry): def bib_oneliner(bibentry):
authors = short_authors(bibentry) authors = short_authors(bibentry)

@ -1,8 +1,10 @@
from .color import colored
import os import os
import shutil
import files import files
from paper import Paper from paper import Paper, PaperInRepo
from color import colored
import configs
ALPHABET = 'abcdefghijklmopqrstuvwxyz' ALPHABET = 'abcdefghijklmopqrstuvwxyz'
@ -14,16 +16,20 @@ DOC_DIR = 'doc'
class Repository(object): class Repository(object):
def __init__(self): def __init__(self, config=None):
self.papersdir = None self.papersdir = None
self.citekeys = [] self.citekeys = []
if config is None:
config = configs.CONFIG
self.config = config
# loading existing papers # loading existing papers
def paper_from_citekey(self, citekey): def paper_from_citekey(self, citekey):
"""Load a paper by its citekey from disk, if necessary.""" """Load a paper by its citekey from disk, if necessary."""
return Paper.load(self.path_to_paper_file(citekey, 'bib'), return PaperInRepo.load(
metapath=self.path_to_paper_file(citekey, 'meta')) self, self.path_to_paper_file(citekey, 'bib'),
metapath=self.path_to_paper_file(citekey, 'meta'))
def citekey_from_ref(self, ref, fatal=True): def citekey_from_ref(self, ref, fatal=True):
"""Tries to get citekey from given ref. """Tries to get citekey from given ref.
@ -48,9 +54,11 @@ class Repository(object):
# creating new papers # creating new papers
# Deprecated
# TODO merge
def add_paper_from_paths(self, docpath, bibpath): def add_paper_from_paths(self, docpath, bibpath):
p = Paper.load(bibpath) p = Paper.load(bibpath)
p.set_document(docpath) p.set_external_document(docpath)
self.add_paper(p) self.add_paper(p)
def add_paper(self, p): def add_paper(self, p):
@ -73,6 +81,15 @@ class Repository(object):
else: else:
self.save_paper(paper) self.save_paper(paper)
def remove(self, citekey):
    """Remove a paper and its files from the repository.

    Deletes the 'bib' and 'meta' files of the paper and, when a copy of
    its document is found in the repository document directory, that copy
    as well. A document stored outside of the repository is left alone.

    Raises ValueError when ``citekey`` is not registered.
    """
    # Local import: NoDocumentFile is not among the names this module
    # imports from paper at top level.
    from paper import NoDocumentFile
    if citekey not in self.citekeys:
        raise ValueError("Unknown citekey: %s." % citekey)
    # Load the paper while its files still exist on disk.
    paper = self.paper_from_citekey(citekey)
    try:
        doc_in_repo = paper.get_document_path_in_repo()
    except NoDocumentFile:
        doc_in_repo = None
    # The paper files are regular files (they are read as such when the
    # paper is loaded), so os.remove is needed; shutil.rmtree only accepts
    # directories.
    for f in ('bib', 'meta'):
        os.remove(self.path_to_paper_file(citekey, f))
    if doc_in_repo is not None:
        os.remove(doc_in_repo)
    self.citekeys.remove(citekey)
    # NOTE(review): if the list of citekeys is persisted elsewhere, it
    # presumably has to be saved by the caller -- confirm.
def save_paper(self, paper): def save_paper(self, paper):
if not paper.citekey in self.citekeys: if not paper.citekey in self.citekeys:
raise(ValueError('Paper not in repository, first add it.')) raise(ValueError('Paper not in repository, first add it.'))
@ -117,9 +134,9 @@ class Repository(object):
else: else:
raise(ValueError("%s is not a valid paper file." % file_)) raise(ValueError("%s is not a valid paper file." % file_))
def get_document_directory(self, config): def get_document_directory(self):
if config.has_option('papers', 'document-directory'): if self.config.has_option('papers', 'document-directory'):
return config.get('papers', 'document-directory') return self.config.get('papers', 'document-directory')
else: else:
return os.path.join(self.papersdir, DOC_DIR) return os.path.join(self.papersdir, DOC_DIR)
@ -127,6 +144,18 @@ class Repository(object):
for key in self.citekeys: for key in self.citekeys:
yield self.paper_from_citekey(key) yield self.paper_from_citekey(key)
def import_document(self, citekey, doc_file):
    """Copy ``doc_file`` into the repository document directory.

    The copy is named after ``citekey`` and keeps the extension of
    ``doc_file``.

    Returns the path of the copy.

    Raises ValueError when ``citekey`` is not registered or when the
    document directory does not exist.
    """
    if citekey not in self.citekeys:
        # Instantiate the exception: raising a (class, message) tuple
        # drops the message (and is an error on recent interpreters).
        raise ValueError("Unknown citekey: %s." % citekey)
    # Same clean-up of the configured directory as the one applied when
    # looking a document up in the repository.
    doc_path = files.clean_path(self.get_document_directory())
    if not os.path.isdir(doc_path):
        raise ValueError(
            "Document directory %s, does not exist." % doc_path)
    ext = os.path.splitext(doc_file)[1]
    new_doc_file = os.path.join(doc_path, citekey + ext)
    shutil.copy(doc_file, new_doc_file)
    return new_doc_file
@classmethod @classmethod
def from_directory(cls, papersdir=None): def from_directory(cls, papersdir=None):
repo = cls() repo = cls()

Loading…
Cancel
Save