|
|
@ -1,16 +1,14 @@
|
|
|
|
import os
|
|
|
|
|
|
|
|
import shutil
|
|
|
|
import shutil
|
|
|
|
import glob
|
|
|
|
import glob
|
|
|
|
import itertools
|
|
|
|
import itertools
|
|
|
|
|
|
|
|
|
|
|
|
from . import files
|
|
|
|
from . import bibstruct
|
|
|
|
from .paper import PaperInRepo, NoDocumentFile, check_citekey
|
|
|
|
from . import events
|
|
|
|
from .events import RemoveEvent, RenameEvent, AddEvent
|
|
|
|
from . import datacache
|
|
|
|
|
|
|
|
from .paper import Paper
|
|
|
|
|
|
|
|
|
|
|
|
BASE_FILE = 'papers.yaml'
|
|
|
|
def _base27(n):
|
|
|
|
BIB_DIR = 'bibdata'
|
|
|
|
return _base27((n - 1) // 26) + chr(ord('a') + ((n - 1) % 26)) if n else ''
|
|
|
|
META_DIR = 'meta'
|
|
|
|
|
|
|
|
DOC_DIR = 'doc'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CiteKeyCollision(Exception):
|
|
|
|
class CiteKeyCollision(Exception):
|
|
|
@ -23,199 +21,110 @@ class InvalidReference(Exception):
|
|
|
|
|
|
|
|
|
|
|
|
class Repository(object):
|
|
|
|
class Repository(object):
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, config, load=True):
|
|
|
|
def __init__(self, config):
|
|
|
|
"""Initialize the repository.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
:param load: if load is True, load the repository from disk,
|
|
|
|
|
|
|
|
from path config.papers_dir.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
self.config = config
|
|
|
|
self.config = config
|
|
|
|
self.citekeys = []
|
|
|
|
self._citekeys = None
|
|
|
|
if load:
|
|
|
|
self.databroker = datacache.DataCache(self.config.pubsdir)
|
|
|
|
self.load()
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
# @classmethod
|
|
|
|
def citekeys(self):
|
|
|
|
# def from_directory(cls, config, papersdir=None):
|
|
|
|
if self._citekeys is None:
|
|
|
|
# repo = cls(config)
|
|
|
|
self._citekeys = self.databroker.citekeys()
|
|
|
|
# if papersdir is None:
|
|
|
|
return self._citekeys
|
|
|
|
# papersdir = config.papers_dir
|
|
|
|
|
|
|
|
# repo.papersdir = files.clean_path(papersdir)
|
|
|
|
|
|
|
|
# repo.load()
|
|
|
|
|
|
|
|
# return repo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __contains__(self, citekey):
|
|
|
|
def __contains__(self, citekey):
|
|
|
|
"""Allows to use 'if citekey in repo' pattern"""
|
|
|
|
""" Allows to use 'if citekey in repo' pattern
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Warning: costly the first time.
|
|
|
|
|
|
|
|
"""
|
|
|
|
return citekey in self.citekeys
|
|
|
|
return citekey in self.citekeys
|
|
|
|
|
|
|
|
|
|
|
|
def __len__(self):
|
|
|
|
def __len__(self):
|
|
|
|
|
|
|
|
"""Warning: costly the first time."""
|
|
|
|
return len(self.citekeys)
|
|
|
|
return len(self.citekeys)
|
|
|
|
|
|
|
|
|
|
|
|
# load, save repo
|
|
|
|
|
|
|
|
def _init_dirs(self, autodoc=True):
|
|
|
|
|
|
|
|
"""Create, if necessary, the repository directories.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Should only be called by load or save.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
self.bib_dir = files.clean_path(self.config.papers_dir, BIB_DIR)
|
|
|
|
|
|
|
|
self.meta_dir = files.clean_path(self.config.papers_dir, META_DIR)
|
|
|
|
|
|
|
|
if self.config.doc_dir == 'doc':
|
|
|
|
|
|
|
|
self.doc_dir = files.clean_path(self.config.papers_dir, DOC_DIR)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
self.doc_dir = files.clean_path(self.config.doc_dir)
|
|
|
|
|
|
|
|
self.cfg_path = files.clean_path(self.config.papers_dir, 'papers.yaml')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for d in [self.bib_dir, self.meta_dir, self.doc_dir]:
|
|
|
|
|
|
|
|
if not os.path.exists(d):
|
|
|
|
|
|
|
|
os.makedirs(d)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load(self):
|
|
|
|
|
|
|
|
"""Load the repository, creating dirs if necessary"""
|
|
|
|
|
|
|
|
self._init_dirs()
|
|
|
|
|
|
|
|
repo_config = files.read_yamlfile(self.cfg_path)
|
|
|
|
|
|
|
|
self.citekeys = repo_config['citekeys']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save(self):
|
|
|
|
|
|
|
|
"""Save the repo, creating dirs if necessary"""
|
|
|
|
|
|
|
|
self._init_dirs()
|
|
|
|
|
|
|
|
repo_cfg = {'citekeys': self.citekeys}
|
|
|
|
|
|
|
|
files.write_yamlfile(self.cfg_path, repo_cfg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# reference
|
|
|
|
|
|
|
|
def ref2citekey(self, ref):
|
|
|
|
|
|
|
|
"""Tries to get citekey from given reference.
|
|
|
|
|
|
|
|
Ref can be a citekey or a number.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
if ref in self.citekeys:
|
|
|
|
|
|
|
|
return ref
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
return self.citekeys[int(ref)]
|
|
|
|
|
|
|
|
except (IndexError, ValueError):
|
|
|
|
|
|
|
|
raise InvalidReference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# papers
|
|
|
|
# papers
|
|
|
|
def all_papers(self):
|
|
|
|
def all_papers(self):
|
|
|
|
for key in self.citekeys:
|
|
|
|
for key in self.citekeys:
|
|
|
|
yield self.get_paper(key)
|
|
|
|
yield self.pull_paper(key)
|
|
|
|
|
|
|
|
|
|
|
|
def get_paper(self, citekey):
|
|
|
|
def pull_paper(self, citekey):
|
|
|
|
"""Load a paper by its citekey from disk, if necessary."""
|
|
|
|
"""Load a paper by its citekey from disk, if necessary."""
|
|
|
|
if citekey in self.citekeys:
|
|
|
|
if self.databroker.exists(paper.citekey, both = True):
|
|
|
|
return PaperInRepo.load(self, self._bibfile(citekey),
|
|
|
|
return Paper(self, self.databroker.pull_bibdata(citekey),
|
|
|
|
self._metafile(citekey))
|
|
|
|
self.databroker.pull_metadata(citekey))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
raise InvalidReference
|
|
|
|
raise InvalidReference
|
|
|
|
|
|
|
|
|
|
|
|
def _add_citekey(self, citekey):
|
|
|
|
def push_paper(self, paper, overwrite=False, event=True):
|
|
|
|
if citekey not in self.citekeys:
|
|
|
|
""" Push a paper to disk
|
|
|
|
self.citekeys.append(citekey)
|
|
|
|
|
|
|
|
self.save()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _write_paper(self, paper):
|
|
|
|
|
|
|
|
"""Warning: overwrites the paper without checking if it exists."""
|
|
|
|
|
|
|
|
paper.save(self._bibfile(paper.citekey),
|
|
|
|
|
|
|
|
self._metafile(paper.citekey))
|
|
|
|
|
|
|
|
self._add_citekey(paper.citekey)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _remove_paper(self, citekey, remove_doc=True):
|
|
|
|
|
|
|
|
""" This version of remove is not meant to be accessed from outside.
|
|
|
|
|
|
|
|
It removes paper without raising the Remove Event"""
|
|
|
|
|
|
|
|
paper = self.get_paper(citekey)
|
|
|
|
|
|
|
|
self.citekeys.remove(citekey)
|
|
|
|
|
|
|
|
os.remove(self._metafile(citekey))
|
|
|
|
|
|
|
|
os.remove(self._bibfile(citekey))
|
|
|
|
|
|
|
|
# Eventually remove associated document
|
|
|
|
|
|
|
|
if remove_doc:
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
path = paper.get_document_path_in_repo()
|
|
|
|
|
|
|
|
os.remove(path)
|
|
|
|
|
|
|
|
except NoDocumentFile:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
self.save()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _move_doc(self, old_citekey, paper):
|
|
|
|
:param overwrite: if False, mimick the behavior of adding a paper
|
|
|
|
"""Fragile. Make more robust"""
|
|
|
|
if True, mimick the behavior of updating a paper
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
bibstruct.check_citekey(paper.citekey)
|
|
|
|
|
|
|
|
if (not overwrite) and self.databroker.exists(paper.citekey, both = False):
|
|
|
|
|
|
|
|
raise IOError('files using this the {} citekey already exists'.format(citekey))
|
|
|
|
|
|
|
|
if (not overwrite) and self.citekeys is not None and paper.citekey in self.citekeys:
|
|
|
|
|
|
|
|
raise CiteKeyCollision('citekey {} already in use'.format(paper.citekey))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.databroker.push_bibdata(paper.citekey, paper.bibdata)
|
|
|
|
|
|
|
|
self.databroker.push_metadata(paper.citekey, paper.metadata)
|
|
|
|
|
|
|
|
self.citekeys.add(paper.citekey)
|
|
|
|
|
|
|
|
if event:
|
|
|
|
|
|
|
|
events.AddEvent(paper.citekey).send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_paper(self, citekey, remove_doc=True, event=True):
|
|
|
|
|
|
|
|
""" Remove a paper. Is silent if nothing needs to be done."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if event:
|
|
|
|
|
|
|
|
RemoveEvent(citekey).send()
|
|
|
|
|
|
|
|
if remove_doc:
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
old_docfile = self.find_document(old_citekey)
|
|
|
|
metadata = self.databroker.pull_metadata(paper.citekey)
|
|
|
|
ext = os.path.splitext(old_docfile)[1]
|
|
|
|
docpath = metadata.get('docfile', '')
|
|
|
|
new_docfile = os.path.join(self.doc_dir, paper.citekey + ext)
|
|
|
|
self.databroker.remove_doc(docpath)
|
|
|
|
shutil.move(old_docfile, new_docfile)
|
|
|
|
except IOError:
|
|
|
|
paper.set_external_document(new_docfile)
|
|
|
|
pass # FXME: if IOError is about being unable to
|
|
|
|
except NoDocumentFile:
|
|
|
|
# remove the file, we need to issue an error.I
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _add_paper(self, paper, overwrite=False):
|
|
|
|
|
|
|
|
check_citekey(paper.citekey)
|
|
|
|
|
|
|
|
if not overwrite and paper.citekey in self.citekeys:
|
|
|
|
|
|
|
|
raise CiteKeyCollision('Citekey {} already in use'.format(
|
|
|
|
|
|
|
|
paper.citekey))
|
|
|
|
|
|
|
|
self._write_paper(paper)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# add, remove papers
|
|
|
|
self.citekeys.remove(citekey)
|
|
|
|
def add_paper(self, paper):
|
|
|
|
self.databroker.remove(citekey)
|
|
|
|
self._add_paper(paper)
|
|
|
|
|
|
|
|
AddEvent(paper.citekey).send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_paper(self, paper, old_citekey=None, overwrite=False):
|
|
|
|
def rename_paper(self, paper, new_citekey):
|
|
|
|
if old_citekey is None:
|
|
|
|
|
|
|
|
old_citekey = paper.citekey
|
|
|
|
old_citekey = paper.citekey
|
|
|
|
if not old_citekey in self.citekeys:
|
|
|
|
# check if new_citekey is not the same as paper.citekey
|
|
|
|
raise ValueError('Paper not in repository, first add it.')
|
|
|
|
if old_citekey == new_citekey:
|
|
|
|
if old_citekey == paper.citekey:
|
|
|
|
push_paper(paper, overwrite=True, event=False)
|
|
|
|
self._write_paper(paper)
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self._add_paper(paper, overwrite=overwrite) # This checks for collisions
|
|
|
|
# check if new_citekey does not exists
|
|
|
|
# We do not want to send the RemoveEvent, associated documents should be moved
|
|
|
|
if self.databroker.exists(new_citekey, both=False):
|
|
|
|
self._remove_paper(old_citekey, remove_doc=False)
|
|
|
|
raise IOError("can't rename paper to {}, conflicting files exists".format(new_citekey))
|
|
|
|
self._move_doc(old_citekey, paper)
|
|
|
|
# modify bibdata
|
|
|
|
|
|
|
|
raise NotImplementedError
|
|
|
|
|
|
|
|
# move doc file if necessary
|
|
|
|
|
|
|
|
if self.databroker.is_pubsdir_doc(paper.docpath):
|
|
|
|
|
|
|
|
new_docpath = self.databroker.copy_doc(new_citekey, paper.docpath)
|
|
|
|
|
|
|
|
self.databroker.remove_doc(paper.docpath)
|
|
|
|
|
|
|
|
paper.docpath = new_docpath
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# push_paper to new_citekey
|
|
|
|
|
|
|
|
self.databroker.push(new_citekey, paper.metadata)
|
|
|
|
|
|
|
|
# remove_paper of old_citekey
|
|
|
|
|
|
|
|
self.databroker.remove(old_citekey)
|
|
|
|
|
|
|
|
# send event
|
|
|
|
RenameEvent(paper, old_citekey).send()
|
|
|
|
RenameEvent(paper, old_citekey).send()
|
|
|
|
|
|
|
|
|
|
|
|
def remove_paper(self, citekey, remove_doc=True):
|
|
|
|
def unique_citekey(self, base_key):
|
|
|
|
RemoveEvent(citekey).send()
|
|
|
|
"""Create a unique citekey for a given basekey."""
|
|
|
|
self._remove_paper(citekey, remove_doc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _bibfile(self, citekey):
|
|
|
|
|
|
|
|
return os.path.join(self.bib_dir, citekey + '.bibyaml')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _metafile(self, citekey):
|
|
|
|
|
|
|
|
return os.path.join(self.meta_dir, citekey + '.meta')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_citekey(self, paper, citekey=None):
|
|
|
|
|
|
|
|
"""Create a unique citekey for the given paper."""
|
|
|
|
|
|
|
|
if citekey is None:
|
|
|
|
|
|
|
|
citekey = paper.generate_citekey()
|
|
|
|
|
|
|
|
for n in itertools.count():
|
|
|
|
for n in itertools.count():
|
|
|
|
if not citekey + _base27(n) in self.citekeys:
|
|
|
|
if not base_key + _base27(n) in self.citekeys:
|
|
|
|
return citekey + _base27(n)
|
|
|
|
return base_key + _base27(n)
|
|
|
|
|
|
|
|
|
|
|
|
def find_document(self, citekey):
|
|
|
|
|
|
|
|
found = glob.glob('{}/{}.*'.format(self.doc_dir, citekey))
|
|
|
|
|
|
|
|
if found:
|
|
|
|
|
|
|
|
return found[0]
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
raise NoDocumentFile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def import_document(self, citekey, doc_file):
|
|
|
|
|
|
|
|
if citekey not in self.citekeys:
|
|
|
|
|
|
|
|
raise ValueError("Unknown citekey: {}.".format(citekey))
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
if not os.path.isfile(doc_file):
|
|
|
|
|
|
|
|
raise ValueError("No file {} found.".format(doc_file))
|
|
|
|
|
|
|
|
ext = os.path.splitext(doc_file)[1]
|
|
|
|
|
|
|
|
new_doc_file = os.path.join(self.doc_dir, citekey + ext)
|
|
|
|
|
|
|
|
shutil.copy(doc_file, new_doc_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_tags(self):
|
|
|
|
def get_tags(self):
|
|
|
|
|
|
|
|
"""FIXME: bibdata doesn't need to be read."""
|
|
|
|
tags = set()
|
|
|
|
tags = set()
|
|
|
|
for p in self.all_papers():
|
|
|
|
for p in self.all_papers():
|
|
|
|
tags = tags.union(p.tags)
|
|
|
|
tags = tags.union(p.tags)
|
|
|
|
return tags
|
|
|
|
return tags
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _base27(n):
|
|
|
|
|
|
|
|
return _base27((n - 1) // 26) + chr(ord('a') + ((n - 1) % 26)) if n else ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _base(num, b):
|
|
|
|
|
|
|
|
q, r = divmod(num - 1, len(b))
|
|
|
|
|
|
|
|
return _base(q, b) + b[r] if num else ''
|
|
|
|
|
|
|
|