diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index b12dcef..8800b7a 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -82,7 +82,7 @@ def command(args): base_key = bibstruct.extract_citekey(bibdata) citekey = rp.unique_citekey(base_key) else: - rp.databroker.exists(citekey, both=False) + rp.databroker.exists(citekey, meta_check=False) p = paper.Paper(bibdata, citekey=citekey) diff --git a/pubs/databroker.py b/pubs/databroker.py index 7614a43..801bbe5 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -3,7 +3,7 @@ from . import endecoder class DataBroker(object): - """ DataBroker class + """ DataBroker class This is aimed at being a simple, high level interface to the content stored on disk. Requests are optimistically made, and exceptions are raised if something goes wrong. @@ -12,40 +12,49 @@ class DataBroker(object): def __init__(self, directory, create=False): self.filebroker = filebroker.FileBroker(directory, create=create) self.endecoder = endecoder.EnDecoder() - self.docbroker = filebroker.DocBroker(directory, scheme='docsdir', subdir='doc') - self.notebroker = filebroker.DocBroker(directory, scheme='notesdir', subdir='notes') + self.docbroker = filebroker.DocBroker(directory, scheme='docsdir', subdir='doc') + self.notebroker = filebroker.DocBroker(directory, scheme='notesdir', subdir='notes') # filebroker+endecoder def pull_metadata(self, citekey): metadata_raw = self.filebroker.pull_metafile(citekey) return self.endecoder.decode_metadata(metadata_raw) - + def pull_bibdata(self, citekey): bibdata_raw = self.filebroker.pull_bibfile(citekey) return self.endecoder.decode_bibdata(bibdata_raw) - + def push_metadata(self, citekey, metadata): metadata_raw = self.endecoder.encode_metadata(metadata) self.filebroker.push_metafile(citekey, metadata_raw) - + def push_bibdata(self, citekey, bibdata): bibdata_raw = self.endecoder.encode_bibdata(bibdata) self.filebroker.push_bibfile(citekey, bibdata_raw) - + def push(self, 
citekey, metadata, bibdata): self.filebroker.push(citekey, metadata, bibdata) - + def remove(self, citekey): self.filebroker.remove(citekey) - def exists(self, citekey, both = True): - return self.filebroker.exists(citekey, both=both) - + def exists(self, citekey, meta_check=False): + """ Checks whether the bibtex of a citekey exists. + + :param meta_check: if True, returns True only if both the bibtex and the meta file exist. + """ + return self.filebroker.exists(citekey, meta_check=meta_check) + + def citekeys(self): + listings = self.listing(filestats=False) + return set(listings['bibfiles']) + def listing(self, filestats=True): return self.filebroker.listing(filestats=filestats) def verify(self, bibdata_raw): + """Will return None if bibdata_raw can't be decoded""" try: return self.endecoder.decode_bibdata(bibdata_raw) except ValueError: @@ -57,7 +66,7 @@ class DataBroker(object): return self.docbroker.in_docsdir(docpath) def real_docpath(self, docpath): - return self.docbroker.real_docpath(docpath) + return self.docbroker.real_docpath(docpath) def add_doc(self, citekey, source_path, overwrite=False): return self.docbroker.add_doc(citekey, source_path, overwrite=overwrite) diff --git a/pubs/datacache.py b/pubs/datacache.py index b4fe98a..98b3914 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -7,7 +7,7 @@ class DataCache(object): Has two roles : 1. Provides a buffer between the commands and the hard drive. Until a command request a hard drive ressource, it does not touch it. - 2. Keeps a up-to-date, pickled version of the repository, to speed up things + 2. Keeps an up-to-date, pickled version of the repository, to speed up things when they are a lot of files. Update are also done only when required. Changes are detected using data modification timestamps. 
@@ -46,18 +46,16 @@ class DataCache(object): def remove(self, citekey): self.databroker.remove(citekey) - def exists(self, citekey, both=True): - return self.databroker.exists(citekey, both=both) + def exists(self, citekey, meta_check=False): + return self.databroker.exists(citekey, meta_check=meta_check) def citekeys(self): - listings = self.listing(filestats=False) - return set(listings['metafiles']).intersection(listings['bibfiles']) + return self.databroker.citekeys() def listing(self, filestats=True): return self.databroker.listing(filestats=filestats) def verify(self, bibdata_raw): - """Will return None if bibdata_raw can't be decoded""" return self.databroker.verify(bibdata_raw) # docbroker diff --git a/pubs/filebroker.py b/pubs/filebroker.py index 73b3ad6..09e6079 100644 --- a/pubs/filebroker.py +++ b/pubs/filebroker.py @@ -71,13 +71,16 @@ class FileBroker(object): if check_file(bibfilepath): os.remove(system_path(bibfilepath)) - def exists(self, citekey, both=True): - meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False) - bib_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False) - if both: - return meta_exists and bib_exists - else: - return meta_exists or bib_exists + def exists(self, citekey, meta_check=False): + """ Checks whether the bibtex of a citekey exists. + + :param meta_check: if True, returns True only if both the bibtex and the meta file exist. 
+ """ + does_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False) + if meta_check: + meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False) + does_exists = does_exists and meta_exists + return does_exists def listing(self, filestats=True): metafiles = [] diff --git a/pubs/repo.py b/pubs/repo.py index 1f21b70..7ff8354 100644 --- a/pubs/repo.py +++ b/pubs/repo.py @@ -50,7 +50,8 @@ class Repository(object): def pull_paper(self, citekey): """Load a paper by its citekey from disk, if necessary.""" - if self.databroker.exists(citekey, both = True): + if self.databroker.exists(citekey, meta_check=True): + #TODO meta_check=False and default meta generation return Paper(self.databroker.pull_bibdata(citekey), citekey=citekey, metadata=self.databroker.pull_metadata(citekey)) @@ -64,7 +65,7 @@ class Repository(object): if True, mimick the behavior of updating a paper """ bibstruct.check_citekey(paper.citekey) - if (not overwrite) and (self.databroker.exists(paper.citekey, both=False) + if (not overwrite) and (self.databroker.exists(paper.citekey, meta_check=False) or (paper.citekey in self)): raise CiteKeyCollision('citekey {} already in use'.format(paper.citekey)) if not paper.added: diff --git a/readme.md b/readme.md index c529cd4..03130b2 100644 --- a/readme.md +++ b/readme.md @@ -32,6 +32,7 @@ or for bibtex containing a single file: Requirements ------------ - python >= 2.6 +- [pyYaml](http://pyyaml.org) - [bibtexparser](https://github.com/sciunto/python-bibtexparser) diff --git a/setup.py b/setup.py index eeb3edd..d9221aa 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup(name='pubs', author_email='fabien.benureau+inria@gmail.com', url='', description='research papers manager', - requires=['bibtexparser'], + requires=['bibtexparser', 'pyyaml'], packages=find_packages(), scripts=['pubs/pubs'] ) diff --git a/tests/test_databroker.py b/tests/test_databroker.py index d79c24a..c83190c 100644 --- 
a/tests/test_databroker.py +++ b/tests/test_databroker.py @@ -25,11 +25,12 @@ class TestDataBroker(unittest.TestCase): db = db_class('tmp', create=True) db.push_metadata('citekey1', page99_metadata) - self.assertTrue(db.exists('citekey1', both=False)) - self.assertFalse(db.exists('citekey1', both=True)) + self.assertFalse(db.exists('citekey1', meta_check=True)) + self.assertFalse(db.exists('citekey1', meta_check=False)) db.push_bibdata('citekey1', page99_bibdata) - self.assertTrue(db.exists('citekey1', both=True)) + self.assertTrue(db.exists('citekey1', meta_check=False)) + self.assertTrue(db.exists('citekey1', meta_check=True)) self.assertEqual(db.pull_metadata('citekey1'), page99_metadata) pulled = db.pull_bibdata('citekey1')['Page99']