From e5b898c5c2d49cbe8dc6a5a1020f8621e43cd987 Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Wed, 20 Jan 2016 17:50:56 +0100 Subject: [PATCH] Cache implemented The implementation is designed to be robust to filesystems having integers or nanosecond stat time. Added a requirements.txt file for the tests: `pip install -r tests/requirements.txt` to install the necessary packages to run the tests. Fix #13, Fix #14 --- pubs/commands/add_cmd.py | 2 + pubs/commands/doc_cmd.py | 2 + pubs/commands/edit_cmd.py | 2 + pubs/commands/export_cmd.py | 2 + pubs/commands/import_cmd.py | 2 + pubs/commands/list_cmd.py | 2 + pubs/commands/note_cmd.py | 1 + pubs/commands/remove_cmd.py | 3 +- pubs/commands/rename_cmd.py | 2 + pubs/commands/tag_cmd.py | 2 + pubs/content.py | 16 ++++-- pubs/databroker.py | 16 +++++- pubs/datacache.py | 104 +++++++++++++++++++++++++++++++----- pubs/filebroker.py | 41 +++++++++++--- pubs/p3.py | 2 + pubs/repo.py | 5 +- tests/fake_env.py | 5 +- tests/requirements.txt | 4 ++ tests/test_usecase.py | 62 ++++++++++++++++++--- 19 files changed, 237 insertions(+), 38 deletions(-) create mode 100644 tests/requirements.txt diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index 1e787e7..95a07a2 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -141,3 +141,5 @@ def command(conf, args): ui.message('{} was moved to the pubs repository.'.format(docfile)) else: ui.message('{} was copied to the pubs repository.'.format(docfile)) + + rp.close() diff --git a/pubs/commands/doc_cmd.py b/pubs/commands/doc_cmd.py index daa13c0..96bcf0f 100644 --- a/pubs/commands/doc_cmd.py +++ b/pubs/commands/doc_cmd.py @@ -148,3 +148,5 @@ def command(conf, args): except OSError: ui.error("Command does not exist: %s." % with_command) ui.exit(127) + + rp.close() diff --git a/pubs/commands/edit_cmd.py b/pubs/commands/edit_cmd.py index 522b837..60f04d6 100644 --- a/pubs/commands/edit_cmd.py +++ b/pubs/commands/edit_cmd.py @@ -67,3 +67,5 @@ def command(conf, args): break # else edit again # Also handle malformed bibtex and metadata + + rp.close() diff --git a/pubs/commands/export_cmd.py b/pubs/commands/export_cmd.py index 46394c6..54e48f8 100644 --- a/pubs/commands/export_cmd.py +++ b/pubs/commands/export_cmd.py @@ -35,3 +35,5 @@ def command(conf, args): exporter = endecoder.EnDecoder() bibdata_raw = exporter.encode_bibdata(bib) ui.message(bibdata_raw) + + rp.close() diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 7450f6e..990fc8c 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -86,3 +86,5 @@ def command(conf, args): else: rp.push_doc(p.citekey, docfile, copy=copy) #FIXME should move the file if configured to do so. + + rp.close() diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index 9e7063b..6901f08 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -52,6 +52,8 @@ def command(conf, args): pretty.paper_oneliner(p, citekey_only=args.citekeys) for p in papers)) + rp.close() + FIELD_ALIASES = { 'a': 'author', diff --git a/pubs/commands/note_cmd.py b/pubs/commands/note_cmd.py index 895e701..d157238 100644 --- a/pubs/commands/note_cmd.py +++ b/pubs/commands/note_cmd.py @@ -21,3 +21,4 @@ def command(conf, args): citekey = resolve_citekey(rp, args.citekey, ui=ui, exit_on_fail=True) notepath = rp.databroker.real_notepath(citekey) content.edit_file(conf['main']['edit_cmd'], notepath, temporary=False) + rp.close() diff --git a/pubs/commands/remove_cmd.py b/pubs/commands/remove_cmd.py index e673623..83785e2 100644 --- a/pubs/commands/remove_cmd.py +++ b/pubs/commands/remove_cmd.py @@ -2,6 +2,7 @@ from .. import repo from .. import color from ..uis import get_ui from ..utils import resolve_citekey_list +from ..p3 import ustr def parser(subparsers): parser = subparsers.add_parser('remove', help='removes a publication') @@ -31,7 +32,7 @@ def command(conf, args): try: rp.remove_paper(c) except Exception as e: - ui.error(e.message) + ui.error(ustr(e)) failed = True ui.message('The publication(s) [{}] were removed'.format( ', '.join([color.dye_out(c, 'citekey') for c in keys]))) diff --git a/pubs/commands/rename_cmd.py b/pubs/commands/rename_cmd.py index abe9fd6..dd4b903 100644 --- a/pubs/commands/rename_cmd.py +++ b/pubs/commands/rename_cmd.py @@ -28,3 +28,5 @@ def command(conf, args): ui.message("The '{}' citekey has been renamed into '{}'".format( color.dye_out(args.citekey, 'citekey'), color.dye_out(args.new_citekey, 'citekey'))) + + rp.close() diff --git a/pubs/commands/tag_cmd.py b/pubs/commands/tag_cmd.py index 1f05cb3..5ce91a7 100644 --- a/pubs/commands/tag_cmd.py +++ b/pubs/commands/tag_cmd.py @@ -113,3 +113,5 @@ def command(conf, args): ui.message('\n'.join(pretty.paper_oneliner(p) for p in papers_list)) + + rp.close() diff --git a/pubs/content.py b/pubs/content.py index 18982c9..e7b3521 100644 --- a/pubs/content.py +++ b/pubs/content.py @@ -68,14 +68,20 @@ def check_directory(path, fail=True): and _check_system_path_is(u'isdir', syspath, fail=fail)) -def read_text_file(filepath): - check_file(filepath) +def read_text_file(filepath, fail=True): + check_file(filepath, fail=fail) try: with _open(filepath, 'r') as f: content = f.read() except UnicodeDecodeError: raise UnableToDecodeTextFile(filepath) - # Should "raise from". TODO once python 2 is droped. + # Should "raise from", if Python 2 support is dropped. + return content + +def read_binary_file(filepath, fail=True): + check_file(filepath, fail=fail) + with _open(filepath, 'rb') as f: + content = f.read() return content @@ -84,9 +90,9 @@ def remove_file(filepath): os.remove(filepath) -def write_file(filepath, data): +def write_file(filepath, data, mode='w'): check_directory(os.path.dirname(filepath)) - with _open(filepath, 'w') as f: + with _open(filepath, mode) as f: f.write(data) diff --git a/pubs/databroker.py b/pubs/databroker.py index c98f1f9..12c6eda 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -1,6 +1,6 @@ from . import filebroker from . import endecoder - +from .p3 import pickle class DataBroker(object): """ DataBroker class @@ -15,6 +15,20 @@ class DataBroker(object): self.docbroker = filebroker.DocBroker(directory, scheme='docsdir', subdir='doc') self.notebroker = filebroker.DocBroker(directory, scheme='notesdir', subdir='notes') + # cache + + def close(self): + pass + + def pull_cache(self, name): + """Load cache data from distk. Exceptions are handled by the caller.""" + data_raw = self.filebroker.pull_cachefile(name) + return pickle.loads(data_raw) + + def push_cache(self, name, data): + data_raw = pickle.dumps(data) + self.filebroker.push_cachefile(name, data_raw) + # filebroker+endecoder def pull_metadata(self, citekey): diff --git a/pubs/datacache.py b/pubs/datacache.py index 2c3c5a4..e7738d9 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -1,6 +1,16 @@ +import os +import time from . import databroker + +class CacheEntry(object): + + def __init__(self, data, timestamp): + self.data = data + self.timestamp = timestamp + + class DataCache(object): """ DataCache class, provides a very similar interface as DataBroker @@ -16,9 +26,18 @@ class DataCache(object): def __init__(self, directory, create=False): self.directory = directory self._databroker = None + self._metacache = None + self._metacache_modified = False # if True, cache will need to be written to disk. + self._bibcache = None + self._bibcache_modified = False # if True, cache will need to be written to disk. + # does the filesystem supports subsecond stat time? + self.nsec_support = os.stat('.').st_mtime != int(os.stat('.').st_mtime) if create: self._create() + def close(self): + self.flush_cache() + @property def databroker(self): if self._databroker is None: @@ -28,23 +47,93 @@ class DataCache(object): def _create(self): self._databroker = databroker.DataBroker(self.directory, create=True) + @property + def metacache(self): + if self._metacache is None: + try: + self._metacache = self.databroker.pull_cache('metacache') + except IOError: + self._metacache = {} + return self._metacache + + @property + def bibcache(self): + if self._bibcache is None: + try: + self._bibcache = self.databroker.pull_cache('bibcache') + except IOError: + self._bibcache = {} + return self._bibcache + + def flush_cache(self, force=False): + """Write cache to disk""" + if force or self._metacache_modified: + self.databroker.push_cache('metacache', self.metacache) + self._metacache_modified = False + if force or self._bibcache_modified: + self.databroker.push_cache('bibcache', self.bibcache) + self._bibcache_modified = False + def pull_metadata(self, citekey): - return self.databroker.pull_metadata(citekey) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + if citekey in self.metacache: + cached_metadata = self.metacache[citekey] + boundary = mtime if self.nsec_support else mtime + 1 + if cached_metadata.timestamp >= boundary: + return cached_metadata.data + + # if we get here, we must update the cache. + t = time.time() + data = self.databroker.pull_metadata(citekey) + self.metacache[citekey] = CacheEntry(data, t) + self._metacache_modified = True + return self.metacache[citekey].data def pull_bibentry(self, citekey): - return self.databroker.pull_bibentry(citekey) + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + if citekey in self.bibcache: + cached_bibdata = self.bibcache[citekey] + boundary = mtime if self.nsec_support else mtime + 1 + if cached_bibdata.timestamp >= boundary: + return cached_bibdata.data + + # if we get here, we must update the cache. + t = time.time() + data = self.databroker.pull_bibentry(citekey) + self.bibcache[citekey] = CacheEntry(data, t) + self._bibcache_modified = True + return self.bibcache[citekey].data def push_metadata(self, citekey, metadata): self.databroker.push_metadata(citekey, metadata) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + #print('push', mtime, metadata) + self.metacache[citekey] = CacheEntry(metadata, mtime) + self._metacache_modified = True def push_bibentry(self, citekey, bibdata): self.databroker.push_bibentry(citekey, bibdata) + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + self.bibcache[citekey] = CacheEntry(bibdata, mtime) + self._bibcache_modified = True def push(self, citekey, metadata, bibdata): self.databroker.push(citekey, metadata, bibdata) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + self.metacache[citekey] = CacheEntry(metadata, mtime) + self._metacache_modified = True + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + self.bibcache[citekey] = CacheEntry(bibdata, mtime) + self._bibcache_modified = True def remove(self, citekey): self.databroker.remove(citekey) + if citekey in self.metacache: + self.metacache.pop(citekey) + self._metacache_modified = True + if citekey in self.bibcache: + self.bibcache.pop(citekey) + self._bibcache_modified = True def exists(self, citekey, meta_check=False): return self.databroker.exists(citekey, meta_check=meta_check) @@ -85,14 +174,3 @@ class DataCache(object): def rename_note(self, old_citekey, new_citekey): return self.databroker.rename_note(old_citekey, new_citekey) - - -# class ChangeTracker(object): - -# def __init__(self, cache, directory): -# self.cache = cache -# self.directory = directory - -# def changes(self): -# """ Returns the list of modified files since the last cache was saved to disk""" -# pass diff --git a/pubs/filebroker.py b/pubs/filebroker.py index 1d59fbb..934aaad 100644 --- a/pubs/filebroker.py +++ b/pubs/filebroker.py @@ -6,6 +6,8 @@ from .content import (check_file, check_directory, read_text_file, write_file, system_path, check_content, content_type, get_content, copy_content) +from . import content + def filter_filename(filename, ext): """ Return the filename without the extension if the extension matches ext. @@ -24,23 +26,50 @@ class FileBroker(object): """ def __init__(self, directory, create=False): - self.directory = directory - self.metadir = os.path.join(self.directory, 'meta') - self.bibdir = os.path.join(self.directory, 'bib') + self.directory = os.path.expanduser(directory) + self.metadir = os.path.join(self.directory, 'meta') + self.bibdir = os.path.join(self.directory, 'bib') + self.cachedir = os.path.join(self.directory, '.cache') if create: self._create() check_directory(self.directory) check_directory(self.metadir) check_directory(self.bibdir) + # cache directory is created (if absent) if other directories exists. + if not check_directory(self.cachedir, fail=False): + os.mkdir(system_path(self.cachedir)) def _create(self): - if not check_directory(self.directory, fail = False): + """Create meta and bib directories if absent""" + if not check_directory(self.directory, fail=False): os.mkdir(system_path(self.directory)) - if not check_directory(self.metadir, fail = False): + if not check_directory(self.metadir, fail=False): os.mkdir(system_path(self.metadir)) - if not check_directory(self.bibdir, fail = False): + if not check_directory(self.bibdir, fail=False): os.mkdir(system_path(self.bibdir)) + def pull_cachefile(self, filename): + filepath = os.path.join(self.cachedir, filename) + return content.read_binary_file(filepath) + + def push_cachefile(self, filename, data): + filepath = os.path.join(self.cachedir, filename) + write_file(filepath, data, mode='wb') + + def mtime_metafile(self, citekey): + try: + filepath = os.path.join(self.metadir, citekey + '.yaml') + return os.path.getmtime(filepath) + except OSError: + raise IOError("'{}' not found.".format(filepath)) + + def mtime_bibfile(self, citekey): + try: + filepath = os.path.join(self.bibdir, citekey + '.bib') + return os.path.getmtime(filepath) + except OSError: + raise IOError("'{}' not found.".format(filepath)) + def pull_metafile(self, citekey): filepath = os.path.join(self.metadir, citekey + '.yaml') return read_text_file(filepath) diff --git a/pubs/p3.py b/pubs/p3.py index 16bba58..6bb4d14 100644 --- a/pubs/p3.py +++ b/pubs/p3.py @@ -2,6 +2,7 @@ import io import sys if sys.version_info[0] == 2: + import cPickle as pickle def input(): return raw_input().decode(sys.stdin.encoding or 'utf8', 'ignore') @@ -48,6 +49,7 @@ else: stdio.seek(0) return stdio.read() + import pickle input = input diff --git a/pubs/repo.py b/pubs/repo.py index 5ee69f6..ceec061 100644 --- a/pubs/repo.py +++ b/pubs/repo.py @@ -41,6 +41,9 @@ class Repository(object): self._citekeys = None self.databroker = DataCache(self.conf['main']['pubsdir'], create=create) + def close(self): + self.databroker.close() + @property def citekeys(self): if self._citekeys is None: @@ -106,7 +109,7 @@ class Repository(object): self.databroker.remove_note(citekey, silent=True) except IOError: pass # FIXME: if IOError is about being unable to - # remove the file, we need to issue an error.I + # remove the file, we need to issue an error. self.citekeys.remove(citekey) self.databroker.remove(citekey) diff --git a/tests/fake_env.py b/tests/fake_env.py index ccceb19..7c1f754 100644 --- a/tests/fake_env.py +++ b/tests/fake_env.py @@ -43,14 +43,13 @@ class FakeIO(object): # https://github.com/jmcgeheeiv/pyfakefs/blob/master/pyfakefs/fake_filesystem.py#L2143 return self.fake_open(file, mode=mode, buffering=buffering) - BytesIO = real_io.BytesIO StringIO = real_io.StringIO -def create_fake_fs(module_list): +def create_fake_fs(module_list, nsec_stat=True): - fake_fs = fake_filesystem.FakeFilesystem() + fake_fs = fake_filesystem.FakeFilesystem(nsec_stat=nsec_stat) fake_os = fake_filesystem.FakeOsModule(fake_fs) fake_open = fake_filesystem.FakeFileOpen(fake_fs) fake_shutil = fake_filesystem_shutil.FakeShutilModule(fake_fs) diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..1981041 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,4 @@ +# those are the additional packages required to run the tests +six +-e git+http://github.com/humm/pyfakefs@feat/float_mtime#egg=pyfakefs +ddt diff --git a/tests/test_usecase.py b/tests/test_usecase.py index 264ab4d..abfaefc 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -7,6 +7,7 @@ import os import sys import six +import ddt import dotdot import fake_env @@ -22,8 +23,8 @@ from pubs.commands import init_cmd, import_cmd # makes the tests very noisy -PRINT_OUTPUT=False -CAPTURE_OUTPUT=True +PRINT_OUTPUT = False +CAPTURE_OUTPUT = True class FakeSystemExit(Exception): @@ -69,8 +70,8 @@ class CommandTestCase(unittest.TestCase): maxDiff = 1000000 - def setUp(self): - self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update]) + def setUp(self, nsec_stat=True): + self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update], nsec_stat=nsec_stat) self.default_pubs_dir = self.fs['os'].path.expanduser('~/.pubs') self.default_conf_path = self.fs['os'].path.expanduser('~/.pubsrc') @@ -138,8 +139,8 @@ class DataCommandTestCase(CommandTestCase): copying fake data. """ - def setUp(self): - super(DataCommandTestCase, self).setUp() + def setUp(self, nsec_stat=True): + super(DataCommandTestCase, self).setUp(nsec_stat=nsec_stat) fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'data'), 'data') fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'bibexamples'), 'bibexamples') @@ -152,13 +153,13 @@ class TestInit(CommandTestCase): pubsdir = os.path.expanduser('~/pubs_test2') self.execute_cmds(['pubs init -p {}'.format(pubsdir)]) self.assertEqual(set(self.fs['os'].listdir(pubsdir)), - {'bib', 'doc', 'meta', 'notes'}) + {'bib', 'doc', 'meta', 'notes', '.cache'}) def test_init2(self): pubsdir = os.path.expanduser('~/.pubs') self.execute_cmds(['pubs init']) self.assertEqual(set(self.fs['os'].listdir(pubsdir)), - {'bib', 'doc', 'meta', 'notes'}) + {'bib', 'doc', 'meta', 'notes', '.cache'}) def test_init_config(self): self.execute_cmds(['pubs init']) @@ -598,5 +599,50 @@ class TestUsecase(DataCommandTestCase): self.assertFalse(self.fs['os'].path.isfile(self.default_conf_path)) self.assertTrue(self.fs['os'].path.isfile(alt_conf)) + + +@ddt.ddt +class TestCache(DataCommandTestCase): + """\ + Run tests on fake filesystems supporting both integers and nanosecond + stat times. + """ + + def setUp(self): + pass + + @ddt.data(True, False) + def test_remove(self, nsec_stat): + DataCommandTestCase.setUp(self, nsec_stat=nsec_stat) + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + ('pubs remove Page99', ['y']), + 'pubs list', + ] + out = self.execute_cmds(cmds) + self.assertEqual(1, len(out[3].split('\n'))) + + @ddt.data(True, False) + def test_edit(self, nsec_stat): + DataCommandTestCase.setUp(self, nsec_stat=nsec_stat) + + bib = str_fixtures.bibtex_external0 + bib1 = re.sub('year = \{1999\}', 'year = {2007}', bib) + + line = '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) \n' + line1 = re.sub('1999', '2007', line) + + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + 'pubs list', + ('pubs edit Page99', [bib1]), + 'pubs list', + ] + + out = self.execute_cmds(cmds) + self.assertEqual(line, out[2]) + self.assertEqual(line1, out[4]) + + if __name__ == '__main__': unittest.main()