From 9ad6d8db251247df9782fa714d6c43f6cd2a2240 Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Thu, 21 Jan 2016 15:03:48 +0100 Subject: [PATCH 1/7] Simplified fake_env code 1. Removed the unicode wrapper around fake_env io 2. Overrided pyfakefs cStringIO with StringIO (Python 2.x) 3. copy_dir will copy file in binary file to the fake filesystem *unless* they are bib, yaml, txt or md files 4. ui.handle_exception() conserve traceback Numerous unicode subtelties required diving into pyfakefs and configobj source code. Unicode on Python 2.x is the worst. --- pubs/uis.py | 2 +- tests/fake_env.py | 99 ++++++++++++----------------------------------- 2 files changed, 25 insertions(+), 76 deletions(-) diff --git a/pubs/uis.py b/pubs/uis.py index 47b973e..7298e77 100644 --- a/pubs/uis.py +++ b/pubs/uis.py @@ -85,7 +85,7 @@ class PrintUI(object): def handle_exception(self, exc): if self.debug: - raise exc + raise else: self.error(ustr(exc)) self.exit() diff --git a/tests/fake_env.py b/tests/fake_env.py index ef3db67..ccceb19 100644 --- a/tests/fake_env.py +++ b/tests/fake_env.py @@ -9,6 +9,16 @@ import dotdot from pyfakefs import (fake_filesystem, fake_filesystem_shutil, fake_filesystem_glob) +# pyfakefs uses cStringIO under Python 2.x, which does not accept arbitrary unicode strings +# (see https://docs.python.org/2/library/stringio.html#module-cStringIO) +# io.StringIO does not accept `str`, so we're left with the StringIO module +# PS: this nonsense explains why Python 3.x was created. 
+try: + import StringIO + fake_filesystem.io = StringIO +except ImportError: + pass + from pubs.p3 import input, _fake_stdio, _get_fake_stdio_ucontent from pubs import content, filebroker @@ -21,90 +31,23 @@ real_glob = glob real_io = io - -# def _mod_list(): -# ml = [] -# import pubs -# for importer, modname, ispkg in pkgutil.walk_packages( -# path=pubs.__path__, -# prefix=pubs.__name__ + '.', -# onerror=lambda x: None): -# # HACK to not load textnote -# if not modname.startswith('pubs.plugs.texnote'): -# ml.append((modname, __import__(modname, fromlist='dummy'))) -# return ml - - ENCODING = 'utf8' - -class UnicodeStringIOWrapper(object): - """This is a hack because fake_filesystem does not provide mock of io. - """ - - override = ['read', 'readline', 'readlines', 'write', 'writelines'] - - def __init__(self, strio): - self._strio = strio # The real StringIO - - def __getattr__(self, name): - if name in UnicodeStringIOWrapper.override: - return object.__getattribute__(self, name) - else: - return self._strio.__getattribute__(name) - - def __iter__(self): - for l in self.readlines(): - yield l - - def read(self, *args): - return self._strio.read(*args).decode(ENCODING) - - def readline(self, *args): - return self._strio.readline(*args).decode(ENCODING) - - def readlines(self, *args): - return [l.decode(ENCODING) for l in self._strio.readlines(*args)] - - def write(self, data): - self._strio.write(data.encode(ENCODING)) - - def writelines(self, data): - self._strio.write([l.encode(ENCODING) for l in data]) - - def __enter__(self): - self._strio.__enter__() - return self - - def __exit__(self, *args): - return self._strio.__exit__(*args) - - -def _force_binary_mode(mode): - if 'b' in mode: - return mode # python 2 fix. 
- # raise ValueError('Open should not happen in binary mode.') - return mode + 'b' - - class FakeIO(object): def __init__(self, fake_open): self.fake_open = fake_open - def open(self, *args, **kwargs): - # Forces binary mode for FakeFileOpen - args = list(args) - if len(args) > 1: - args[1] = _force_binary_mode(args[1]) - else: - kwargs['mode'] = _force_binary_mode(kwargs.get('mode', 'r')) - fakefs_stringio = self.fake_open.Call(*args, **kwargs) - return UnicodeStringIOWrapper(fakefs_stringio) + def open(self, file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True): + # encoding is ignored by pyfakefs + # https://github.com/jmcgeheeiv/pyfakefs/blob/master/pyfakefs/fake_filesystem.py#L2143 + return self.fake_open(file, mode=mode, buffering=buffering) + BytesIO = real_io.BytesIO StringIO = real_io.StringIO + def create_fake_fs(module_list): fake_fs = fake_filesystem.FakeFilesystem() @@ -162,8 +105,14 @@ def copy_dir(fs, real_dir, fake_dir = None): real_path = os.path.abspath(real_os.path.join(real_dir, filename)) fake_path = fs['os'].path.join(fake_dir, filename) if real_os.path.isfile(real_path): - with real_open(real_path, 'rb') as f: - fs['fs'].CreateFile(fake_path, contents=f.read()) + _, ext = real_os.path.splitext(filename) + if ext in ['.yaml', '.bib', '.txt', '.md']: + with real_io.open(real_path, 'r', encoding='utf-8') as f: + fs['fs'].CreateFile(fake_path, contents=f.read()) + else: + with real_io.open(real_path, 'rb') as f: + fs['fs'].CreateFile(fake_path, contents=f.read()) + if real_os.path.isdir(real_path): fs['fs'].CreateDirectory(fake_path) copy_dir(fs, real_path, fake_path) From e5b898c5c2d49cbe8dc6a5a1020f8621e43cd987 Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Wed, 20 Jan 2016 17:50:56 +0100 Subject: [PATCH 2/7] Cache implemented The implementation is designed to be robust to filesystems having integers or nanosecond stat time. 
Added a requirements.txt file for the tests: `pip install -r tests/requirements.txt` to install the necessary packages to run the tests. Fix #13, Fix #14 --- pubs/commands/add_cmd.py | 2 + pubs/commands/doc_cmd.py | 2 + pubs/commands/edit_cmd.py | 2 + pubs/commands/export_cmd.py | 2 + pubs/commands/import_cmd.py | 2 + pubs/commands/list_cmd.py | 2 + pubs/commands/note_cmd.py | 1 + pubs/commands/remove_cmd.py | 3 +- pubs/commands/rename_cmd.py | 2 + pubs/commands/tag_cmd.py | 2 + pubs/content.py | 16 ++++-- pubs/databroker.py | 16 +++++- pubs/datacache.py | 104 +++++++++++++++++++++++++++++++----- pubs/filebroker.py | 41 +++++++++++--- pubs/p3.py | 2 + pubs/repo.py | 5 +- tests/fake_env.py | 5 +- tests/requirements.txt | 4 ++ tests/test_usecase.py | 62 ++++++++++++++++++--- 19 files changed, 237 insertions(+), 38 deletions(-) create mode 100644 tests/requirements.txt diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index 1e787e7..95a07a2 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -141,3 +141,5 @@ def command(conf, args): ui.message('{} was moved to the pubs repository.'.format(docfile)) else: ui.message('{} was copied to the pubs repository.'.format(docfile)) + + rp.close() diff --git a/pubs/commands/doc_cmd.py b/pubs/commands/doc_cmd.py index daa13c0..96bcf0f 100644 --- a/pubs/commands/doc_cmd.py +++ b/pubs/commands/doc_cmd.py @@ -148,3 +148,5 @@ def command(conf, args): except OSError: ui.error("Command does not exist: %s." 
% with_command) ui.exit(127) + + rp.close() diff --git a/pubs/commands/edit_cmd.py b/pubs/commands/edit_cmd.py index 522b837..60f04d6 100644 --- a/pubs/commands/edit_cmd.py +++ b/pubs/commands/edit_cmd.py @@ -67,3 +67,5 @@ def command(conf, args): break # else edit again # Also handle malformed bibtex and metadata + + rp.close() diff --git a/pubs/commands/export_cmd.py b/pubs/commands/export_cmd.py index 46394c6..54e48f8 100644 --- a/pubs/commands/export_cmd.py +++ b/pubs/commands/export_cmd.py @@ -35,3 +35,5 @@ def command(conf, args): exporter = endecoder.EnDecoder() bibdata_raw = exporter.encode_bibdata(bib) ui.message(bibdata_raw) + + rp.close() diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 7450f6e..990fc8c 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -86,3 +86,5 @@ def command(conf, args): else: rp.push_doc(p.citekey, docfile, copy=copy) #FIXME should move the file if configured to do so. + + rp.close() diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index 9e7063b..6901f08 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -52,6 +52,8 @@ def command(conf, args): pretty.paper_oneliner(p, citekey_only=args.citekeys) for p in papers)) + rp.close() + FIELD_ALIASES = { 'a': 'author', diff --git a/pubs/commands/note_cmd.py b/pubs/commands/note_cmd.py index 895e701..d157238 100644 --- a/pubs/commands/note_cmd.py +++ b/pubs/commands/note_cmd.py @@ -21,3 +21,4 @@ def command(conf, args): citekey = resolve_citekey(rp, args.citekey, ui=ui, exit_on_fail=True) notepath = rp.databroker.real_notepath(citekey) content.edit_file(conf['main']['edit_cmd'], notepath, temporary=False) + rp.close() diff --git a/pubs/commands/remove_cmd.py b/pubs/commands/remove_cmd.py index e673623..83785e2 100644 --- a/pubs/commands/remove_cmd.py +++ b/pubs/commands/remove_cmd.py @@ -2,6 +2,7 @@ from .. import repo from .. 
import color from ..uis import get_ui from ..utils import resolve_citekey_list +from ..p3 import ustr def parser(subparsers): parser = subparsers.add_parser('remove', help='removes a publication') @@ -31,7 +32,7 @@ def command(conf, args): try: rp.remove_paper(c) except Exception as e: - ui.error(e.message) + ui.error(ustr(e)) failed = True ui.message('The publication(s) [{}] were removed'.format( ', '.join([color.dye_out(c, 'citekey') for c in keys]))) diff --git a/pubs/commands/rename_cmd.py b/pubs/commands/rename_cmd.py index abe9fd6..dd4b903 100644 --- a/pubs/commands/rename_cmd.py +++ b/pubs/commands/rename_cmd.py @@ -28,3 +28,5 @@ def command(conf, args): ui.message("The '{}' citekey has been renamed into '{}'".format( color.dye_out(args.citekey, 'citekey'), color.dye_out(args.new_citekey, 'citekey'))) + + rp.close() diff --git a/pubs/commands/tag_cmd.py b/pubs/commands/tag_cmd.py index 1f05cb3..5ce91a7 100644 --- a/pubs/commands/tag_cmd.py +++ b/pubs/commands/tag_cmd.py @@ -113,3 +113,5 @@ def command(conf, args): ui.message('\n'.join(pretty.paper_oneliner(p) for p in papers_list)) + + rp.close() diff --git a/pubs/content.py b/pubs/content.py index 18982c9..e7b3521 100644 --- a/pubs/content.py +++ b/pubs/content.py @@ -68,14 +68,20 @@ def check_directory(path, fail=True): and _check_system_path_is(u'isdir', syspath, fail=fail)) -def read_text_file(filepath): - check_file(filepath) +def read_text_file(filepath, fail=True): + check_file(filepath, fail=fail) try: with _open(filepath, 'r') as f: content = f.read() except UnicodeDecodeError: raise UnableToDecodeTextFile(filepath) - # Should "raise from". TODO once python 2 is droped. + # Should "raise from", if Python 2 support is dropped. 
+ return content + +def read_binary_file(filepath, fail=True): + check_file(filepath, fail=fail) + with _open(filepath, 'rb') as f: + content = f.read() return content @@ -84,9 +90,9 @@ def remove_file(filepath): os.remove(filepath) -def write_file(filepath, data): +def write_file(filepath, data, mode='w'): check_directory(os.path.dirname(filepath)) - with _open(filepath, 'w') as f: + with _open(filepath, mode) as f: f.write(data) diff --git a/pubs/databroker.py b/pubs/databroker.py index c98f1f9..12c6eda 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -1,6 +1,6 @@ from . import filebroker from . import endecoder - +from .p3 import pickle class DataBroker(object): """ DataBroker class @@ -15,6 +15,20 @@ class DataBroker(object): self.docbroker = filebroker.DocBroker(directory, scheme='docsdir', subdir='doc') self.notebroker = filebroker.DocBroker(directory, scheme='notesdir', subdir='notes') + # cache + + def close(self): + pass + + def pull_cache(self, name): + """Load cache data from distk. Exceptions are handled by the caller.""" + data_raw = self.filebroker.pull_cachefile(name) + return pickle.loads(data_raw) + + def push_cache(self, name, data): + data_raw = pickle.dumps(data) + self.filebroker.push_cachefile(name, data_raw) + # filebroker+endecoder def pull_metadata(self, citekey): diff --git a/pubs/datacache.py b/pubs/datacache.py index 2c3c5a4..e7738d9 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -1,6 +1,16 @@ +import os +import time from . import databroker + +class CacheEntry(object): + + def __init__(self, data, timestamp): + self.data = data + self.timestamp = timestamp + + class DataCache(object): """ DataCache class, provides a very similar interface as DataBroker @@ -16,9 +26,18 @@ class DataCache(object): def __init__(self, directory, create=False): self.directory = directory self._databroker = None + self._metacache = None + self._metacache_modified = False # if True, cache will need to be written to disk. 
+ self._bibcache = None + self._bibcache_modified = False # if True, cache will need to be written to disk. + # does the filesystem supports subsecond stat time? + self.nsec_support = os.stat('.').st_mtime != int(os.stat('.').st_mtime) if create: self._create() + def close(self): + self.flush_cache() + @property def databroker(self): if self._databroker is None: @@ -28,23 +47,93 @@ class DataCache(object): def _create(self): self._databroker = databroker.DataBroker(self.directory, create=True) + @property + def metacache(self): + if self._metacache is None: + try: + self._metacache = self.databroker.pull_cache('metacache') + except IOError: + self._metacache = {} + return self._metacache + + @property + def bibcache(self): + if self._bibcache is None: + try: + self._bibcache = self.databroker.pull_cache('bibcache') + except IOError: + self._bibcache = {} + return self._bibcache + + def flush_cache(self, force=False): + """Write cache to disk""" + if force or self._metacache_modified: + self.databroker.push_cache('metacache', self.metacache) + self._metacache_modified = False + if force or self._bibcache_modified: + self.databroker.push_cache('bibcache', self.bibcache) + self._bibcache_modified = False + def pull_metadata(self, citekey): - return self.databroker.pull_metadata(citekey) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + if citekey in self.metacache: + cached_metadata = self.metacache[citekey] + boundary = mtime if self.nsec_support else mtime + 1 + if cached_metadata.timestamp >= boundary: + return cached_metadata.data + + # if we get here, we must update the cache. 
+ t = time.time() + data = self.databroker.pull_metadata(citekey) + self.metacache[citekey] = CacheEntry(data, t) + self._metacache_modified = True + return self.metacache[citekey].data def pull_bibentry(self, citekey): - return self.databroker.pull_bibentry(citekey) + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + if citekey in self.bibcache: + cached_bibdata = self.bibcache[citekey] + boundary = mtime if self.nsec_support else mtime + 1 + if cached_bibdata.timestamp >= boundary: + return cached_bibdata.data + + # if we get here, we must update the cache. + t = time.time() + data = self.databroker.pull_bibentry(citekey) + self.bibcache[citekey] = CacheEntry(data, t) + self._bibcache_modified = True + return self.bibcache[citekey].data def push_metadata(self, citekey, metadata): self.databroker.push_metadata(citekey, metadata) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + #print('push', mtime, metadata) + self.metacache[citekey] = CacheEntry(metadata, mtime) + self._metacache_modified = True def push_bibentry(self, citekey, bibdata): self.databroker.push_bibentry(citekey, bibdata) + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + self.bibcache[citekey] = CacheEntry(bibdata, mtime) + self._bibcache_modified = True def push(self, citekey, metadata, bibdata): self.databroker.push(citekey, metadata, bibdata) + mtime = self.databroker.filebroker.mtime_metafile(citekey) + self.metacache[citekey] = CacheEntry(metadata, mtime) + self._metacache_modified = True + mtime = self.databroker.filebroker.mtime_bibfile(citekey) + self.bibcache[citekey] = CacheEntry(bibdata, mtime) + self._bibcache_modified = True def remove(self, citekey): self.databroker.remove(citekey) + if citekey in self.metacache: + self.metacache.pop(citekey) + self._metacache_modified = True + if citekey in self.bibcache: + self.bibcache.pop(citekey) + self._bibcache_modified = True def exists(self, citekey, meta_check=False): return self.databroker.exists(citekey, 
meta_check=meta_check) @@ -85,14 +174,3 @@ class DataCache(object): def rename_note(self, old_citekey, new_citekey): return self.databroker.rename_note(old_citekey, new_citekey) - - -# class ChangeTracker(object): - -# def __init__(self, cache, directory): -# self.cache = cache -# self.directory = directory - -# def changes(self): -# """ Returns the list of modified files since the last cache was saved to disk""" -# pass diff --git a/pubs/filebroker.py b/pubs/filebroker.py index 1d59fbb..934aaad 100644 --- a/pubs/filebroker.py +++ b/pubs/filebroker.py @@ -6,6 +6,8 @@ from .content import (check_file, check_directory, read_text_file, write_file, system_path, check_content, content_type, get_content, copy_content) +from . import content + def filter_filename(filename, ext): """ Return the filename without the extension if the extension matches ext. @@ -24,23 +26,50 @@ class FileBroker(object): """ def __init__(self, directory, create=False): - self.directory = directory - self.metadir = os.path.join(self.directory, 'meta') - self.bibdir = os.path.join(self.directory, 'bib') + self.directory = os.path.expanduser(directory) + self.metadir = os.path.join(self.directory, 'meta') + self.bibdir = os.path.join(self.directory, 'bib') + self.cachedir = os.path.join(self.directory, '.cache') if create: self._create() check_directory(self.directory) check_directory(self.metadir) check_directory(self.bibdir) + # cache directory is created (if absent) if other directories exists. 
+ if not check_directory(self.cachedir, fail=False): + os.mkdir(system_path(self.cachedir)) def _create(self): - if not check_directory(self.directory, fail = False): + """Create meta and bib directories if absent""" + if not check_directory(self.directory, fail=False): os.mkdir(system_path(self.directory)) - if not check_directory(self.metadir, fail = False): + if not check_directory(self.metadir, fail=False): os.mkdir(system_path(self.metadir)) - if not check_directory(self.bibdir, fail = False): + if not check_directory(self.bibdir, fail=False): os.mkdir(system_path(self.bibdir)) + def pull_cachefile(self, filename): + filepath = os.path.join(self.cachedir, filename) + return content.read_binary_file(filepath) + + def push_cachefile(self, filename, data): + filepath = os.path.join(self.cachedir, filename) + write_file(filepath, data, mode='wb') + + def mtime_metafile(self, citekey): + try: + filepath = os.path.join(self.metadir, citekey + '.yaml') + return os.path.getmtime(filepath) + except OSError: + raise IOError("'{}' not found.".format(filepath)) + + def mtime_bibfile(self, citekey): + try: + filepath = os.path.join(self.bibdir, citekey + '.bib') + return os.path.getmtime(filepath) + except OSError: + raise IOError("'{}' not found.".format(filepath)) + def pull_metafile(self, citekey): filepath = os.path.join(self.metadir, citekey + '.yaml') return read_text_file(filepath) diff --git a/pubs/p3.py b/pubs/p3.py index 16bba58..6bb4d14 100644 --- a/pubs/p3.py +++ b/pubs/p3.py @@ -2,6 +2,7 @@ import io import sys if sys.version_info[0] == 2: + import cPickle as pickle def input(): return raw_input().decode(sys.stdin.encoding or 'utf8', 'ignore') @@ -48,6 +49,7 @@ else: stdio.seek(0) return stdio.read() + import pickle input = input diff --git a/pubs/repo.py b/pubs/repo.py index 5ee69f6..ceec061 100644 --- a/pubs/repo.py +++ b/pubs/repo.py @@ -41,6 +41,9 @@ class Repository(object): self._citekeys = None self.databroker = DataCache(self.conf['main']['pubsdir'], 
create=create) + def close(self): + self.databroker.close() + @property def citekeys(self): if self._citekeys is None: @@ -106,7 +109,7 @@ class Repository(object): self.databroker.remove_note(citekey, silent=True) except IOError: pass # FIXME: if IOError is about being unable to - # remove the file, we need to issue an error.I + # remove the file, we need to issue an error. self.citekeys.remove(citekey) self.databroker.remove(citekey) diff --git a/tests/fake_env.py b/tests/fake_env.py index ccceb19..7c1f754 100644 --- a/tests/fake_env.py +++ b/tests/fake_env.py @@ -43,14 +43,13 @@ class FakeIO(object): # https://github.com/jmcgeheeiv/pyfakefs/blob/master/pyfakefs/fake_filesystem.py#L2143 return self.fake_open(file, mode=mode, buffering=buffering) - BytesIO = real_io.BytesIO StringIO = real_io.StringIO -def create_fake_fs(module_list): +def create_fake_fs(module_list, nsec_stat=True): - fake_fs = fake_filesystem.FakeFilesystem() + fake_fs = fake_filesystem.FakeFilesystem(nsec_stat=nsec_stat) fake_os = fake_filesystem.FakeOsModule(fake_fs) fake_open = fake_filesystem.FakeFileOpen(fake_fs) fake_shutil = fake_filesystem_shutil.FakeShutilModule(fake_fs) diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..1981041 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,4 @@ +# those are the additional packages required to run the tests +six +-e git+http://github.com/humm/pyfakefs@feat/float_mtime#egg=pyfakefs +ddt diff --git a/tests/test_usecase.py b/tests/test_usecase.py index 264ab4d..abfaefc 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -7,6 +7,7 @@ import os import sys import six +import ddt import dotdot import fake_env @@ -22,8 +23,8 @@ from pubs.commands import init_cmd, import_cmd # makes the tests very noisy -PRINT_OUTPUT=False -CAPTURE_OUTPUT=True +PRINT_OUTPUT = False +CAPTURE_OUTPUT = True class FakeSystemExit(Exception): @@ -69,8 +70,8 @@ class CommandTestCase(unittest.TestCase): maxDiff = 
1000000 - def setUp(self): - self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update]) + def setUp(self, nsec_stat=True): + self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update], nsec_stat=nsec_stat) self.default_pubs_dir = self.fs['os'].path.expanduser('~/.pubs') self.default_conf_path = self.fs['os'].path.expanduser('~/.pubsrc') @@ -138,8 +139,8 @@ class DataCommandTestCase(CommandTestCase): copying fake data. """ - def setUp(self): - super(DataCommandTestCase, self).setUp() + def setUp(self, nsec_stat=True): + super(DataCommandTestCase, self).setUp(nsec_stat=nsec_stat) fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'data'), 'data') fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'bibexamples'), 'bibexamples') @@ -152,13 +153,13 @@ class TestInit(CommandTestCase): pubsdir = os.path.expanduser('~/pubs_test2') self.execute_cmds(['pubs init -p {}'.format(pubsdir)]) self.assertEqual(set(self.fs['os'].listdir(pubsdir)), - {'bib', 'doc', 'meta', 'notes'}) + {'bib', 'doc', 'meta', 'notes', '.cache'}) def test_init2(self): pubsdir = os.path.expanduser('~/.pubs') self.execute_cmds(['pubs init']) self.assertEqual(set(self.fs['os'].listdir(pubsdir)), - {'bib', 'doc', 'meta', 'notes'}) + {'bib', 'doc', 'meta', 'notes', '.cache'}) def test_init_config(self): self.execute_cmds(['pubs init']) @@ -598,5 +599,50 @@ class TestUsecase(DataCommandTestCase): self.assertFalse(self.fs['os'].path.isfile(self.default_conf_path)) self.assertTrue(self.fs['os'].path.isfile(alt_conf)) + + +@ddt.ddt +class TestCache(DataCommandTestCase): + """\ + Run tests on fake filesystems supporting both integers and nanosecond + stat times. 
+ """ + + def setUp(self): + pass + + @ddt.data(True, False) + def test_remove(self, nsec_stat): + DataCommandTestCase.setUp(self, nsec_stat=nsec_stat) + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + ('pubs remove Page99', ['y']), + 'pubs list', + ] + out = self.execute_cmds(cmds) + self.assertEqual(1, len(out[3].split('\n'))) + + @ddt.data(True, False) + def test_edit(self, nsec_stat): + DataCommandTestCase.setUp(self, nsec_stat=nsec_stat) + + bib = str_fixtures.bibtex_external0 + bib1 = re.sub('year = \{1999\}', 'year = {2007}', bib) + + line = '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) \n' + line1 = re.sub('1999', '2007', line) + + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + 'pubs list', + ('pubs edit Page99', [bib1]), + 'pubs list', + ] + + out = self.execute_cmds(cmds) + self.assertEqual(line, out[2]) + self.assertEqual(line1, out[4]) + + if __name__ == '__main__': unittest.main() From ee3a7cd77d1d3e4ebe3f3ef51ad69ef06895e3d6 Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Sat, 23 Jan 2016 02:11:08 +0100 Subject: [PATCH 3/7] Travis uses tests/requirements.txt --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 850b4e8..e990ebc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - "3.5" # command to install dependencies install: - - "pip install pyfakefs" + - "pip install -r tests/requirements.txt" - "python setup.py install" # command to run tests script: python -m unittest discover From 22c7acde999679ebb98e1e2423c3ccba7478aa1d Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Sat, 23 Jan 2016 02:31:20 +0100 Subject: [PATCH 4/7] Rewrite cache in case of any problem --- pubs/datacache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pubs/datacache.py b/pubs/datacache.py index e7738d9..852359b 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -52,7 +52,7 @@ class 
DataCache(object): if self._metacache is None: try: self._metacache = self.databroker.pull_cache('metacache') - except IOError: + except Exception as e: # take no prisonners; if something is wrong, no cache. self._metacache = {} return self._metacache @@ -61,7 +61,7 @@ class DataCache(object): if self._bibcache is None: try: self._bibcache = self.databroker.pull_cache('bibcache') - except IOError: + except Exception as e: self._bibcache = {} return self._bibcache From d9f24052fcb7342558ac96375edb72fe707f36cb Mon Sep 17 00:00:00 2001 From: Fabien Benureau Date: Sun, 31 Jan 2016 06:57:04 +0100 Subject: [PATCH 5/7] Update bs4 code; removes warning --- pubs/apis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pubs/apis.py b/pubs/apis.py index cb1046f..8c95126 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -17,8 +17,7 @@ def isbn2bibtex(isbn): url = 'http://www.ottobib.com/isbn/{}/bibtex'.format(isbn) r = requests.get(url) - soup = BeautifulSoup(r.text) + soup = BeautifulSoup(r.text, "html.parser") citation = soup.find("textarea").text return citation - From 57a32e5601a504ef2f4c48cde4064016a40141e9 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Tue, 2 Feb 2016 19:06:34 -0500 Subject: [PATCH 6/7] Removes some code duplication. --- pubs/datacache.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pubs/datacache.py b/pubs/datacache.py index 852359b..291ab1b 100644 --- a/pubs/datacache.py +++ b/pubs/datacache.py @@ -47,22 +47,22 @@ class DataCache(object): def _create(self): self._databroker = databroker.DataBroker(self.directory, create=True) + def _try_pull_cache(self, name): + try: + return self.databroker.pull_cache(name) + except Exception as e: # take no prisonners; if something is wrong, no cache. 
+ return {} + @property def metacache(self): if self._metacache is None: - try: - self._metacache = self.databroker.pull_cache('metacache') - except Exception as e: # take no prisonners; if something is wrong, no cache. - self._metacache = {} + self._metacache = self._try_pull_cache('metacache') return self._metacache @property def bibcache(self): if self._bibcache is None: - try: - self._bibcache = self.databroker.pull_cache('bibcache') - except Exception as e: - self._bibcache = {} + self._bibcache = self._try_pull_cache('bibcache') return self._bibcache def flush_cache(self, force=False): @@ -74,12 +74,15 @@ class DataCache(object): self.databroker.push_cache('bibcache', self.bibcache) self._bibcache_modified = False + def _is_uptodate(self, cached_data, file_mtime): + boundary = file_mtime if self.nsec_support else file_mtime + 1 + return cached_data.timestamp <= boundary + def pull_metadata(self, citekey): mtime = self.databroker.filebroker.mtime_metafile(citekey) if citekey in self.metacache: cached_metadata = self.metacache[citekey] - boundary = mtime if self.nsec_support else mtime + 1 - if cached_metadata.timestamp >= boundary: + if self._is_uptodate(cached_metadata, mtime): return cached_metadata.data # if we get here, we must update the cache. @@ -93,8 +96,7 @@ class DataCache(object): mtime = self.databroker.filebroker.mtime_bibfile(citekey) if citekey in self.bibcache: cached_bibdata = self.bibcache[citekey] - boundary = mtime if self.nsec_support else mtime + 1 - if cached_bibdata.timestamp >= boundary: + if self._is_uptodate(cached_bibdata, mtime): return cached_bibdata.data # if we get here, we must update the cache. From 2859e4bc62c2e29ef0f7b293f49df87ef2737754 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Tue, 2 Feb 2016 21:21:24 -0500 Subject: [PATCH 7/7] More refactoring to remove code duplication. Also adds tests. 
class CacheEntry(object):
    """A cached value together with the time it was read from disk."""

    def __init__(self, data, timestamp):
        self.data = data
        self.timestamp = timestamp


class CacheEntrySet(object):
    """In-memory cache for one kind of pubs data.

    Wraps a databroker and mirrors its pull/push operations for either
    the metadata files (`name == 'metacache'`) or the bibliography files
    (`name == 'bibcache'`), only hitting the disk when the cached copy
    is missing or older than the corresponding file's mtime.
    """

    def __init__(self, databroker, name):
        """
        :param databroker: broker providing the pull/push/mtime operations.
        :param name: either 'metacache' or 'bibcache'; selects which broker
            functions this entry set delegates to.
        :raises ValueError: if `name` is not a known cache name.
        """
        self.databroker = databroker
        self.name = name
        if name == 'metacache':
            self._pull_fun = databroker.pull_metadata
            self._push_fun = databroker.push_metadata
            self._mtime_fun = databroker.filebroker.mtime_metafile
        elif name == 'bibcache':
            self._pull_fun = databroker.pull_bibentry
            self._push_fun = databroker.push_bibentry
            self._mtime_fun = databroker.filebroker.mtime_bibfile
        else:
            raise ValueError('unknown cache name: {}'.format(name))
        self._entries = None
        self.modified = False  # if True, cache will need to be written to disk.
        # does the filesystem supports subsecond stat time?
        self.nsec_support = os.stat('.').st_mtime != int(os.stat('.').st_mtime)

    @property
    def entries(self):
        """Dict mapping citekeys to CacheEntry, loaded lazily from disk."""
        if self._entries is None:
            self._entries = self._try_pull_cache()
        return self._entries

    def flush(self, force=False):
        """Write the cache to disk if it was modified (always when `force`)."""
        if force or self.modified:
            self.databroker.push_cache(self.name, self.entries)
            self.modified = False

    def pull(self, citekey):
        """Return the data for `citekey`, refreshing the cache if outdated."""
        if self._is_outdated(citekey):
            # if we get here, we must update the cache.
            t = time.time()
            data = self._pull_fun(citekey)
            self.entries[citekey] = CacheEntry(data, t)
            self.modified = True
        return self.entries[citekey].data

    def push(self, citekey, data):
        """Push `data` to the disk file and to the cache."""
        self._push_fun(citekey, data)
        self.push_to_cache(citekey, data)

    def push_to_cache(self, citekey, data):
        """Push to cache only; the file on disk is left untouched."""
        mtime = self._mtime_fun(citekey)
        self.entries[citekey] = CacheEntry(data, mtime)
        self.modified = True

    def remove_from_cache(self, citekey):
        """Removes from cache only; the file on disk is left untouched."""
        if citekey in self.entries:
            self.entries.pop(citekey)
            self.modified = True

    def _try_pull_cache(self):
        """Load the cache from disk, falling back to an empty one."""
        try:
            return self.databroker.pull_cache(self.name)
        except Exception:  # take no prisoners; if something is wrong, no cache.
            return {}

    def _is_outdated(self, citekey):
        """True if `citekey` is absent or its cache entry predates the file.

        Without subsecond mtime support, a file modified in the same second
        as the cache entry could be missed, so the boundary is widened by
        one second in that case.
        """
        if citekey not in self.entries:
            return True
        mtime = self._mtime_fun(citekey)
        boundary = mtime if self.nsec_support else mtime + 1
        return self.entries[citekey].timestamp < boundary


class DataCache(object):
    """Caching layer in front of DataBroker.

    Provides a very similar interface as DataBroker, but pulls of metadata
    and bibliography data go through per-kind CacheEntrySet instances so
    unchanged files are not re-read from disk.
    """

    def __init__(self, directory, create=False):
        """
        :param directory: repository directory handed to the DataBroker.
        :param create: if True, create the on-disk broker immediately.
        """
        self.directory = directory
        self._databroker = None
        self._metacache = None
        self._bibcache = None
        if create:
            self._create()

    @property
    def databroker(self):
        # instantiated lazily so a DataCache can be built without touching disk
        if self._databroker is None:
            self._databroker = databroker.DataBroker(self.directory, create=False)
        return self._databroker

    @property
    def metacache(self):
        if self._metacache is None:
            self._metacache = CacheEntrySet(self.databroker, 'metacache')
        return self._metacache

    @property
    def bibcache(self):
        if self._bibcache is None:
            self._bibcache = CacheEntrySet(self.databroker, 'bibcache')
        return self._bibcache

    def _create(self):
        self._databroker = databroker.DataBroker(self.directory, create=True)

    def flush_cache(self, force=False):
        """Write cache to disk"""
        self.metacache.flush(force=force)
        self.bibcache.flush(force=force)

    def pull_metadata(self, citekey):
        return self.metacache.pull(citekey)

    def pull_bibentry(self, citekey):
        return self.bibcache.pull(citekey)

    def push_metadata(self, citekey, metadata):
        self.metacache.push(citekey, metadata)

    def push_bibentry(self, citekey, bibdata):
        self.bibcache.push(citekey, bibdata)

    def push(self, citekey, metadata, bibdata):
        # the broker writes both files at once; the caches are then updated
        # from the resulting mtimes without a second disk write.
        self.databroker.push(citekey, metadata, bibdata)
        self.metacache.push_to_cache(citekey, metadata)
        self.bibcache.push_to_cache(citekey, bibdata)

    def remove(self, citekey):
        self.databroker.remove(citekey)
        self.metacache.remove_from_cache(citekey)
        self.bibcache.remove_from_cache(citekey)

    def exists(self, citekey, meta_check=False):
        return self.databroker.exists(citekey, meta_check=meta_check)
# -*- coding: utf-8 -*-
import unittest
import time

import dotdot
import fake_env

from pubs.datacache import CacheEntrySet


class FakeFileBrokerMeta(object):
    """Filebroker stub exposing only the metadata-file mtime."""

    mtime = None

    def mtime_metafile(self, key):
        return self.mtime


class FakeFileBrokerBib(object):
    """Filebroker stub exposing only the bib-file mtime."""

    mtime = None

    def mtime_bibfile(self, key):
        return self.mtime


class FakeDataBrokerMeta(object):
    """Databroker stub providing only the metadata operations."""

    meta = None

    def pull_metadata(self, key):
        return self.meta

    def push_metadata(self, key, meta):
        self.meta = meta

    def push_cache(self, name, entries):
        # only the metadata cache may be flushed through this broker
        if name != 'metacache':
            raise AttributeError


class FakeDataBrokerBib(object):
    """Databroker stub providing only the bibliography operations."""

    bib = None

    def pull_bibentry(self, key):
        return self.bib

    def push_bibentry(self, key, bib):
        self.bib = bib

    def push_cache(self, name, entries):
        # only the bib cache may be flushed through this broker
        if name != 'bibcache':
            raise AttributeError


class TestCacheEntrySet(unittest.TestCase):

    def setUp(self):
        self.databroker_meta = FakeDataBrokerMeta()
        self.databroker_meta.filebroker = FakeFileBrokerMeta()
        self.databroker_bib = FakeDataBrokerBib()
        self.databroker_bib.filebroker = FakeFileBrokerBib()
        self.metacache = CacheEntrySet(self.databroker_meta, 'metacache')
        self.bibcache = CacheEntrySet(self.databroker_bib, 'bibcache')

    def test_metacache_does_not_call_bib_functions(self):
        with self.assertRaises(AttributeError):
            CacheEntrySet(self.databroker_bib, 'metacache')

    def test_bibcache_does_not_call_meta_functions(self):
        with self.assertRaises(AttributeError):
            CacheEntrySet(self.databroker_meta, 'bibcache')

    def test_push_pushes_to_file(self):
        self.metacache.push('a', 'b')
        self.assertEqual(self.databroker_meta.meta, 'b')
        self.bibcache.push('a', 'b')
        self.assertEqual(self.databroker_bib.bib, 'b')

    def test_push_to_cache_does_not_push_to_file(self):
        self.metacache.push_to_cache('a', 'b')
        self.assertIs(self.databroker_meta.meta, None)
        self.bibcache.push_to_cache('a', 'b')
        self.assertIs(self.databroker_bib.bib, None)

    def test_push_to_cache_pushes_to_cache(self):
        self.metacache.push_to_cache('a', 'b')
        self.assertIs(self.metacache.entries['a'].data, 'b')
        self.bibcache.push_to_cache('a', 'b')
        self.assertIs(self.bibcache.entries['a'].data, 'b')

    def test_pulls_from_file(self):
        self.databroker_meta.meta = 'b'
        value = self.metacache.pull('a')
        self.assertEqual(value, 'b')
        self.databroker_bib.bib = 'b'
        value = self.bibcache.pull('a')
        self.assertEqual(value, 'b')

    def test_pulls_from_cache(self):
        # cache entry is newer than the file: the cached value wins.
        self.databroker_meta.meta = 'b'
        self.databroker_meta.filebroker.mtime = time.time()
        self.metacache.push_to_cache('a', 'c')
        self.databroker_meta.filebroker.mtime = time.time() - 1.1
        value = self.metacache.pull('a')
        self.assertEqual(value, 'c')
        self.databroker_bib.bib = 'b'
        self.databroker_bib.filebroker.mtime = time.time()
        self.bibcache.push_to_cache('a', 'c')
        self.databroker_bib.filebroker.mtime = time.time() - 1.1
        value = self.bibcache.pull('a')
        self.assertEqual(value, 'c')

    def test_pulls_outdated_from_file(self):
        # file is newer than the cache entry: the file value wins.
        self.databroker_meta.meta = 'b'
        self.databroker_meta.filebroker.mtime = time.time()
        self.metacache.push_to_cache('a', 'c')
        self.databroker_meta.filebroker.mtime = time.time() + 1.1
        value = self.metacache.pull('a')
        self.assertEqual(value, 'b')
        self.databroker_bib.bib = 'b'
        self.databroker_bib.filebroker.mtime = time.time()
        self.bibcache.push_to_cache('a', 'c')
        self.databroker_bib.filebroker.mtime = time.time() + 1.1
        value = self.bibcache.pull('a')
        self.assertEqual(value, 'b')

    def test_is_outdated_when_unknown_citekey(self):
        self.assertTrue(self.metacache._is_outdated('a'))

    def test_is_outdated_when_newer_mtime(self):
        # Actually tests for mtime in future
        self.metacache.push_to_cache('a', 'b')
        self.databroker_meta.filebroker.mtime = time.time() + 1.1
        self.assertTrue(self.metacache._is_outdated('a'))

    def test_is_not_outdated_when_older_mtime(self):
        # Actually tests for mtime in the past
        self.databroker_meta.filebroker.mtime = time.time()
        self.metacache.push_to_cache('a', 'b')
        self.databroker_meta.filebroker.mtime = time.time() - 1.1
        self.assertFalse(self.metacache._is_outdated('a'))