Cache implemented

The implementation is designed to be robust to filesystems with either
integer-only or nanosecond-resolution stat times.

Added a requirements.txt file for the tests: run
`pip install -r tests/requirements.txt` to install the packages needed
to run the tests.

Fix #13, Fix #14
main
Fabien Benureau 9 years ago
parent 9ad6d8db25
commit e5b898c5c2

@ -141,3 +141,5 @@ def command(conf, args):
ui.message('{} was moved to the pubs repository.'.format(docfile))
else:
ui.message('{} was copied to the pubs repository.'.format(docfile))
rp.close()

@ -148,3 +148,5 @@ def command(conf, args):
except OSError:
ui.error("Command does not exist: %s." % with_command)
ui.exit(127)
rp.close()

@ -67,3 +67,5 @@ def command(conf, args):
break
# else edit again
# Also handle malformed bibtex and metadata
rp.close()

@ -35,3 +35,5 @@ def command(conf, args):
exporter = endecoder.EnDecoder()
bibdata_raw = exporter.encode_bibdata(bib)
ui.message(bibdata_raw)
rp.close()

@ -86,3 +86,5 @@ def command(conf, args):
else:
rp.push_doc(p.citekey, docfile, copy=copy)
#FIXME should move the file if configured to do so.
rp.close()

@ -52,6 +52,8 @@ def command(conf, args):
pretty.paper_oneliner(p, citekey_only=args.citekeys)
for p in papers))
rp.close()
FIELD_ALIASES = {
'a': 'author',

@ -21,3 +21,4 @@ def command(conf, args):
citekey = resolve_citekey(rp, args.citekey, ui=ui, exit_on_fail=True)
notepath = rp.databroker.real_notepath(citekey)
content.edit_file(conf['main']['edit_cmd'], notepath, temporary=False)
rp.close()

@ -2,6 +2,7 @@ from .. import repo
from .. import color
from ..uis import get_ui
from ..utils import resolve_citekey_list
from ..p3 import ustr
def parser(subparsers):
parser = subparsers.add_parser('remove', help='removes a publication')
@ -31,7 +32,7 @@ def command(conf, args):
try:
rp.remove_paper(c)
except Exception as e:
ui.error(e.message)
ui.error(ustr(e))
failed = True
ui.message('The publication(s) [{}] were removed'.format(
', '.join([color.dye_out(c, 'citekey') for c in keys])))

@ -28,3 +28,5 @@ def command(conf, args):
ui.message("The '{}' citekey has been renamed into '{}'".format(
color.dye_out(args.citekey, 'citekey'),
color.dye_out(args.new_citekey, 'citekey')))
rp.close()

@ -113,3 +113,5 @@ def command(conf, args):
ui.message('\n'.join(pretty.paper_oneliner(p)
for p in papers_list))
rp.close()

@ -68,14 +68,20 @@ def check_directory(path, fail=True):
and _check_system_path_is(u'isdir', syspath, fail=fail))
def read_text_file(filepath, fail=True):
    """Return the text content of the file at *filepath*.

    :param filepath: path of the file to read.
    :param fail: forwarded to `check_file`; when False, a missing file does
        not raise during the existence check.
    :raises UnableToDecodeTextFile: if the file content is not valid text.
    """
    check_file(filepath, fail=fail)
    try:
        with _open(filepath, 'r') as f:
            content = f.read()
    except UnicodeDecodeError:
        # Should "raise from", once Python 2 support is dropped.
        raise UnableToDecodeTextFile(filepath)
    return content
def read_binary_file(filepath, fail=True):
    """Return the raw bytes of the file at *filepath*.

    :param fail: forwarded to `check_file`; when False, a missing file does
        not raise during the existence check.
    """
    check_file(filepath, fail=fail)
    with _open(filepath, 'rb') as f:
        return f.read()
@ -84,9 +90,9 @@ def remove_file(filepath):
os.remove(filepath)
def write_file(filepath, data, mode='w'):
    """Write *data* to *filepath*.

    :param mode: file open mode; pass 'wb' to write binary data.
    :raises: whatever `check_directory` raises if the parent directory
        is missing.
    """
    check_directory(os.path.dirname(filepath))
    with _open(filepath, mode) as f:
        f.write(data)

@ -1,6 +1,6 @@
from . import filebroker
from . import endecoder
from .p3 import pickle
class DataBroker(object):
""" DataBroker class
@ -15,6 +15,20 @@ class DataBroker(object):
self.docbroker = filebroker.DocBroker(directory, scheme='docsdir', subdir='doc')
self.notebroker = filebroker.DocBroker(directory, scheme='notesdir', subdir='notes')
# cache
def close(self):
    # No-op: DataBroker holds no open handles; the method exists so that
    # DataBroker and DataCache expose the same interface.
    pass
def pull_cache(self, name):
    """Load cache data from disk. Exceptions are handled by the caller."""
    raw = self.filebroker.pull_cachefile(name)
    return pickle.loads(raw)
def push_cache(self, name, data):
    """Serialize *data* with pickle and write it to the cache file *name*."""
    self.filebroker.push_cachefile(name, pickle.dumps(data))
# filebroker+endecoder
def pull_metadata(self, citekey):

@ -1,6 +1,16 @@
import os
import time
from . import databroker
class CacheEntry(object):
    """A cached value together with the time at which it was recorded."""

    def __init__(self, data, timestamp):
        """
        :param data: the cached payload.
        :param timestamp: seconds since epoch when the payload was cached.
        """
        self.data, self.timestamp = data, timestamp
class DataCache(object):
""" DataCache class, provides a very similar interface as DataBroker
@ -16,9 +26,18 @@ class DataCache(object):
def __init__(self, directory, create=False):
    """Caching layer in front of a DataBroker.

    :param directory: path of the pubs repository data.
    :param create: if True, create the repository structure on disk.
    """
    self.directory = directory
    self._databroker = None
    self._metacache = None
    self._metacache_modified = False  # if True, cache will need to be written to disk.
    self._bibcache = None
    self._bibcache_modified = False  # if True, cache will need to be written to disk.
    # does the filesystem support subsecond stat times?
    # (probed on the current directory; assumed uniform across the fs)
    self.nsec_support = os.stat('.').st_mtime != int(os.stat('.').st_mtime)
    if create:
        self._create()
def close(self):
    # Persist any modified in-memory caches before the repository goes away.
    self.flush_cache()
@property
def databroker(self):
if self._databroker is None:
@ -28,23 +47,93 @@ class DataCache(object):
def _create(self):
self._databroker = databroker.DataBroker(self.directory, create=True)
@property
def metacache(self):
    """Metadata cache, lazily loaded; empty when no cache file exists."""
    if self._metacache is None:
        try:
            cache = self.databroker.pull_cache('metacache')
        except IOError:
            cache = {}
        self._metacache = cache
    return self._metacache
@property
def bibcache(self):
    """Bibliography cache, lazily loaded; empty when no cache file exists."""
    if self._bibcache is None:
        try:
            cache = self.databroker.pull_cache('bibcache')
        except IOError:
            cache = {}
        self._bibcache = cache
    return self._bibcache
def flush_cache(self, force=False):
    """Write the in-memory caches to disk.

    :param force: when True, write the caches even if unmodified.
    """
    for cache_name, flag_name in (('metacache', '_metacache_modified'),
                                  ('bibcache', '_bibcache_modified')):
        if force or getattr(self, flag_name):
            self.databroker.push_cache(cache_name, getattr(self, cache_name))
            setattr(self, flag_name, False)
def pull_metadata(self, citekey):
    """Return the metadata for *citekey*, served from the cache when fresh.

    A cached entry is reused only if its timestamp is not older than the
    metadata file's mtime. On filesystems without subsecond stat times,
    one extra second is required, to stay safe with coarse clocks.
    """
    # NOTE(review): the original body had an unconditional
    # `return self.databroker.pull_metadata(citekey)` as its first
    # statement, which made all the caching logic below unreachable.
    mtime = self.databroker.filebroker.mtime_metafile(citekey)
    if citekey in self.metacache:
        cached_metadata = self.metacache[citekey]
        boundary = mtime if self.nsec_support else mtime + 1
        if cached_metadata.timestamp >= boundary:
            return cached_metadata.data
    # if we get here, we must update the cache.
    # t is taken *before* the read, so the data is at least as fresh as t.
    t = time.time()
    data = self.databroker.pull_metadata(citekey)
    self.metacache[citekey] = CacheEntry(data, t)
    self._metacache_modified = True
    return self.metacache[citekey].data
def pull_bibentry(self, citekey):
    """Return the bibdata for *citekey*, served from the cache when fresh.

    A cached entry is reused only if its timestamp is not older than the
    bib file's mtime. On filesystems without subsecond stat times, one
    extra second is required, to stay safe with coarse clocks.
    """
    # NOTE(review): the original body had an unconditional
    # `return self.databroker.pull_bibentry(citekey)` as its first
    # statement, which made all the caching logic below unreachable.
    mtime = self.databroker.filebroker.mtime_bibfile(citekey)
    if citekey in self.bibcache:
        cached_bibdata = self.bibcache[citekey]
        boundary = mtime if self.nsec_support else mtime + 1
        if cached_bibdata.timestamp >= boundary:
            return cached_bibdata.data
    # if we get here, we must update the cache.
    # t is taken *before* the read, so the data is at least as fresh as t.
    t = time.time()
    data = self.databroker.pull_bibentry(citekey)
    self.bibcache[citekey] = CacheEntry(data, t)
    self._bibcache_modified = True
    return self.bibcache[citekey].data
def push_metadata(self, citekey, metadata):
    """Write *metadata* to disk and record it in the metadata cache."""
    self.databroker.push_metadata(citekey, metadata)
    file_mtime = self.databroker.filebroker.mtime_metafile(citekey)
    self.metacache[citekey] = CacheEntry(metadata, file_mtime)
    self._metacache_modified = True
def push_bibentry(self, citekey, bibdata):
    """Write *bibdata* to disk and record it in the bibliography cache."""
    self.databroker.push_bibentry(citekey, bibdata)
    file_mtime = self.databroker.filebroker.mtime_bibfile(citekey)
    self.bibcache[citekey] = CacheEntry(bibdata, file_mtime)
    self._bibcache_modified = True
def push(self, citekey, metadata, bibdata):
    """Write both metadata and bibdata for *citekey*, updating both caches."""
    self.databroker.push(citekey, metadata, bibdata)
    files = self.databroker.filebroker
    self.metacache[citekey] = CacheEntry(metadata, files.mtime_metafile(citekey))
    self._metacache_modified = True
    self.bibcache[citekey] = CacheEntry(bibdata, files.mtime_bibfile(citekey))
    self._bibcache_modified = True
def remove(self, citekey):
    """Delete *citekey* from disk and drop any cached entries for it."""
    self.databroker.remove(citekey)
    for cache, flag_name in ((self.metacache, '_metacache_modified'),
                             (self.bibcache, '_bibcache_modified')):
        if citekey in cache:
            del cache[citekey]
            setattr(self, flag_name, True)
def exists(self, citekey, meta_check=False):
    # Delegates straight to the broker; existence checks are not cached.
    return self.databroker.exists(citekey, meta_check=meta_check)
@ -85,14 +174,3 @@ class DataCache(object):
def rename_note(self, old_citekey, new_citekey):
    # Delegates straight to the broker; notes are not cached.
    return self.databroker.rename_note(old_citekey, new_citekey)
# class ChangeTracker(object):
# def __init__(self, cache, directory):
# self.cache = cache
# self.directory = directory
# def changes(self):
# """ Returns the list of modified files since the last cache was saved to disk"""
# pass

@ -6,6 +6,8 @@ from .content import (check_file, check_directory, read_text_file, write_file,
system_path, check_content, content_type, get_content,
copy_content)
from . import content
def filter_filename(filename, ext):
""" Return the filename without the extension if the extension matches ext.
@ -24,23 +26,50 @@ class FileBroker(object):
"""
def __init__(self, directory, create=False):
    """File-level access to the repository's meta/bib/cache directories.

    :param directory: root of the pubs repository ('~' is expanded).
    :param create: if True, create missing repository directories.
    """
    # NOTE(review): the original lines kept stale duplicate assignments
    # (without expanduser) above the current ones; only the
    # expanduser-based assignments are kept here.
    self.directory = os.path.expanduser(directory)
    self.metadir = os.path.join(self.directory, 'meta')
    self.bibdir = os.path.join(self.directory, 'bib')
    self.cachedir = os.path.join(self.directory, '.cache')
    if create:
        self._create()
    check_directory(self.directory)
    check_directory(self.metadir)
    check_directory(self.bibdir)
    # cache directory is created (if absent) if other directories exists.
    if not check_directory(self.cachedir, fail=False):
        os.mkdir(system_path(self.cachedir))
def _create(self):
    """Create meta and bib directories if absent"""
    # NOTE(review): the original lines interleaved the pre-change
    # `fail = False` variants with the current `fail=False` ones;
    # only one copy of each check is kept here.
    if not check_directory(self.directory, fail=False):
        os.mkdir(system_path(self.directory))
    if not check_directory(self.metadir, fail=False):
        os.mkdir(system_path(self.metadir))
    if not check_directory(self.bibdir, fail=False):
        os.mkdir(system_path(self.bibdir))
def pull_cachefile(self, filename):
    """Return the raw bytes of the cache file *filename*."""
    return content.read_binary_file(os.path.join(self.cachedir, filename))
def push_cachefile(self, filename, data):
    """Write binary *data* to the cache file *filename*."""
    write_file(os.path.join(self.cachedir, filename), data, mode='wb')
def mtime_metafile(self, citekey):
    """Return the modification time of the metadata file for *citekey*.

    :raises IOError: if the metadata file does not exist.
    """
    filepath = os.path.join(self.metadir, citekey + '.yaml')
    # keep the try body minimal: only getmtime can raise OSError here.
    try:
        return os.path.getmtime(filepath)
    except OSError:
        raise IOError("'{}' not found.".format(filepath))
def mtime_bibfile(self, citekey):
    """Return the modification time of the bib file for *citekey*.

    :raises IOError: if the bib file does not exist.
    """
    filepath = os.path.join(self.bibdir, citekey + '.bib')
    # keep the try body minimal: only getmtime can raise OSError here.
    try:
        return os.path.getmtime(filepath)
    except OSError:
        raise IOError("'{}' not found.".format(filepath))
def pull_metafile(self, citekey):
    """Return the text content of the metadata file for *citekey*."""
    return read_text_file(os.path.join(self.metadir, citekey + '.yaml'))

@ -2,6 +2,7 @@ import io
import sys
if sys.version_info[0] == 2:
import cPickle as pickle
def input():
return raw_input().decode(sys.stdin.encoding or 'utf8', 'ignore')
@ -48,6 +49,7 @@ else:
stdio.seek(0)
return stdio.read()
import pickle
input = input

@ -41,6 +41,9 @@ class Repository(object):
self._citekeys = None
self.databroker = DataCache(self.conf['main']['pubsdir'], create=create)
def close(self):
self.databroker.close()
@property
def citekeys(self):
if self._citekeys is None:
@ -106,7 +109,7 @@ class Repository(object):
self.databroker.remove_note(citekey, silent=True)
except IOError:
pass # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error.I
# remove the file, we need to issue an error.
self.citekeys.remove(citekey)
self.databroker.remove(citekey)

@ -43,14 +43,13 @@ class FakeIO(object):
# https://github.com/jmcgeheeiv/pyfakefs/blob/master/pyfakefs/fake_filesystem.py#L2143
return self.fake_open(file, mode=mode, buffering=buffering)
BytesIO = real_io.BytesIO
StringIO = real_io.StringIO
def create_fake_fs(module_list):
def create_fake_fs(module_list, nsec_stat=True):
fake_fs = fake_filesystem.FakeFilesystem()
fake_fs = fake_filesystem.FakeFilesystem(nsec_stat=nsec_stat)
fake_os = fake_filesystem.FakeOsModule(fake_fs)
fake_open = fake_filesystem.FakeFileOpen(fake_fs)
fake_shutil = fake_filesystem_shutil.FakeShutilModule(fake_fs)

@ -0,0 +1,4 @@
# these are the additional packages required to run the tests
six
-e git+http://github.com/humm/pyfakefs@feat/float_mtime#egg=pyfakefs
ddt

@ -7,6 +7,7 @@ import os
import sys
import six
import ddt
import dotdot
import fake_env
@ -22,8 +23,8 @@ from pubs.commands import init_cmd, import_cmd
# makes the tests very noisy
PRINT_OUTPUT=False
CAPTURE_OUTPUT=True
PRINT_OUTPUT = False
CAPTURE_OUTPUT = True
class FakeSystemExit(Exception):
@ -69,8 +70,8 @@ class CommandTestCase(unittest.TestCase):
maxDiff = 1000000
def setUp(self):
self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update])
def setUp(self, nsec_stat=True):
self.fs = fake_env.create_fake_fs([content, filebroker, conf, init_cmd, import_cmd, configobj, update], nsec_stat=nsec_stat)
self.default_pubs_dir = self.fs['os'].path.expanduser('~/.pubs')
self.default_conf_path = self.fs['os'].path.expanduser('~/.pubsrc')
@ -138,8 +139,8 @@ class DataCommandTestCase(CommandTestCase):
copying fake data.
"""
def setUp(self):
super(DataCommandTestCase, self).setUp()
def setUp(self, nsec_stat=True):
super(DataCommandTestCase, self).setUp(nsec_stat=nsec_stat)
fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'data'), 'data')
fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'bibexamples'), 'bibexamples')
@ -152,13 +153,13 @@ class TestInit(CommandTestCase):
pubsdir = os.path.expanduser('~/pubs_test2')
self.execute_cmds(['pubs init -p {}'.format(pubsdir)])
self.assertEqual(set(self.fs['os'].listdir(pubsdir)),
{'bib', 'doc', 'meta', 'notes'})
{'bib', 'doc', 'meta', 'notes', '.cache'})
def test_init2(self):
pubsdir = os.path.expanduser('~/.pubs')
self.execute_cmds(['pubs init'])
self.assertEqual(set(self.fs['os'].listdir(pubsdir)),
{'bib', 'doc', 'meta', 'notes'})
{'bib', 'doc', 'meta', 'notes', '.cache'})
def test_init_config(self):
self.execute_cmds(['pubs init'])
@ -598,5 +599,50 @@ class TestUsecase(DataCommandTestCase):
self.assertFalse(self.fs['os'].path.isfile(self.default_conf_path))
self.assertTrue(self.fs['os'].path.isfile(alt_conf))
@ddt.ddt
class TestCache(DataCommandTestCase):
    """\
    Run tests on fake filesystems supporting both integers and nanosecond
    stat times.
    """

    def setUp(self):
        # Deliberately empty: each test calls DataCommandTestCase.setUp
        # itself so that nsec_stat can be chosen per ddt-generated case.
        pass

    @ddt.data(True, False)
    def test_remove(self, nsec_stat):
        # After removing the only paper, 'pubs list' output must be a
        # single (empty) line.
        DataCommandTestCase.setUp(self, nsec_stat=nsec_stat)
        cmds = ['pubs init',
                'pubs add data/pagerank.bib',
                ('pubs remove Page99', ['y']),
                'pubs list',
               ]
        out = self.execute_cmds(cmds)
        self.assertEqual(1, len(out[3].split('\n')))

    @ddt.data(True, False)
    def test_edit(self, nsec_stat):
        # Editing the bib entry must invalidate the cache: the second
        # 'pubs list' has to show the updated year (2007, not 1999).
        DataCommandTestCase.setUp(self, nsec_stat=nsec_stat)

        bib = str_fixtures.bibtex_external0
        bib1 = re.sub('year = \{1999\}', 'year = {2007}', bib)

        line = '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) \n'
        line1 = re.sub('1999', '2007', line)

        cmds = ['pubs init',
                'pubs add data/pagerank.bib',
                'pubs list',
                ('pubs edit Page99', [bib1]),
                'pubs list',
               ]
        out = self.execute_cmds(cmds)
        self.assertEqual(line, out[2])
        self.assertEqual(line1, out[4])
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save