Merge #138: Rebuild cache on version change and de-duplicate version definition.

See #86 and #129.

The version number is now stored only in pubs/version.py. Both setup.py and pubs/__init__.py load the version from that file.

The cache structure now includes the version of the code that wrote the cache, which enables rebuilding the cache on version change. Loading an old cache (i.e. one without the version) fails and triggers a rebuild, and so does loading a new cache from the old code, so moving across this change is transparent.

The code also switches to version 0.8.dev1 to trigger a cache rebuild (and also because the milestone is almost there). In general, we will need to bump the version number to a development one each time a change makes old caches incompatible. There is no test for that at the moment, so this means testing new code on a local bibliography repository before pushing to master.

#129 discussed using git versions, which is not so easy to integrate nicely with setuptools. This is why I chose to stick with manually changing version numbers when necessary (which should not be so often).
main
Olivier Mangin 7 years ago committed by GitHub
commit 47b69820b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1 +1 @@
__version__ = '0.7.0'
from .version import __version__

@ -1,6 +1,7 @@
from . import filebroker
from . import endecoder
from .p3 import pickle
from . import __version__
class DataBroker(object):
@ -22,12 +23,16 @@ class DataBroker(object):
pass
def pull_cache(self, name):
"""Load cache data from distk. Exceptions are handled by the caller."""
"""Load cache data from disk. Exceptions are handled by the caller."""
data_raw = self.filebroker.pull_cachefile(name)
return pickle.loads(data_raw)
cache = pickle.loads(data_raw)
if cache['version'] != __version__:
raise ValueError('Cache not matching code version.')
return cache['data']
def push_cache(self, name, data):
data_raw = pickle.dumps(data)
cache_content = {'version': __version__, 'data': data}
data_raw = pickle.dumps(cache_content)
self.filebroker.push_cachefile(name, data_raw)
# filebroker+endecoder

@ -91,8 +91,6 @@ class DataCache(object):
2. Keeps an up-to-date, pickled version of the repository, to speed up things
when they are a lot of files. Update are also done only when required.
Changes are detected using data modification timestamps.
For the moment, only (1) is implemented.
"""
def __init__(self, pubsdir, docsdir, create=False):
self.pubsdir = pubsdir

@ -0,0 +1 @@
__version__ = '0.8.dev1'

@ -2,11 +2,12 @@
from setuptools import setup
VERSION = '0.7.0'
with open('pubs/version.py') as f:
exec(f.read()) # defines __version__
setup(
name='pubs',
version=VERSION,
version=__version__,
author='Fabien Benureau, Olivier Mangin, Jonathan Grizou',
author_email='fabien.benureau@gmail.com',
maintainer='Olivier Mangin',

@ -16,7 +16,6 @@ class TestDataBroker(fake_env.TestFakeFs):
def test_databroker(self):
ende = endecoder.EnDecoder()
page99_metadata = ende.decode_metadata(str_fixtures.metadata_raw0)
page99_bibentry = ende.decode_bibdata(str_fixtures.bibtex_raw0)
@ -71,6 +70,25 @@ class TestDataBroker(fake_env.TestFakeFs):
db.remove_doc('docsdir://Page99.pdf')
def test_push_pull_cache(self):
db = databroker.DataBroker('tmp', 'tmp/doc', create=True)
data_in = {'a': 1}
db.push_cache('meta', data_in)
data_out = db.pull_cache('meta')
self.assertEqual(data_in, data_out)
def test_pull_cache_fails_on_version_mismatch(self):
db = databroker.DataBroker('tmp', 'tmp/doc', create=True)
data_in = {'a': 1}
db.push_cache('meta', data_in)
ver = databroker.__version__
databroker.__version__ = '0.0.0'
try:
with self.assertRaises(ValueError):
db.pull_cache('meta')
finally:
databroker.__version__ = ver
if __name__ == '__main__':
unittest.main()

@ -126,7 +126,6 @@ class TestCacheEntrySet(unittest.TestCase):
value = self.bibcache.pull('a')
self.assertEqual(value, 'b')
def test_is_outdated_when_unknown_citekey(self):
self.assertTrue(self.metacache._is_outdated('a'))

Loading…
Cancel
Save