databroker, datacache class + tests

main
Fabien Benureau 12 years ago
parent 15857b5ecc
commit a774a1604e

@ -0,0 +1,65 @@
from . import filebroker
from . import endecoder
class DataBroker(object):
    """High-level interface to the content stored on disk.

    Requests are optimistically made, and exceptions are raised if something
    goes wrong. Raw content is read and written through a ``FileBroker`` and
    converted with an ``EnDecoder``; document operations are delegated to a
    ``DocBroker``.
    """

    def __init__(self, directory, create=False):
        """Set up the brokers for `directory`.

        `create` is forwarded to the ``FileBroker`` constructor.
        """
        self.filebroker = filebroker.FileBroker(directory, create=create)
        self.endecoder = endecoder.EnDecoder()
        self.docbroker = filebroker.DocBroker(directory)

    # filebroker+endecoder

    def pull_metadata(self, citekey):
        """Return the decoded metadata stored for `citekey`."""
        metadata_raw = self.filebroker.pull_metafile(citekey)
        return self.endecoder.decode_metadata(metadata_raw)

    def pull_bibdata(self, citekey):
        """Return the decoded bibdata stored for `citekey`."""
        bibdata_raw = self.filebroker.pull_bibfile(citekey)
        return self.endecoder.decode_bibdata(bibdata_raw)

    def push_metadata(self, citekey, metadata):
        """Encode `metadata` and store it for `citekey`."""
        metadata_raw = self.endecoder.encode_metadata(metadata)
        self.filebroker.push_metafile(citekey, metadata_raw)

    def push_bibdata(self, citekey, bibdata):
        """Encode `bibdata` and store it for `citekey`."""
        bibdata_raw = self.endecoder.encode_bibdata(bibdata)
        self.filebroker.push_bibfile(citekey, bibdata_raw)

    def push(self, citekey, metadata, bibdata):
        """Encode and store both `metadata` and `bibdata` for `citekey`.

        Like `push_metadata` and `push_bibdata`, this takes decoded data.
        Both parts are encoded before the file broker is called, so an
        encoding error is raised before anything is handed over for writing.
        """
        metadata_raw = self.endecoder.encode_metadata(metadata)
        bibdata_raw = self.endecoder.encode_bibdata(bibdata)
        self.filebroker.push(citekey, metadata_raw, bibdata_raw)

    def remove(self, citekey):
        """Ask the file broker to remove the content stored for `citekey`."""
        self.filebroker.remove(citekey)

    def exists(self, citekey, both=True):
        """Return the file broker's answer on whether `citekey` exists.

        `both` is forwarded unchanged.
        NOTE(review): presumably `both` requires the metadata and the bibdata
        files to be present together -- confirm against FileBroker.exists.
        """
        return self.filebroker.exists(citekey, both=both)

    def listing(self, filestats=True):
        """Return the file broker's listing; `filestats` is forwarded."""
        return self.filebroker.listing(filestats=filestats)

    def verify(self, bibdata_raw):
        """Return the decoded `bibdata_raw`, or None if decoding fails.

        Only ``ValueError`` is treated as a decoding failure; any other
        exception propagates to the caller.
        """
        try:
            return self.endecoder.decode_bibdata(bibdata_raw)
        except ValueError:
            return None

    # docbroker

    def is_pubsdir_doc(self, docpath):
        """Return the doc broker's answer on whether `docpath` is a pubsdir doc."""
        # NOTE(review): the original called `is_pusdir_doc`, an apparent typo
        # of this method's own name -- confirm DocBroker exposes
        # `is_pubsdir_doc`.
        return self.docbroker.is_pubsdir_doc(docpath)

    def copy_doc(self, citekey, source_path, overwrite=False):
        """Delegate to ``DocBroker.copy_doc`` and return its result."""
        return self.docbroker.copy_doc(citekey, source_path, overwrite=overwrite)

    def remove_doc(self, docpath):
        """Delegate to ``DocBroker.remove_doc`` and return its result."""
        return self.docbroker.remove_doc(docpath)

    def real_docpath(self, docpath):
        """Delegate to ``DocBroker.real_docpath`` and return its result."""
        return self.docbroker.real_docpath(docpath)

@ -0,0 +1,85 @@
from . import databroker
class DataCache(object):
    """DataCache class, provides a very similar interface to DataBroker.

    Has two roles:
    1. Provides a buffer between the commands and the hard drive.
       Until a command requests a hard drive resource, it does not touch it.
    2. Keeps an up-to-date, pickled version of the repository, to speed
       things up when there are a lot of files. Updates are also done only
       when required. Changes are detected using data modification
       timestamps.

    For the moment, only (1) is implemented.
    """

    def __init__(self, directory, create=False):
        """Remember `directory`; build the broker right away only if `create`."""
        self.directory = directory
        self._databroker = None
        if create:
            self._create()

    @property
    def databroker(self):
        """The underlying DataBroker, instantiated on first access."""
        if self._databroker is None:
            self._databroker = databroker.DataBroker(self.directory, create=False)
        return self._databroker

    def _create(self):
        """Instantiate the underlying DataBroker with ``create=True``."""
        self._databroker = databroker.DataBroker(self.directory, create=True)

    def pull_metadata(self, citekey):
        """Return the broker's metadata for `citekey`."""
        return self.databroker.pull_metadata(citekey)

    def pull_bibdata(self, citekey):
        """Return the broker's bibdata for `citekey`."""
        return self.databroker.pull_bibdata(citekey)

    def push_metadata(self, citekey, metadata):
        """Forward `metadata` for `citekey` to the broker."""
        self.databroker.push_metadata(citekey, metadata)

    def push_bibdata(self, citekey, bibdata):
        """Forward `bibdata` for `citekey` to the broker."""
        self.databroker.push_bibdata(citekey, bibdata)

    def push(self, citekey, metadata, bibdata):
        """Forward both `metadata` and `bibdata` for `citekey` to the broker."""
        self.databroker.push(citekey, metadata, bibdata)

    def remove(self, citekey):
        """Ask the broker to remove `citekey`."""
        self.databroker.remove(citekey)

    def exists(self, citekey, both=True):
        """Return the broker's answer on whether `citekey` exists.

        `both` is forwarded unchanged to the broker.
        """
        # The result must be returned: without it this method always
        # answered None, whatever the broker reported.
        return self.databroker.exists(citekey, both=both)

    def citekeys(self):
        """Return the set of keys listed under both 'metafiles' and 'bibfiles'."""
        listings = self.listing(filestats=False)
        return set(listings['metafiles']).intersection(listings['bibfiles'])

    def listing(self, filestats=True):
        """Return the broker's listing; `filestats` is forwarded."""
        return self.databroker.listing(filestats=filestats)

    def verify(self, bibdata_raw):
        """Will return None if bibdata_raw can't be decoded"""
        return self.databroker.verify(bibdata_raw)

    # docbroker

    def is_pubsdir_doc(self, docpath):
        """Return the broker's answer on whether `docpath` is a pubsdir doc."""
        # DataBroker names this method `is_pubsdir_doc`; the previous
        # `is_pusdir_doc` spelling raised AttributeError.
        return self.databroker.is_pubsdir_doc(docpath)

    def copy_doc(self, citekey, source_path, overwrite=False):
        """Delegate to ``DataBroker.copy_doc`` and return its result."""
        return self.databroker.copy_doc(citekey, source_path, overwrite=overwrite)

    def remove_doc(self, docpath):
        """Delegate to ``DataBroker.remove_doc`` and return its result."""
        return self.databroker.remove_doc(docpath)

    def real_docpath(self, docpath):
        """Delegate to ``DataBroker.real_docpath`` and return its result."""
        return self.databroker.real_docpath(docpath)
# class ChangeTracker(object):
# def __init__(self, cache, directory):
# self.cache = cache
# self.directory = directory
# def changes(self):
# """ Returns the list of modified files since the last cache was saved to disk"""
# pass

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
import unittest
import os
import testenv
import fake_env
from papers import content, filebroker, databroker, datacache
import str_fixtures
from papers import endecoder
class TestFakeFs(unittest.TestCase):
    """Abstract TestCase initializing the fake filesystem."""

    def setUp(self):
        # Install the fake filesystem for the modules under test and keep
        # a handle on it for the test methods.
        patched_modules = [content, filebroker]
        self.fs = fake_env.create_fake_fs(patched_modules)

    def tearDown(self):
        # Undo the patching done in setUp.
        patched_modules = [content, filebroker]
        fake_env.unset_fake_fs(patched_modules)
class TestDataBroker(TestFakeFs):
    """Run the same scenarios against DataBroker and DataCache."""

    def test_databroker(self):
        """Metadata and bibdata pushed under a citekey are pulled back equal."""
        codec = endecoder.EnDecoder()
        page99_metadata = codec.decode_metadata(str_fixtures.metadata_raw0)
        page99_bibdata = codec.decode_bibdata(str_fixtures.bibyaml_raw0)

        broker = databroker.DataBroker('tmp', create=True)
        cache = datacache.DataCache('tmp')

        for store in (broker, cache):
            store.push_metadata('citekey1', page99_metadata)
            store.push_bibdata('citekey1', page99_bibdata)

            self.assertEqual(store.pull_metadata('citekey1'), page99_metadata)
            self.assertEqual(store.pull_bibdata('citekey1'), page99_bibdata)

    def test_existing_data(self):
        """Read, copy and remove content of the 'testrepo' fixture copy."""
        codec = endecoder.EnDecoder()
        page99_bibdata = codec.decode_bibdata(str_fixtures.bibyaml_raw0)

        known_citekeys = (
            '10.1371_journal.pone.0038236',
            '10.1371journal.pone.0063400',
            'journal0063400',
        )

        for store_class in (databroker.DataBroker, datacache.DataCache):
            # Each class gets a fresh fake filesystem holding a copy of the
            # fixture repository.
            self.fs = fake_env.create_fake_fs([content, filebroker])
            fixture_dir = os.path.join(os.path.dirname(__file__), 'testrepo')
            fake_env.copy_dir(self.fs, fixture_dir, 'repo')

            store = store_class('repo', create=False)

            self.assertEqual(store.pull_bibdata('Page99'), page99_bibdata)

            # These pulls only need to complete without raising.
            for key in known_citekeys:
                store.pull_bibdata(key)
                store.pull_metadata(key)

            # A citekey with no files must raise IOError.
            with self.assertRaises(IOError):
                store.pull_bibdata('citekey')
            with self.assertRaises(IOError):
                store.pull_metadata('citekey')

            store.copy_doc('Larry99', 'pubsdir://doc/Page99.pdf')
            self.assertTrue(content.check_file('repo/doc/Page99.pdf', fail=False))
            self.assertTrue(content.check_file('repo/doc/Larry99.pdf', fail=False))

            store.remove_doc('pubsdir://doc/Page99.pdf')

@ -0,0 +1,45 @@
entries:
10.1371_journal.pone.0038236:
abstract: <p>The advent of humanoid robots has enabled a new approach to investigating
the acquisition of language, and we report on the development of robots
able to acquire rudimentary linguistic skills. Our work focuses on early
stages analogous to some characteristics of a human child of about 6 to
14 months, the transition from babbling to first word forms. We investigate
one mechanism among many that may contribute to this process, a key factor
being the sensitivity of learners to the statistical distribution of linguistic
elements. As well as being necessary for learning word meanings, the acquisition
of anchor word forms facilitates the segmentation of an acoustic stream
through other mechanisms. In our experiments some salient one-syllable
word forms are learnt by a humanoid robot in real-time interactions with
naive participants. Words emerge from random syllabic babble through a
learning process based on a dialogue between the robot and the human participant,
whose speech is perceived by the robot as a stream of phonemes. Numerous
ways of representing the speech as syllabic segments are possible. Furthermore,
the pronunciation of many words in spontaneous speech is variable. However,
in line with research elsewhere, we observe that salient content words
are more likely than function words to have consistent canonical representations;
thus their relative frequency increases, as does their influence on the
learner. Variable pronunciation may contribute to early word form acquisition.
The importance of contingent interaction in real-time between teacher
and learner is reflected by a reinforcement process, with variable success.
The examination of individual cases may be more informative than group
results. Nevertheless, word forms are usually produced by the robot after
a few minutes of dialogue, employing a simple, real-time, frequency dependent
mechanism. This work shows the potential of human-robot interaction systems
in studies of the dynamics of early language acquisition.</p>
author:
- first: Caroline
last: Saunders
middle: Lyon AND Chrystopher L. Nehaniv AND Joe
doi: 10.1371/journal.pone.0038236
journal: PLoS ONE
month: '06'
number: '6'
pages: e38236
publisher: Public Library of Science
title: 'Interactive Language Learning by Robots: The Transition from Babbling
to Word Forms'
type: article
url: http://dx.doi.org/10.1371%2Fjournal.pone.0038236
volume: '7'
year: '2012'

@ -0,0 +1,36 @@
entries:
10.1371journal.pone.0063400:
abstract: <p>Information theory is a powerful tool to express principles to
drive autonomous systems because it is domain invariant and allows for
an intuitive interpretation. This paper studies the use of the predictive
information (PI), also called excess entropy or effective measure complexity,
of the sensorimotor process as a driving force to generate behavior. We
study nonlinear and nonstationary systems and introduce the time-local
predicting information (TiPI) which allows us to derive exact results
together with explicit update rules for the parameters of the controller
in the dynamical systems framework. In this way the information principle,
formulated at the level of behavior, is translated to the dynamics of
the synapses. We underpin our results with a number of case studies with
high-dimensional robotic systems. We show the spontaneous cooperativity
in a complex physical system with decentralized control. Moreover, a jointly
controlled humanoid robot develops a high behavioral variety depending
on its physics and the environment it is dynamically embedded into. The
behavior can be decomposed into a succession of low-dimensional modes
that increasingly explore the behavior space. This is a promising way
to avoid the curse of dimensionality which hinders learning systems to
scale well.</p>
author:
- first: Georg
last: Ay
middle: Martius AND Ralf Der AND Nihat
doi: 10.1371/journal.pone.0063400
journal: PLoS ONE
month: '05'
number: '5'
pages: e63400
publisher: Public Library of Science
title: Information Driven Self-Organization of Complex Robotic Behaviors
type: article
url: http://dx.doi.org/10.1371%2Fjournal.pone.0063400
volume: '8'
year: '2013'

@ -0,0 +1,28 @@
entries:
Page99:
abstract: The importance of a Web page is an inherently subjective matter,
which depends on the readers interests, knowledge and attitudes. But there
is still much that can be said objectively about the relative importance
of Web pages. This paper describes PageRank, a mathod for rating Web pages
objectively and mechanically, effectively measuring the human interest
and attention devoted to them. We compare PageRank to an idealized random
Web surfer. We show how to efficiently compute PageRank for large numbers
of pages. And, we show how to apply PageRank to search and to user navigation.
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
institution: Stanford InfoLab
month: November
note: Previous number = SIDL-WP-1999-0120
number: 1999-66
publisher: Stanford InfoLab
title: 'The PageRank Citation Ranking: Bringing Order to the Web.'
type: techreport
url: http://ilpubs.stanford.edu:8090/422/
year: '1999'

@ -0,0 +1,15 @@
entries:
journal0063400:
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
journal: PLoS ONE
publisher: Public Library of Science
title: Information Driven Self-Organization of Complex Robotic Behaviors
type: article

Binary file not shown.

@ -0,0 +1,3 @@
docfile: null
notes: []
tags: []

@ -0,0 +1,3 @@
docfile: null
notes: []
tags: []

@ -0,0 +1,3 @@
docfile: pubsdir://doc/Page99.pdf
notes: []
tags: [search, network]

@ -0,0 +1,3 @@
docfile: null
notes: []
tags: []
Loading…
Cancel
Save