diff --git a/papers/databroker.py b/papers/databroker.py
new file mode 100644
index 0000000..fed7307
--- /dev/null
+++ b/papers/databroker.py
@@ -0,0 +1,95 @@
+from . import filebroker
+from . import endecoder
+
+
+class DataBroker(object):
+    """ DataBroker class
+
+        This is aimed at being a simple, high level interface to the content stored on disk.
+        Requests are optimistically made, and exceptions are raised if something goes wrong.
+
+        It wraps a FileBroker (raw file content), an EnDecoder (raw content <-> data)
+        and a DocBroker (document-related requests) behind a single object.
+    """
+
+    def __init__(self, directory, create=False):
+        """Instantiate the three helpers on `directory`.
+
+        `create` is forwarded as is to the FileBroker.
+        """
+        self.filebroker = filebroker.FileBroker(directory, create=create)
+        self.endecoder = endecoder.EnDecoder()
+        self.docbroker = filebroker.DocBroker(directory)
+
+    # filebroker+endecoder
+
+    def pull_metadata(self, citekey):
+        """Return the decoded content of the metafile of `citekey`."""
+        metadata_raw = self.filebroker.pull_metafile(citekey)
+        return self.endecoder.decode_metadata(metadata_raw)
+
+    def pull_bibdata(self, citekey):
+        """Return the decoded content of the bibfile of `citekey`."""
+        bibdata_raw = self.filebroker.pull_bibfile(citekey)
+        return self.endecoder.decode_bibdata(bibdata_raw)
+
+    def push_metadata(self, citekey, metadata):
+        """Encode `metadata` and push it as the metafile of `citekey`."""
+        metadata_raw = self.endecoder.encode_metadata(metadata)
+        self.filebroker.push_metafile(citekey, metadata_raw)
+
+    def push_bibdata(self, citekey, bibdata):
+        """Encode `bibdata` and push it as the bibfile of `citekey`."""
+        bibdata_raw = self.endecoder.encode_bibdata(bibdata)
+        self.filebroker.push_bibfile(citekey, bibdata_raw)
+
+    def push(self, citekey, metadata, bibdata):
+        """Encode and push both `metadata` and `bibdata` for `citekey`."""
+        # Both arguments are decoded data, as in push_metadata/push_bibdata,
+        # so they go through the same encoding step instead of being handed
+        # unencoded to the file broker.
+        self.push_metadata(citekey, metadata)
+        self.push_bibdata(citekey, bibdata)
+
+    def remove(self, citekey):
+        """Ask the file broker to remove `citekey`."""
+        self.filebroker.remove(citekey)
+
+    def exists(self, citekey, both=True):
+        """Return the file broker's answer to `exists(citekey, both=both)`."""
+        return self.filebroker.exists(citekey, both=both)
+
+    def listing(self, filestats=True):
+        """Return the file broker's listing (`filestats` is forwarded)."""
+        return self.filebroker.listing(filestats=filestats)
+
+    def verify(self, bibdata_raw):
+        """Return the decoded `bibdata_raw`, or None if it can't be decoded.
+
+        Only a ValueError raised by the decoder is turned into None.
+        """
+        try:
+            return self.endecoder.decode_bibdata(bibdata_raw)
+        except ValueError:
+            return None
+
+    # docbroker
+
+    def is_pubsdir_doc(self, docpath):
+        """Forward `docpath` to the document broker's check."""
+        # NOTE(review): `is_pusdir_doc` looks like a typo for
+        # `is_pubsdir_doc`; DocBroker is not visible from here -- confirm
+        # the name it actually defines before renaming this call.
+        return self.docbroker.is_pusdir_doc(docpath)
+
+    def copy_doc(self, citekey, source_path, overwrite=False):
+        """Forward the copy to the document broker and return its result."""
+        return self.docbroker.copy_doc(citekey, source_path, overwrite=overwrite)
+
+    def remove_doc(self, docpath):
+        """Forward the removal of `docpath` to the document broker."""
+        return self.docbroker.remove_doc(docpath)
+
+    def real_docpath(self, docpath):
+        """Return the document broker's `real_docpath(docpath)`."""
+        return self.docbroker.real_docpath(docpath)
\ No newline at end of file
diff --git a/papers/datacache.py b/papers/datacache.py
new file mode 100644
index 0000000..0d75fb3
--- /dev/null
+++ b/papers/datacache.py
@@ -0,0 +1,105 @@
+
+from . import databroker
+
+class DataCache(object):
+    """ DataCache class, provides a very similar interface as DataBroker
+
+        Has two roles :
+        1. Provides a buffer between the commands and the hard drive.
+           Until a command requests a hard drive resource, it does not touch it.
+        2. Keeps an up-to-date, pickled version of the repository, to speed up things
+           when there are a lot of files. Updates are also done only when required.
+           Changes are detected using data modification timestamps.
+
+        For the moment, only (1) is implemented.
+    """
+    def __init__(self, directory, create=False):
+        """Store `directory`; the DataBroker is built now only if `create`."""
+        self.directory = directory
+        self._databroker = None
+        if create:
+            self._create()
+
+    @property
+    def databroker(self):
+        """The underlying DataBroker, instantiated on first access."""
+        if self._databroker is None:
+            self._databroker = databroker.DataBroker(self.directory, create=False)
+        return self._databroker
+
+    def _create(self):
+        """Instantiate the DataBroker with `create=True`."""
+        self._databroker = databroker.DataBroker(self.directory, create=True)
+
+    def pull_metadata(self, citekey):
+        """Same as DataBroker.pull_metadata."""
+        return self.databroker.pull_metadata(citekey)
+
+    def pull_bibdata(self, citekey):
+        """Same as DataBroker.pull_bibdata."""
+        return self.databroker.pull_bibdata(citekey)
+
+    def push_metadata(self, citekey, metadata):
+        """Same as DataBroker.push_metadata."""
+        self.databroker.push_metadata(citekey, metadata)
+
+    def push_bibdata(self, citekey, bibdata):
+        """Same as DataBroker.push_bibdata."""
+        self.databroker.push_bibdata(citekey, bibdata)
+
+    def push(self, citekey, metadata, bibdata):
+        """Same as DataBroker.push."""
+        self.databroker.push(citekey, metadata, bibdata)
+
+    def remove(self, citekey):
+        """Same as DataBroker.remove."""
+        self.databroker.remove(citekey)
+
+    def exists(self, citekey, both=True):
+        """Return the result of DataBroker.exists for `citekey`."""
+        # The result has to be returned explicitly: a bare call would make
+        # this method answer None whatever the DataBroker says.
+        return self.databroker.exists(citekey, both=both)
+
+    def citekeys(self):
+        """Return the set of names listed both as metafiles and as bibfiles."""
+        listings = self.listing(filestats=False)
+        return set(listings['metafiles']).intersection(listings['bibfiles'])
+
+    def listing(self, filestats=True):
+        """Same as DataBroker.listing."""
+        return self.databroker.listing(filestats=filestats)
+
+    def verify(self, bibdata_raw):
+        """Will return None if bibdata_raw can't be decoded"""
+        return self.databroker.verify(bibdata_raw)
+
+    # docbroker
+
+    def is_pubsdir_doc(self, docpath):
+        """Same as DataBroker.is_pubsdir_doc."""
+        # DataBroker names this method `is_pubsdir_doc`; it has no
+        # `is_pusdir_doc` attribute, so that spelling cannot be used here.
+        return self.databroker.is_pubsdir_doc(docpath)
+
+    def copy_doc(self, citekey, source_path, overwrite=False):
+        """Same as DataBroker.copy_doc."""
+        return self.databroker.copy_doc(citekey, source_path, overwrite=overwrite)
+
+    def remove_doc(self, docpath):
+        """Same as DataBroker.remove_doc."""
+        return self.databroker.remove_doc(docpath)
+
+    def real_docpath(self, docpath):
+        """Same as DataBroker.real_docpath."""
+        return self.databroker.real_docpath(docpath)
+
+# class ChangeTracker(object):
+
+#     def __init__(self, cache, directory):
+#         self.cache = cache
+#         self.directory = directory
+
+#     def changes(self):
+#         """ Returns the list of modified files since the last cache was saved to disk"""
+#         pass
diff --git a/tests/test_databroker.py b/tests/test_databroker.py
new file mode 100644
index 0000000..8cb6344
--- /dev/null
+++ b/tests/test_databroker.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+import unittest
+import os
+
+import testenv
+import fake_env
+
+from papers import content, filebroker, databroker, datacache
+
+import str_fixtures
+from papers import endecoder
+
+class TestFakeFs(unittest.TestCase):
+    """Abstract TestCase initializing the fake filesystem."""
+
+    def setUp(self):
+        self.fs = fake_env.create_fake_fs([content, filebroker])
+
+    def tearDown(self):
+        fake_env.unset_fake_fs([content, filebroker])
+
+
+class TestDataBroker(TestFakeFs):
+
+    def test_databroker(self):
+
+        # Round trip: what is pushed must be pulled back equal, for both classes.
+        ende = endecoder.EnDecoder()
+        page99_metadata = ende.decode_metadata(str_fixtures.metadata_raw0)
+        page99_bibdata = ende.decode_bibdata(str_fixtures.bibyaml_raw0)
+
+        dtb = databroker.DataBroker('tmp', create=True)
+        dtc = datacache.DataCache('tmp')
+
+        for db in [dtb, dtc]:
+            db.push_metadata('citekey1', page99_metadata)
+            db.push_bibdata('citekey1', page99_bibdata)
+
+            self.assertEqual(db.pull_metadata('citekey1'), page99_metadata)
+            self.assertEqual(db.pull_bibdata('citekey1'), page99_bibdata)
+
+    def test_existing_data(self):
+
+        # Both classes are run against a copy of the 'testrepo' directory.
+        ende = endecoder.EnDecoder()
+        page99_bibdata = ende.decode_bibdata(str_fixtures.bibyaml_raw0)
+
+        for db_class in [databroker.DataBroker, datacache.DataCache]:
+            self.fs = fake_env.create_fake_fs([content, filebroker])
+            fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'testrepo'), 'repo')
+
+            db = db_class('repo', create=False)
+
+            self.assertEqual(db.pull_bibdata('Page99'), page99_bibdata)
+
+            for citekey in ['10.1371_journal.pone.0038236',
+                            '10.1371journal.pone.0063400',
+                            'journal0063400']:
+                db.pull_bibdata(citekey)
+                db.pull_metadata(citekey)
+
+            # A citekey absent from the repository is expected to raise IOError.
+            with self.assertRaises(IOError):
+                db.pull_bibdata('citekey')
+            with self.assertRaises(IOError):
+                db.pull_metadata('citekey')
+
+            # After the copy, both the source and the new document must exist.
+            db.copy_doc('Larry99', 'pubsdir://doc/Page99.pdf')
+            self.assertTrue(content.check_file('repo/doc/Page99.pdf', fail=False))
+            self.assertTrue(content.check_file('repo/doc/Larry99.pdf', fail=False))
+
+            db.remove_doc('pubsdir://doc/Page99.pdf')
diff --git a/tests/testrepo/bib/10.1371_journal.pone.0038236.bibyaml b/tests/testrepo/bib/10.1371_journal.pone.0038236.bibyaml new file mode 100644 index 0000000..26da434 --- /dev/null +++ b/tests/testrepo/bib/10.1371_journal.pone.0038236.bibyaml @@ -0,0 +1,45 @@ +entries: + 10.1371_journal.pone.0038236: + abstract:

The advent of humanoid robots has enabled a new approach to investigating + the acquisition of language, and we report on the development of robots + able to acquire rudimentary linguistic skills. Our work focuses on early + stages analogous to some characteristics of a human child of about 6 to + 14 months, the transition from babbling to first word forms. We investigate + one mechanism among many that may contribute to this process, a key factor + being the sensitivity of learners to the statistical distribution of linguistic + elements. As well as being necessary for learning word meanings, the acquisition + of anchor word forms facilitates the segmentation of an acoustic stream + through other mechanisms. In our experiments some salient one-syllable + word forms are learnt by a humanoid robot in real-time interactions with + naive participants. Words emerge from random syllabic babble through a + learning process based on a dialogue between the robot and the human participant, + whose speech is perceived by the robot as a stream of phonemes. Numerous + ways of representing the speech as syllabic segments are possible. Furthermore, + the pronunciation of many words in spontaneous speech is variable. However, + in line with research elsewhere, we observe that salient content words + are more likely than function words to have consistent canonical representations; + thus their relative frequency increases, as does their influence on the + learner. Variable pronunciation may contribute to early word form acquisition. + The importance of contingent interaction in real-time between teacher + and learner is reflected by a reinforcement process, with variable success. + The examination of individual cases may be more informative than group + results. Nevertheless, word forms are usually produced by the robot after + a few minutes of dialogue, employing a simple, real-time, frequency dependent + mechanism. 
This work shows the potential of human-robot interaction systems + in studies of the dynamics of early language acquisition.

+ author: + - first: Caroline + last: Saunders + middle: Lyon AND Chrystopher L. Nehaniv AND Joe + doi: 10.1371/journal.pone.0038236 + journal: PLoS ONE + month: '06' + number: '6' + pages: e38236 + publisher: Public Library of Science + title: 'Interactive Language Learning by Robots: The Transition from Babbling + to Word Forms' + type: article + url: http://dx.doi.org/10.1371%2Fjournal.pone.0038236 + volume: '7' + year: '2012' diff --git a/tests/testrepo/bib/10.1371journal.pone.0063400.bibyaml b/tests/testrepo/bib/10.1371journal.pone.0063400.bibyaml new file mode 100644 index 0000000..bdfda50 --- /dev/null +++ b/tests/testrepo/bib/10.1371journal.pone.0063400.bibyaml @@ -0,0 +1,36 @@ +entries: + 10.1371journal.pone.0063400: + abstract:

Information theory is a powerful tool to express principles to + drive autonomous systems because it is domain invariant and allows for + an intuitive interpretation. This paper studies the use of the predictive + information (PI), also called excess entropy or effective measure complexity, + of the sensorimotor process as a driving force to generate behavior. We + study nonlinear and nonstationary systems and introduce the time-local + predicting information (TiPI) which allows us to derive exact results + together with explicit update rules for the parameters of the controller + in the dynamical systems framework. In this way the information principle, + formulated at the level of behavior, is translated to the dynamics of + the synapses. We underpin our results with a number of case studies with + high-dimensional robotic systems. We show the spontaneous cooperativity + in a complex physical system with decentralized control. Moreover, a jointly + controlled humanoid robot develops a high behavioral variety depending + on its physics and the environment it is dynamically embedded into. The + behavior can be decomposed into a succession of low-dimensional modes + that increasingly explore the behavior space. This is a promising way + to avoid the curse of dimensionality which hinders learning systems to + scale well.

+ author: + - first: Georg + last: Ay + middle: Martius AND Ralf Der AND Nihat + doi: 10.1371/journal.pone.0063400 + journal: PLoS ONE + month: '05' + number: '5' + pages: e63400 + publisher: Public Library of Science + title: Information Driven Self-Organization of Complex Robotic Behaviors + type: article + url: http://dx.doi.org/10.1371%2Fjournal.pone.0063400 + volume: '8' + year: '2013' diff --git a/tests/testrepo/bib/Page99.bibyaml b/tests/testrepo/bib/Page99.bibyaml new file mode 100644 index 0000000..3e77c1c --- /dev/null +++ b/tests/testrepo/bib/Page99.bibyaml @@ -0,0 +1,28 @@ +entries: + Page99: + abstract: The importance of a Web page is an inherently subjective matter, + which depends on the readers interests, knowledge and attitudes. But there + is still much that can be said objectively about the relative importance + of Web pages. This paper describes PageRank, a mathod for rating Web pages + objectively and mechanically, effectively measuring the human interest + and attention devoted to them. We compare PageRank to an idealized random + Web surfer. We show how to efficiently compute PageRank for large numbers + of pages. And, we show how to apply PageRank to search and to user navigation. + author: + - first: Lawrence + last: Page + - first: Sergey + last: Brin + - first: Rajeev + last: Motwani + - first: Terry + last: Winograd + institution: Stanford InfoLab + month: November + note: Previous number = SIDL-WP-1999-0120 + number: 1999-66 + publisher: Stanford InfoLab + title: 'The PageRank Citation Ranking: Bringing Order to the Web.' 
+ type: techreport + url: http://ilpubs.stanford.edu:8090/422/ + year: '1999' diff --git a/tests/testrepo/bib/journal0063400.bibyaml b/tests/testrepo/bib/journal0063400.bibyaml new file mode 100644 index 0000000..041a029 --- /dev/null +++ b/tests/testrepo/bib/journal0063400.bibyaml @@ -0,0 +1,15 @@ +entries: + journal0063400: + author: + - first: Lawrence + last: Page + - first: Sergey + last: Brin + - first: Rajeev + last: Motwani + - first: Terry + last: Winograd + journal: PLoS ONE + publisher: Public Library of Science + title: Information Driven Self-Organization of Complex Robotic Behaviors + type: article diff --git a/tests/testrepo/doc/Page99.pdf b/tests/testrepo/doc/Page99.pdf new file mode 100644 index 0000000..0523ae0 Binary files /dev/null and b/tests/testrepo/doc/Page99.pdf differ diff --git a/tests/testrepo/meta/10.1371_journal.pone.0038236.yaml b/tests/testrepo/meta/10.1371_journal.pone.0038236.yaml new file mode 100644 index 0000000..21ea181 --- /dev/null +++ b/tests/testrepo/meta/10.1371_journal.pone.0038236.yaml @@ -0,0 +1,3 @@ +docfile: null +notes: [] +tags: [] diff --git a/tests/testrepo/meta/10.1371journal.pone.0063400.yaml b/tests/testrepo/meta/10.1371journal.pone.0063400.yaml new file mode 100644 index 0000000..21ea181 --- /dev/null +++ b/tests/testrepo/meta/10.1371journal.pone.0063400.yaml @@ -0,0 +1,3 @@ +docfile: null +notes: [] +tags: [] diff --git a/tests/testrepo/meta/Page99.yaml b/tests/testrepo/meta/Page99.yaml new file mode 100644 index 0000000..44a426c --- /dev/null +++ b/tests/testrepo/meta/Page99.yaml @@ -0,0 +1,3 @@ +docfile: pubsdir://doc/Page99.pdf +notes: [] +tags: [search, network] diff --git a/tests/testrepo/meta/journal0063400.yaml b/tests/testrepo/meta/journal0063400.yaml new file mode 100644 index 0000000..21ea181 --- /dev/null +++ b/tests/testrepo/meta/journal0063400.yaml @@ -0,0 +1,3 @@ +docfile: null +notes: [] +tags: []