moving from pybtex to bibtexparser, fixed major regressions.

* only bibtex format is supported
* all tests pass except test_repo.py and the edit test
* edit and update commands were not updated
* removed --format argument from export, only bibtex is supported.
main
Fabien Benureau 11 years ago
parent c692f23054
commit d3736e257b

@ -1 +1,2 @@
--ignore-directory=is:build --ignore-directory=is:build
--ignore-directory=is:pubs.egg-info

@ -14,13 +14,12 @@ A paper correspond to 3 files :
About strings: About strings:
-------------- --------------
- pybtex seems to store entries as utf-8 (TODO: check)
- so assumption is made that everything is utf-8 - so assumption is made that everything is utf-8
- conversions are performed at print time - conversions are performed at print time
Config values: Config values:
-------------- --------------
[papers] [pubs]
open-cmd = open open-cmd = open
edit-cmd = edit edit-cmd = edit
import-copy = True import-copy = True

@ -21,20 +21,25 @@ def check_citekey(citekey):
raise ValueError("Invalid citekey: %s" % citekey) raise ValueError("Invalid citekey: %s" % citekey)
def verify_bibdata(bibdata): def verify_bibdata(bibdata):
if not hasattr(bibdata, 'entries') or len(bibdata.entries) == 0: if bibdata is None or len(bibdata) == 0:
raise ValueError('no entries in the bibdata.') raise ValueError('no valid bibdata')
if len(bibdata.entries) > 1: if len(bibdata) > 1:
raise ValueError('ambiguous: multiple entries in the bibdata.') raise ValueError('ambiguous: multiple entries in the bibdata.')
def get_entry(bibdata): def get_entry(bibdata):
verify_bibdata(bibdata) verify_bibdata(bibdata)
return bibdata.entries.iteritems().next() for e in bibdata.items():
return e
def extract_citekey(bibdata): def extract_citekey(bibdata):
verify_bibdata(bibdata) verify_bibdata(bibdata)
citekey, entry = get_entry(bibdata) citekey, entry = get_entry(bibdata)
return citekey return citekey
def author_last(author_str):
""" Return the last name of the author """
return author_str.split(',')[0]
def generate_citekey(bibdata): def generate_citekey(bibdata):
""" Generate a citekey from bib_data. """ Generate a citekey from bib_data.
@ -44,17 +49,17 @@ def generate_citekey(bibdata):
""" """
citekey, entry = get_entry(bibdata) citekey, entry = get_entry(bibdata)
author_key = 'author' if 'author' in entry.persons else 'editor' author_key = 'author' if 'author' in entry else 'editor'
try: try:
first_author = entry.persons[author_key][0] first_author = entry[author_key][0]
except KeyError: except KeyError:
raise ValueError( raise ValueError(
'No author or editor defined: cannot generate a citekey.') 'No author or editor defined: cannot generate a citekey.')
try: try:
year = entry.fields['year'] year = entry['year']
except KeyError: except KeyError:
year = '' year = ''
citekey = u'{}{}'.format(u''.join(first_author.last()), year) citekey = u'{}{}'.format(u''.join(author_last(first_author)), year)
return str2citekey(citekey) return str2citekey(citekey)
@ -67,21 +72,21 @@ def extract_docfile(bibdata, remove=False):
citekey, entry = get_entry(bibdata) citekey, entry = get_entry(bibdata)
try: try:
if 'file' in entry.fields: if 'file' in entry:
field = entry.fields['file'] field = entry['file']
# Check if this is mendeley specific # Check if this is mendeley specific
for f in field.split(':'): for f in field.split(':'):
if len(f) > 0: if len(f) > 0:
break break
if remove: if remove:
entry.fields.pop('file') entry.pop('file')
# This is a hck for Mendeley. Make clean # This is a hck for Mendeley. Make clean
if f[0] != '/': if f[0] != '/':
f = '/' + f f = '/' + f
return f return f
if 'attachments' in entry.fields: if 'attachments' in entry:
return entry.fields['attachments'] return entry['attachments']
if 'pdf' in entry.fields: if 'pdf' in entry:
return entry.fields['pdf'] return entry['pdf']
except (KeyError, IndexError): except (KeyError, IndexError):
return None return None

@ -108,7 +108,7 @@ def command(args):
if copy_doc is None: if copy_doc is None:
copy_doc = config().import_copy copy_doc = config().import_copy
if copy_doc: if copy_doc:
docfile = rp.databroker.copy_doc(citekey, docfile) docfile = rp.databroker.add_doc(citekey, docfile)
# create the paper # create the paper

@ -34,7 +34,7 @@ def command(args):
try: try:
document = args.document document = args.document
if copy: if copy:
document = rp.databroker.copy_doc(paper.citekey, document) document = rp.databroker.add_doc(paper.citekey, document)
else: else:
pass # TODO warn if file does not exists pass # TODO warn if file does not exists
paper.docpath = document paper.docpath = document

@ -15,6 +15,24 @@ def parser(subparsers):
return parser return parser
def edit_meta(citekey):
rp = repo.Repository(config())
coder = endecoder.EnDecoder()
filepath = os.path.join(rp.databroker.databroker.filebroker.metadir(), citekey+'.yaml')
with open(filepath) as f:
content = f.read()
def edit_bib(citekey):
rp = repo.Repository(config())
coder = endecoder.EnDecoder()
filepath = os.path.join(rp.databroker.databroker.filebroker.bibdir(), citekey+'.bib')
with open(filepath) as f:
content = f.read()
def command(args): def command(args):
ui = get_ui() ui = get_ui()
@ -26,8 +44,7 @@ def command(args):
if meta: if meta:
filepath = os.path.join(rp.databroker.databroker.filebroker.metadir(), citekey+'.yaml') filepath = os.path.join(rp.databroker.databroker.filebroker.metadir(), citekey+'.yaml')
else: else:
filepath = os.path.join(rp.databroker.databroker.filebroker.bibdir(), citekey+'.bibyaml') filepath = os.path.join(rp.databroker.databroker.filebroker.bibdir(), citekey+'.bib')
with open(filepath) as f: with open(filepath) as f:
content = f.read() content = f.read()

@ -1,8 +1,6 @@
from __future__ import print_function from __future__ import print_function
import sys import sys
from pybtex.database import BibliographyData
from .. import repo from .. import repo
from ..configs import config from ..configs import config
from ..uis import get_ui from ..uis import get_ui
@ -11,8 +9,8 @@ from .. import endecoder
def parser(subparsers): def parser(subparsers):
parser = subparsers.add_parser('export', parser = subparsers.add_parser('export',
help='export bibliography') help='export bibliography')
parser.add_argument('-f', '--bib-format', default='bibtex', # parser.add_argument('-f', '--bib-format', default='bibtex',
help='export format') # help='export format')
parser.add_argument('citekeys', nargs='*', parser.add_argument('citekeys', nargs='*',
help='one or several citekeys') help='one or several citekeys')
return parser return parser
@ -20,11 +18,10 @@ def parser(subparsers):
def command(args): def command(args):
""" """
:param bib_format (in 'bibtex', 'yaml')
""" """
# :param bib_format (only 'bibtex' now)
ui = get_ui() ui = get_ui()
bib_format = args.bib_format
rp = repo.Repository(config()) rp = repo.Repository(config())
@ -36,12 +33,12 @@ def command(args):
if len(papers) == 0: if len(papers) == 0:
papers = rp.all_papers() papers = rp.all_papers()
bib = BibliographyData() bib = {}
for p in papers: for p in papers:
bib.add_entry(p.citekey, p.bibentry) bib[p.citekey] = p.bibentry
try: try:
exporter = endecoder.EnDecoder() exporter = endecoder.EnDecoder()
bibdata_raw = exporter.encode_bibdata(bib, fmt=bib_format) bibdata_raw = exporter.encode_bibdata(bib)
print(bibdata_raw, end='') print(bibdata_raw, end='')
except KeyError: except KeyError:
ui.error("Invalid output format: %s." % bib_format) ui.error("Invalid output format: %s." % bib_format)

@ -1,8 +1,6 @@
import os import os
import datetime import datetime
from pybtex.database import Entry, BibliographyData, FieldDict, Person
from .. import repo from .. import repo
from .. import endecoder from .. import endecoder
from .. import bibstruct from .. import bibstruct
@ -41,8 +39,9 @@ def many_from_path(bibpath):
bibpath = os.path.expanduser(bibpath) bibpath = os.path.expanduser(bibpath)
if os.path.isdir(bibpath): if os.path.isdir(bibpath):
print([os.path.splitext(f)[-1][1:] for f in os.listdir(bibpath)])
all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath) all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath)
if os.path.splitext(f)[-1][1:] in list(coder.decode_fmt.keys())] if os.path.splitext(f)[-1][1:] == 'bib']
else: else:
all_files = [bibpath] all_files = [bibpath]
@ -53,10 +52,10 @@ def many_from_path(bibpath):
papers = {} papers = {}
for b in biblist: for b in biblist:
for k in b.entries: for k in b.keys():
try: try:
bibdata = BibliographyData() bibdata = {}
bibdata.entries[k] = b.entries[k] bibdata[k] = b[k]
papers[k] = Paper(bibdata, citekey=k) papers[k] = Paper(bibdata, citekey=k)
papers[k].added = datetime.datetime.now() papers[k].added = datetime.datetime.now()
@ -94,7 +93,7 @@ def command(args):
if copy_doc is None: if copy_doc is None:
copy_doc = config().import_copy copy_doc = config().import_copy
if copy_doc: if copy_doc:
docfile = rp.databroker.copy_doc(p.citekey, docfile) docfile = rp.databroker.add_doc(p.citekey, docfile)
p.docpath = docfile p.docpath = docfile
rp.push_paper(p) rp.push_paper(p)

@ -1,9 +1,9 @@
from .. import repo from .. import repo
from .. import pretty from .. import pretty
from .. import bibstruct
from ..configs import config from ..configs import config
from ..uis import get_ui from ..uis import get_ui
class InvalidQuery(ValueError): class InvalidQuery(ValueError):
pass pass
@ -56,20 +56,15 @@ def _get_field_value(query_block):
return (field, value) return (field, value)
def _lower(string, lower=True): def _lower(s, lower=True):
if lower: return s.lower() if lower else s
return string.lower()
else:
return string
def _check_author_match(paper, query, case_sensitive=False): def _check_author_match(paper, query, case_sensitive=False):
"""Only checks within last names.""" """Only checks within last names."""
if not 'author' in paper.bibentry.persons: if not 'author' in paper.bibentry:
return False return False
return any([query in _lower(name, lower=(not case_sensitive)) return any([query == _lower(bibstruct.author_last(p), lower=(not case_sensitive))
for p in paper.bibentry.persons['author'] for p in paper.bibentry['author']])
for name in p.last()])
def _check_tag_match(paper, query, case_sensitive=False): def _check_tag_match(paper, query, case_sensitive=False):
@ -78,7 +73,7 @@ def _check_tag_match(paper, query, case_sensitive=False):
def _check_field_match(paper, field, query, case_sensitive=False): def _check_field_match(paper, field, query, case_sensitive=False):
return query in _lower(paper.bibentry.fields[field], return query in _lower(paper.bibentry[field],
lower=(not case_sensitive)) lower=(not case_sensitive))
@ -92,7 +87,7 @@ def _check_query_block(paper, query_block, case_sensitive=None):
return _check_tag_match(paper, value, case_sensitive=case_sensitive) return _check_tag_match(paper, value, case_sensitive=case_sensitive)
elif field == 'author': elif field == 'author':
return _check_author_match(paper, value, case_sensitive=case_sensitive) return _check_author_match(paper, value, case_sensitive=case_sensitive)
elif field in paper.bibentry.fields: elif field in paper.bibentry:
return _check_field_match(paper, field, value, return _check_field_match(paper, field, value,
case_sensitive=case_sensitive) case_sensitive=case_sensitive)
else: else:

@ -68,7 +68,7 @@ class DataCache(object):
def real_docpath(self, docpath): def real_docpath(self, docpath):
return self.databroker.real_docpath(docpath) return self.databroker.real_docpath(docpath)
def copy_doc(self, citekey, source_path, overwrite=False): def add_doc(self, citekey, source_path, overwrite=False):
return self.databroker.add_doc(citekey, source_path, overwrite=overwrite) return self.databroker.add_doc(citekey, source_path, overwrite=overwrite)
def remove_doc(self, docpath, silent=True): def remove_doc(self, docpath, silent=True):

@ -1,5 +1,5 @@
import color from __future__ import print_function, absolute_import, division, unicode_literals
import yaml import copy
try: try:
import cStringIO as StringIO import cStringIO as StringIO
@ -7,19 +7,44 @@ except ImportError:
import StringIO import StringIO
try: try:
import pybtex.database.input.bibtex import bibtexparser as bp
import pybtex.database.input.bibtexml
import pybtex.database.input.bibyaml
import pybtex.database.output.bibtex
import pybtex.database.output.bibtexml
import pybtex.database.output.bibyaml
except ImportError: except ImportError:
print(color.dye('error', color.error) + print(color.dye('error', color.error) +
": you need to install Pybtex; try running 'pip install " ": you need to install bibterxparser; try running 'pip install "
"pybtex' or 'easy_install pybtex'") "bibtexparser'.")
exit(-1) exit(-1)
import yaml
from . import color
def sanitize_citekey(record):
record['id'] = record['id'].strip('\n')
return record
def customizations(record):
""" Use some functions delivered by the library
:param record: a record
:returns: -- customized record
"""
record = bp.customization.convert_to_unicode(record)
record = bp.customization.type(record)
record = bp.customization.author(record)
record = bp.customization.editor(record)
record = bp.customization.journal(record)
record = bp.customization.keyword(record)
record = bp.customization.link(record)
record = bp.customization.page_double_hyphen(record)
record = bp.customization.doi(record)
record = sanitize_citekey(record)
return record
bibfield_order = ['author', 'title', 'journal', 'institution', 'publisher', 'year', 'month', 'number', 'pages', 'link', 'doi', 'id', 'note', 'abstract']
class EnDecoder(object): class EnDecoder(object):
""" Encode and decode content. """ Encode and decode content.
@ -32,45 +57,55 @@ class EnDecoder(object):
* encode_bibdata will try to recognize exceptions * encode_bibdata will try to recognize exceptions
""" """
decode_fmt = {'bibtex' : pybtex.database.input.bibtex,
'bibyaml' : pybtex.database.input.bibyaml,
'bib' : pybtex.database.input.bibtex,
'bibtexml': pybtex.database.input.bibtexml}
encode_fmt = {'bibtex' : pybtex.database.output.bibtex,
'bibyaml' : pybtex.database.output.bibyaml,
'bib' : pybtex.database.output.bibtex,
'bibtexml': pybtex.database.output.bibtexml}
def encode_metadata(self, metadata): def encode_metadata(self, metadata):
return yaml.safe_dump(metadata, allow_unicode=True, encoding='UTF-8', indent = 4) return yaml.safe_dump(metadata, allow_unicode=True, encoding='UTF-8', indent = 4)
def decode_metadata(self, metadata_raw): def decode_metadata(self, metadata_raw):
return yaml.safe_load(metadata_raw) return yaml.safe_load(metadata_raw)
def encode_bibdata(self, bibdata, fmt='bib'): def encode_bibdata(self, bibdata):
"""Encode bibdata """ """Encode bibdata """
s = StringIO.StringIO() return '\n'.join(self._encode_bibentry(citekey, entry)
EnDecoder.encode_fmt[fmt].Writer().write_stream(bibdata, s) for citekey, entry in bibdata.items())
return s.getvalue()
@staticmethod
def _encode_field(key, value):
if key == 'link':
return ', '.join(link['url'] for link in value)
elif key == 'author':
return ' and '.join(author for author in value)
elif key == 'journal':
return value['name']
else:
return value
@staticmethod
def _encode_bibentry(citekey, bibentry):
bibraw = '@{}{{{},\n'.format(bibentry['type'], citekey)
bibentry = copy.copy(bibentry)
for key in bibfield_order:
if key in bibentry:
value = bibentry.pop(key)
bibraw += ' {} = {{{}}},\n'.format(key, EnDecoder._encode_field(key, value))
for key, value in bibentry.items():
if key != 'type':
bibraw += ' {} = {{{}}},\n'.format(key, EnDecoder._encode_field(key, value))
bibraw += '}\n'
return bibraw
def decode_bibdata(self, bibdata_raw): def decode_bibdata(self, bibdata_raw):
"""""" """"""
bibdata_rawutf8 = bibdata_raw bibdata_rawutf8 = bibdata_raw
# bibdata_rawutf8 = unicode(bibdata_raw, 'utf8') # FIXME this doesn't work #bibdata_rawutf8 = unicode(bibdata_raw, 'utf8') # FIXME this doesn't work
for fmt in EnDecoder.decode_fmt.values():
try:
bibdata_stream = StringIO.StringIO(bibdata_rawutf8) bibdata_stream = StringIO.StringIO(bibdata_rawutf8)
return self._decode_bibdata(bibdata_stream, fmt.Parser()) return self._decode_bibdata(bibdata_stream)
except ValueError:
pass
raise ValueError('could not parse bibdata')
def _decode_bibdata(self, bibdata_stream, parser): def _decode_bibdata(self, bibdata_stream):
try: try:
entry = parser.parse_stream(bibdata_stream) entries = bp.bparser.BibTexParser(bibdata_stream, customization=customizations).get_entry_dict()
if len(entry.entries) > 0: if len(entries) > 0:
return entry return entries
except Exception: except Exception:
pass import traceback
traceback.print_exc()
raise ValueError('could not parse bibdata') raise ValueError('could not parse bibdata')

@ -131,7 +131,7 @@ class DocBroker(object):
# return check_file(os.path.join(self.docdir, citekey + ext), fail=False) # return check_file(os.path.join(self.docdir, citekey + ext), fail=False)
def real_docpath(self, docpath): def real_docpath(self, docpath):
"""Return the full path """ Return the full path
Essentially transform pubsdir://doc/{citekey}.{ext} to /path/to/pubsdir/doc/{citekey}.{ext}. Essentially transform pubsdir://doc/{citekey}.{ext} to /path/to/pubsdir/doc/{citekey}.{ext}.
Return absoluted paths of regular ones otherwise. Return absoluted paths of regular ones otherwise.
""" """

@ -11,7 +11,7 @@ class Paper(object):
""" Paper class. """ Paper class.
The object is not responsible of any disk I/O. The object is not responsible of any disk I/O.
self.bibdata is a pybtex.database.BibliographyData object self.bibdata is a dictionary of bibliographic fields
self.metadata is a dictionary self.metadata is a dictionary
The paper class provides methods to access the fields for its metadata The paper class provides methods to access the fields for its metadata
@ -43,10 +43,18 @@ class Paper(object):
return 'Paper(%s, %s, %s)' % ( return 'Paper(%s, %s, %s)' % (
self.citekey, self.bibentry, self.metadata) self.citekey, self.bibentry, self.metadata)
def deepcopy(self): def __deepcopy__(self, memo):
return Paper(citekey =self.citekey,
metadata=copy.deepcopy(self.metadata, memo),
bibdata=copy.deepcopy(self.bibdata, memo))
def __copy__(self):
return Paper(citekey =self.citekey, return Paper(citekey =self.citekey,
metadata=copy.deepcopy(self.metadata), metadata=self.metadata,
bibdata=copy.deepcopy(self.bibdata)) bibdata=self.bibdata)
def deepcopy(self):
return self.__deepcopy__({})
# docpath # docpath

@ -1,23 +1,19 @@
# display formatting # display formatting
from . import color from . import color
from pybtex.bibtex.utils import bibtex_purify
# A bug in pybtex makes the abbreviation wrong here # should be adapted to bibtexparser dicts
# (Submitted with racker ID: ID: 3605659)
# The purification should also be applied to names but unfortunately
# it removes dots which is annoying on abbreviations.
def person_repr(p): def person_repr(p):
raise NotImplementedError
return ' '.join(s for s in [ return ' '.join(s for s in [
' '.join(p.first(abbr=True)), ' '.join(p.first(abbr=True)),
' '.join(p.last(abbr=False)), ' '.join(p.last(abbr=False)),
' '.join(p.lineage(abbr=True))] if s) ' '.join(p.lineage(abbr=True))] if s)
def short_authors(bibentry): def short_authors(bibentry):
try: try:
authors = [person_repr(p) for p in bibentry.persons['author']] authors = [p for p in bibentry['author']]
if len(authors) < 3: if len(authors) < 3:
return ', '.join(authors) return ', '.join(authors)
else: else:
@ -28,27 +24,26 @@ def short_authors(bibentry):
def bib_oneliner(bibentry): def bib_oneliner(bibentry):
authors = short_authors(bibentry) authors = short_authors(bibentry)
title = bibtex_purify(bibentry.fields['title']) journal, journal_field = '', 'journal'
year = bibtex_purify(bibentry.fields.get('year', '')) if 'journal' in bibentry:
journal = '' journal = bibentry['journal']['name']
field = 'journal' elif bibentry['type'] == 'inproceedings':
if bibentry.type == 'inproceedings': journal = bibentry.get('booktitle', '')
field = 'booktitle'
journal = bibtex_purify(bibentry.fields.get(field, ''))
return u'{authors} \"{title}\" {journal} ({year})'.format( return u'{authors} \"{title}\" {journal} ({year})'.format(
authors=color.dye(authors, color.cyan), authors=color.dye(authors, color.cyan),
title=title, title=bibentry['title'],
journal=color.dye(journal, color.yellow), journal=color.dye(journal, color.yellow),
year=year, year=bibentry['year'],
) )
def bib_desc(bib_data): def bib_desc(bib_data):
article = bib_data.entries[list(bib_data.entries.keys())[0]] article = bib_data[list(bib_data.keys())[0]]
s = '\n'.join('author: {}'.format(person_repr(p)) s = '\n'.join('author: {}'.format(p)
for p in article.persons['author']) for p in article['author'])
s += '\n' s += '\n'
s += '\n'.join('{}: {}'.format(k, v) for k, v in article.fields.items()) s += '\n'.join('{}: {}'.format(k, v) for k, v in article.items())
return s return s

@ -2,8 +2,6 @@ import shutil
import glob import glob
import itertools import itertools
from pybtex.database import BibliographyData
from . import bibstruct from . import bibstruct
from . import events from . import events
from . import datacache from . import datacache
@ -105,9 +103,8 @@ class Repository(object):
if self.databroker.exists(new_citekey, both=False): if self.databroker.exists(new_citekey, both=False):
raise IOError("can't rename paper to {}, conflicting files exists".format(new_citekey)) raise IOError("can't rename paper to {}, conflicting files exists".format(new_citekey))
# modify bibdata (__delitem__ not implementd by pybtex) new_bibdata = {}
new_bibdata = BibliographyData() new_bibdata[new_citekey] = paper.bibdata[old_citekey]
new_bibdata.entries[new_citekey] = paper.bibdata.entries[old_citekey]
paper.bibdata = new_bibdata paper.bibdata = new_bibdata
# move doc file if necessary # move doc file if necessary

@ -1,11 +1,10 @@
Papers # Pubs
======
Papers brings your bibliography to the command line. Pubs brings your bibliography to the command line.
Papers organizes your bibliographic documents together with the bibliographic data associated to them and provides command line access to basic and advanced manipulation of your library. Pubs organizes your bibliographic documents together with the bibliographic data associated to them and provides command line access to basic and advanced manipulation of your library.
Papers is built with the following principles in mind: Pubs is built with the following principles in mind:
- all papers are referenced using unique citation keys, - all papers are referenced using unique citation keys,
- bibliographic data (i.e. pure bibtex information) is kept separated from metadata (including links to pdf or tags), - bibliographic data (i.e. pure bibtex information) is kept separated from metadata (including links to pdf or tags),
@ -19,14 +18,14 @@ Getting started
--------------- ---------------
Create your library (by default, goes to '~/.papers/'). Create your library (by default, goes to '~/.papers/').
papers init pubs init
Import existing data from bibtex (papers will try to automatically copy documents defined as 'file' in bibtex): Import existing data from bibtex (papers will try to automatically copy documents defined as 'file' in bibtex):
papers import path/to/collection.bib pubs import path/to/collection.bib
or for bibtex containing a single file: or for bibtex containing a single file:
papers add --bibfile article.bib --docfile article.pdf pubs add --bibfile article.bib --docfile article.pdf
Authors Authors

@ -8,7 +8,7 @@ setup(name='pubs',
author_email='fabien.benureau+inria@gmail.com', author_email='fabien.benureau+inria@gmail.com',
url='', url='',
description='research papers manager', description='research papers manager',
requires=['pybtex'], requires=['bibtexparser'],
packages=find_packages(), packages=find_packages(),
package_data={'': ['*.tex', '*.sty']}, package_data={'': ['*.tex', '*.sty']},
scripts=['pubs/pubs'] scripts=['pubs/pubs']

@ -6,7 +6,7 @@ import unittest
import pkgutil import pkgutil
import re import re
import testenv import dotdot
import fake_filesystem import fake_filesystem
import fake_filesystem_shutil import fake_filesystem_shutil
import fake_filesystem_glob import fake_filesystem_glob

@ -1,8 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from pybtex.database import Person import dotdot
import testenv
from pubs import endecoder from pubs import endecoder
import str_fixtures import str_fixtures
@ -22,7 +20,17 @@ doe_bib = """
""" """
franny_bibdata = coder.decode_bibdata(franny_bib) franny_bibdata = coder.decode_bibdata(franny_bib)
franny_bibentry = franny_bibdata['Franny1961']
doe_bibdata = coder.decode_bibdata(doe_bib) doe_bibdata = coder.decode_bibdata(doe_bib)
doe_bibentry = doe_bibdata['Doe2013']
turing_bibdata = coder.decode_bibdata(str_fixtures.turing_bib) turing_bibdata = coder.decode_bibdata(str_fixtures.turing_bib)
turing_bibentry = turing_bibdata['turing1950computing']
turing_metadata = coder.decode_metadata(str_fixtures.turing_meta)
page_bibdata = coder.decode_bibdata(str_fixtures.bibtex_raw0) page_bibdata = coder.decode_bibdata(str_fixtures.bibtex_raw0)
page_bibentry = page_bibdata['Page99']
page_metadata = coder.decode_metadata(str_fixtures.metadata_raw0)
page_metadata = coder.decode_metadata(str_fixtures.metadata_raw0) page_metadata = coder.decode_metadata(str_fixtures.metadata_raw0)

@ -1,71 +1,3 @@
bibyaml_raw0 = """entries:
Page99:
abstract: The importance of a Web page is an inherently subjective matter,
which depends on the readers interests, knowledge and attitudes. But there
is still much that can be said objectively about the relative importance
of Web pages. This paper describes PageRank, a mathod for rating Web pages
objectively and mechanically, effectively measuring the human interest
and attention devoted to them. We compare PageRank to an idealized random
Web surfer. We show how to efficiently compute PageRank for large numbers
of pages. And, we show how to apply PageRank to search and to user navigation.
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
institution: Stanford InfoLab
month: November
note: Previous number = SIDL-WP-1999-0120
number: 1999-66
publisher: Stanford InfoLab
title: 'The PageRank Citation Ranking: Bringing Order to the Web.'
type: techreport
url: http://ilpubs.stanford.edu:8090/422/
year: '1999'
"""
bibtexml_raw0 = """<?xml version='1.0' encoding='UTF-8'?>
<bibtex:file xmlns:bibtex="http://bibtexml.sf.net/">
<bibtex:entry id="Page99">
<bibtex:techreport>
<bibtex:publisher>Stanford InfoLab</bibtex:publisher>
<bibtex:title>The PageRank Citation Ranking: Bringing Order to the Web.</bibtex:title>
<bibtex:url>http://ilpubs.stanford.edu:8090/422/</bibtex:url>
<bibtex:abstract>The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.</bibtex:abstract>
<bibtex:number>1999-66</bibtex:number>
<bibtex:month>November</bibtex:month>
<bibtex:note>Previous number = SIDL-WP-1999-0120</bibtex:note>
<bibtex:year>1999</bibtex:year>
<bibtex:institution>Stanford InfoLab</bibtex:institution>
<bibtex:author>
<bibtex:person>
<bibtex:first>Lawrence</bibtex:first>
<bibtex:last>Page</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Sergey</bibtex:first>
<bibtex:last>Brin</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Rajeev</bibtex:first>
<bibtex:last>Motwani</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Terry</bibtex:first>
<bibtex:last>Winograd</bibtex:last>
</bibtex:person>
</bibtex:author>
</bibtex:techreport>
</bibtex:entry>
</bibtex:file>
"""
bibtex_external0 = """ bibtex_external0 = """
@techreport{Page99, @techreport{Page99,
number = {1999-66}, number = {1999-66},
@ -116,3 +48,8 @@ turing_bib = """@article{turing1950computing,
} }
""" """
turing_meta = """\
tags: [AI, computer]
added: '2013-11-14 13:14:20'
"""

@ -3,9 +3,7 @@ import os
import unittest import unittest
import copy import copy
from pybtex.database import Person import dotdot
import testenv
from pubs import bibstruct from pubs import bibstruct
import fixtures import fixtures
@ -20,7 +18,7 @@ class TestGenerateCitekey(unittest.TestCase):
def test_escapes_chars(self): def test_escapes_chars(self):
doe_bibdata = copy.deepcopy(fixtures.doe_bibdata) doe_bibdata = copy.deepcopy(fixtures.doe_bibdata)
citekey, entry = bibstruct.get_entry(doe_bibdata) citekey, entry = bibstruct.get_entry(doe_bibdata)
entry.persons['author'] = [Person(string=u'Zôu\\@/ , John')] entry['author'] = [u'Zôu\\@/ , John']
key = bibstruct.generate_citekey(doe_bibdata) key = bibstruct.generate_citekey(doe_bibdata)
def test_simple(self): def test_simple(self):
@ -31,3 +29,7 @@ class TestGenerateCitekey(unittest.TestCase):
bibdata = copy.deepcopy(fixtures.franny_bibdata) bibdata = copy.deepcopy(fixtures.franny_bibdata)
key = bibstruct.generate_citekey(bibdata) key = bibstruct.generate_citekey(bibdata)
self.assertEqual(key, 'Salinger1961') self.assertEqual(key, 'Salinger1961')
if __name__ == '__main__':
unittest.main()

@ -1,4 +1,4 @@
import testenv import dotdot
from pubs import color from pubs import color
def perf_color(): def perf_color():

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import unittest import unittest
import testenv import dotdot
from pubs import configs from pubs import configs
from pubs.configs import config from pubs.configs import config
from pubs.p3 import configparser from pubs.p3 import configparser
@ -67,3 +67,7 @@ class TestConfig(unittest.TestCase):
def test_keywords(self): def test_keywords(self):
a = configs.Config(pubs_dir = '/blabla') a = configs.Config(pubs_dir = '/blabla')
self.assertEqual(a.pubs_dir, '/blabla') self.assertEqual(a.pubs_dir, '/blabla')
if __name__ == '__main__':
unittest.main()

@ -2,7 +2,7 @@
import unittest import unittest
import os import os
import testenv import dotdot
import fake_env import fake_env
from pubs import content, filebroker, databroker, datacache from pubs import content, filebroker, databroker, datacache
@ -20,13 +20,13 @@ class TestFakeFs(unittest.TestCase):
fake_env.unset_fake_fs([content, filebroker]) fake_env.unset_fake_fs([content, filebroker])
class TestDataBroker(TestFakeFs): class TestDataBroker(unittest.TestCase):
def test_databroker(self): def test_databroker(self):
ende = endecoder.EnDecoder() ende = endecoder.EnDecoder()
page99_metadata = ende.decode_metadata(str_fixtures.metadata_raw0) page99_metadata = ende.decode_metadata(str_fixtures.metadata_raw0)
page99_bibdata = ende.decode_bibdata(str_fixtures.bibyaml_raw0) page99_bibdata = ende.decode_bibdata(str_fixtures.bibtex_raw0)
for db_class in [databroker.DataBroker, datacache.DataCache]: for db_class in [databroker.DataBroker, datacache.DataCache]:
self.fs = fake_env.create_fake_fs([content, filebroker]) self.fs = fake_env.create_fake_fs([content, filebroker])
@ -41,12 +41,17 @@ class TestDataBroker(TestFakeFs):
self.assertTrue(db.exists('citekey1', both=True)) self.assertTrue(db.exists('citekey1', both=True))
self.assertEqual(db.pull_metadata('citekey1'), page99_metadata) self.assertEqual(db.pull_metadata('citekey1'), page99_metadata)
pulled = db.pull_bibdata('citekey1')['Page99']
for key, value in pulled.items():
self.assertEqual(pulled[key], page99_bibdata['Page99'][key])
self.assertEqual(db.pull_bibdata('citekey1'), page99_bibdata) self.assertEqual(db.pull_bibdata('citekey1'), page99_bibdata)
fake_env.unset_fake_fs([content, filebroker])
def test_existing_data(self): def test_existing_data(self):
ende = endecoder.EnDecoder() ende = endecoder.EnDecoder()
page99_bibdata = ende.decode_bibdata(str_fixtures.bibyaml_raw0) page99_bibdata = ende.decode_bibdata(str_fixtures.bibtex_raw0)
for db_class in [databroker.DataBroker, datacache.DataCache]: for db_class in [databroker.DataBroker, datacache.DataCache]:
self.fs = fake_env.create_fake_fs([content, filebroker]) self.fs = fake_env.create_fake_fs([content, filebroker])
@ -67,8 +72,14 @@ class TestDataBroker(TestFakeFs):
with self.assertRaises(IOError): with self.assertRaises(IOError):
db.pull_metadata('citekey') db.pull_metadata('citekey')
db.copy_doc('Larry99', 'docsdir://Page99.pdf') db.add_doc('Larry99', 'docsdir://Page99.pdf')
self.assertTrue(content.check_file('repo/doc/Page99.pdf', fail=False)) self.assertTrue(content.check_file('repo/doc/Page99.pdf', fail=False))
self.assertTrue(content.check_file('repo/doc/Larry99.pdf', fail=False)) self.assertTrue(content.check_file('repo/doc/Larry99.pdf', fail=False))
db.remove_doc('docsdir://Page99.pdf') db.remove_doc('docsdir://Page99.pdf')
fake_env.unset_fake_fs([content, filebroker])
if __name__ == '__main__':
unittest.main()

@ -3,10 +3,10 @@ import unittest
import yaml import yaml
import testenv import dotdot
from pubs import endecoder from pubs import endecoder
from str_fixtures import bibyaml_raw0, bibtexml_raw0, bibtex_raw0, metadata_raw0 from str_fixtures import bibtex_raw0, metadata_raw0
def compare_yaml_str(s1, s2): def compare_yaml_str(s1, s2):
if s1 == s2: if s1 == s2:
@ -19,30 +19,23 @@ def compare_yaml_str(s1, s2):
class TestEnDecode(unittest.TestCase): class TestEnDecode(unittest.TestCase):
def test_endecode_bibyaml(self):
decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibyaml_raw0)
bibyaml_output0 = decoder.encode_bibdata(entry)
self.assertEqual(bibyaml_raw0, bibyaml_output0)
self.assertTrue(compare_yaml_str(bibyaml_raw0, bibyaml_output0))
def test_endecode_bibtexml(self):
decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibtexml_raw0)
bibyaml_output0 = decoder.encode_bibdata(entry)
self.assertTrue(compare_yaml_str(bibyaml_raw0, bibyaml_output0))
def test_endecode_bibtex(self): def test_endecode_bibtex(self):
decoder = endecoder.EnDecoder() decoder = endecoder.EnDecoder()
entry = decoder.decode_bibdata(bibtex_raw0) entry = decoder.decode_bibdata(bibtex_raw0)
bibyaml_output0 = decoder.encode_bibdata(entry)
self.assertTrue(compare_yaml_str(bibyaml_raw0, bibyaml_output0)) bibraw1 = decoder.encode_bibdata(entry)
entry1 = decoder.decode_bibdata(bibraw1)
bibraw2 = decoder.encode_bibdata(entry1)
entry2 = decoder.decode_bibdata(bibraw2)
for citekey in entry1.keys():
bibentry1 = entry1[citekey]
bibentry2 = entry2[citekey]
for key, value in bibentry1.items():
self.assertEqual(bibentry1[key], bibentry2[key])
self.assertEqual(bibraw1, bibraw2)
def test_endecode_metadata(self): def test_endecode_metadata(self):
@ -50,5 +43,8 @@ class TestEnDecode(unittest.TestCase):
entry = decoder.decode_metadata(metadata_raw0) entry = decoder.decode_metadata(metadata_raw0)
metadata_output0 = decoder.encode_metadata(entry) metadata_output0 = decoder.encode_metadata(entry)
self.assertEqual(metadata_raw0, metadata_output0) self.assertEqual(set(metadata_raw0.split('\n')), set(metadata_output0.split('\n')))
if __name__ == '__main__':
unittest.main()

@ -1,6 +1,6 @@
from unittest import TestCase import unittest
import testenv import dotdot
from pubs.events import Event from pubs.events import Event
@ -62,7 +62,7 @@ def test_info_instance(infoevent):
_output.append(infoevent.specific) _output.append(infoevent.specific)
class TestEvents(TestCase): class TestEvents(unittest.TestCase):
def setUp(self): def setUp(self):
global _output global _output
@ -88,3 +88,7 @@ class TestEvents(TestCase):
SpecificInfo('info', 'specific').send() SpecificInfo('info', 'specific').send()
correct = ['info', 'info', 'specific'] correct = ['info', 'info', 'specific']
self.assertEquals(_output, correct) self.assertEquals(_output, correct)
if __name__ == '__main__':
unittest.main()

@ -2,7 +2,7 @@
import unittest import unittest
import os import os
import testenv import dotdot
import fake_env import fake_env
from pubs import content, filebroker from pubs import content, filebroker
@ -38,7 +38,7 @@ class TestFileBroker(TestFakeFs):
fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'tmpdir'), 'tmpdir') fake_env.copy_dir(self.fs, os.path.join(os.path.dirname(__file__), 'tmpdir'), 'tmpdir')
fb = filebroker.FileBroker('tmpdir', create = True) fb = filebroker.FileBroker('tmpdir', create = True)
with open('tmpdir/bib/Page99.bibyaml', 'r') as f: with open('tmpdir/bib/Page99.bib', 'r') as f:
self.assertEqual(fb.pull_bibfile('Page99'), f.read()) self.assertEqual(fb.pull_bibfile('Page99'), f.read())
with open('tmpdir/meta/Page99.yaml', 'r') as f: with open('tmpdir/meta/Page99.yaml', 'r') as f:
@ -98,7 +98,7 @@ class TestDocBroker(TestFakeFs):
fb = filebroker.FileBroker('tmpdir', create = True) fb = filebroker.FileBroker('tmpdir', create = True)
docb = filebroker.DocBroker('tmpdir') docb = filebroker.DocBroker('tmpdir')
docpath = docb.copy_doc('Page99', 'data/pagerank.pdf') docpath = docb.add_doc('Page99', 'data/pagerank.pdf')
self.assertTrue(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf'))) self.assertTrue(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf')))
self.assertTrue(docb.in_docsdir(docpath)) self.assertTrue(docb.in_docsdir(docpath))
@ -108,3 +108,7 @@ class TestDocBroker(TestFakeFs):
self.assertFalse(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf'), fail=False)) self.assertFalse(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf'), fail=False))
with self.assertRaises(IOError): with self.assertRaises(IOError):
self.assertFalse(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf'), fail=True)) self.assertFalse(content.check_file(os.path.join('tmpdir', 'doc/Page99.pdf'), fail=True))
if __name__ == '__main__':
unittest.main()

@ -2,7 +2,7 @@
import os import os
import unittest import unittest
import testenv import dotdot
import fixtures import fixtures
from pubs.paper import Paper from pubs.paper import Paper
@ -41,3 +41,7 @@ class TestAttributes(unittest.TestCase):
p.remove_tag('ranking') p.remove_tag('ranking')
self.assertEqual(p.tags, set()) self.assertEqual(p.tags, set())
p.remove_tag('ranking') p.remove_tag('ranking')
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
import unittest
import os
import dotdot
import fake_env
from pubs import endecoder, pretty
from str_fixtures import bibtex_raw0
class TestPretty(unittest.TestCase):
    """Smoke test for the one-line bibliography formatter in ``pretty``."""

    def test_oneliner(self):
        """Format the decoded ``Page99`` entry; passes if nothing raises.

        The formatted string is not inspected, only that
        ``pretty.bib_oneliner`` accepts a decoded entry.
        """
        page99_entry = endecoder.EnDecoder().decode_bibdata(bibtex_raw0)['Page99']
        pretty.bib_oneliner(page99_entry)
if __name__ == '__main__':
unittest.main()

@ -1,91 +1,101 @@
from unittest import TestCase import unittest
import testenv import dotdot
import fixtures from pubs.commands.list_cmd import (_check_author_match,
from papers.commands.list_cmd import (_check_author_match,
_check_field_match, _check_field_match,
_check_query_block, _check_query_block,
filter_paper, filter_paper,
InvalidQuery) InvalidQuery)
from pubs.paper import Paper
import fixtures
doe_paper = Paper(fixtures.doe_bibdata)
page_paper = Paper(fixtures.page_bibdata)
turing_paper = Paper(fixtures.turing_bibdata, metadata=fixtures.turing_metadata)
class TestAuthorFilter(TestCase): class TestAuthorFilter(unittest.TestCase):
def test_fails_if_no_author(self): def test_fails_if_no_author(self):
no_doe = fixtures.doe2013.copy() no_doe = doe_paper.deepcopy()
no_doe.bibentry.persons = {} no_doe.bibentry['author'] = []
self.assertTrue(not _check_author_match(no_doe, 'whatever')) self.assertTrue(not _check_author_match(no_doe, 'whatever'))
def test_match_case(self): def test_match_case(self):
self.assertTrue(_check_author_match(fixtures.doe2013, 'doe')) self.assertTrue(_check_author_match(doe_paper, 'doe'))
self.assertTrue(_check_author_match(fixtures.doe2013, 'doe', self.assertTrue(_check_author_match(doe_paper, 'doe',
case_sensitive=False)) case_sensitive=False))
def test_do_not_match_case(self): def test_do_not_match_case(self):
self.assertFalse(_check_author_match(fixtures.doe2013, 'dOe')) self.assertFalse(_check_author_match(doe_paper, 'dOe'))
self.assertFalse(_check_author_match(fixtures.doe2013, 'doe', self.assertFalse(_check_author_match(doe_paper, 'doe',
case_sensitive=True)) case_sensitive=True))
def test_match_not_first_author(self): def test_match_not_first_author(self):
self.assertTrue(_check_author_match(fixtures.page99, 'wani')) self.assertTrue(_check_author_match(page_paper, 'motwani'))
def test_do_not_match_first_name(self): def test_do_not_match_first_name(self):
self.assertTrue(not _check_author_match(fixtures.page99, 'larry')) self.assertTrue(not _check_author_match(page_paper, 'larry'))
class TestCheckTag(TestCase): class TestCheckTag(unittest.TestCase):
pass pass
class TestCheckField(TestCase): class TestCheckField(unittest.TestCase):
def test_match_case(self): def test_match_case(self):
self.assertTrue(_check_field_match(fixtures.doe2013, 'title', 'nice')) self.assertTrue(_check_field_match(doe_paper, 'title', 'nice'))
self.assertTrue(_check_field_match(fixtures.doe2013, 'title', 'nice', self.assertTrue(_check_field_match(doe_paper, 'title', 'nice',
case_sensitive=False)) case_sensitive=False))
self.assertTrue(_check_field_match(fixtures.doe2013, 'year', '2013')) self.assertTrue(_check_field_match(doe_paper, 'year', '2013'))
def test_do_not_match_case(self): def test_do_not_match_case(self):
self.assertFalse(_check_field_match(fixtures.doe2013, 'title', self.assertTrue(_check_field_match(doe_paper, 'title',
'Title', case_sensitive=True)) 'Title', case_sensitive=True))
self.assertFalse(_check_field_match(fixtures.doe2013, 'title', 'nice', self.assertFalse(_check_field_match(doe_paper, 'title', 'nice',
case_sensitive=True)) case_sensitive=True))
class TestCheckQueryBlock(TestCase): class TestCheckQueryBlock(unittest.TestCase):
def test_raise_invalid_if_no_value(self): def test_raise_invalid_if_no_value(self):
with self.assertRaises(InvalidQuery): with self.assertRaises(InvalidQuery):
_check_query_block(fixtures.doe2013, 'title') _check_query_block(doe_paper, 'title')
def test_raise_invalid_if_too_much(self): def test_raise_invalid_if_too_much(self):
with self.assertRaises(InvalidQuery): with self.assertRaises(InvalidQuery):
_check_query_block(fixtures.doe2013, 'whatever:value:too_much') _check_query_block(doe_paper, 'whatever:value:too_much')
class TestFilterPaper(TestCase): class TestFilterPaper(unittest.TestCase):
def test_case(self): def test_case(self):
self.assertTrue (filter_paper(fixtures.doe2013, ['title:nice'])) self.assertTrue (filter_paper(doe_paper, ['title:nice']))
self.assertTrue (filter_paper(fixtures.doe2013, ['title:Nice'])) self.assertTrue (filter_paper(doe_paper, ['title:Nice']))
self.assertFalse(filter_paper(fixtures.doe2013, ['title:nIce'])) self.assertFalse(filter_paper(doe_paper, ['title:nIce']))
def test_fields(self): def test_fields(self):
self.assertTrue (filter_paper(fixtures.doe2013, ['year:2013'])) self.assertTrue (filter_paper(doe_paper, ['year:2013']))
self.assertFalse(filter_paper(fixtures.doe2013, ['year:2014'])) self.assertFalse(filter_paper(doe_paper, ['year:2014']))
self.assertTrue (filter_paper(fixtures.doe2013, ['author:doe'])) self.assertTrue (filter_paper(doe_paper, ['author:doe']))
self.assertTrue (filter_paper(fixtures.doe2013, ['author:Doe'])) self.assertTrue (filter_paper(doe_paper, ['author:Doe']))
def test_tags(self): def test_tags(self):
self.assertTrue (filter_paper(fixtures.turing1950, ['tag:computer'])) self.assertTrue (filter_paper(turing_paper, ['tag:computer']))
self.assertFalse(filter_paper(fixtures.turing1950, ['tag:Ai'])) self.assertFalse(filter_paper(turing_paper, ['tag:Ai']))
self.assertTrue (filter_paper(fixtures.turing1950, ['tag:AI'])) self.assertTrue (filter_paper(turing_paper, ['tag:AI']))
self.assertTrue (filter_paper(fixtures.turing1950, ['tag:ai'])) self.assertTrue (filter_paper(turing_paper, ['tag:ai']))
def test_multiple(self): def test_multiple(self):
self.assertTrue (filter_paper(fixtures.doe2013, self.assertTrue (filter_paper(doe_paper,
['author:doe', 'year:2013'])) ['author:doe', 'year:2013']))
self.assertFalse(filter_paper(fixtures.doe2013, self.assertFalse(filter_paper(doe_paper,
['author:doe', 'year:2014'])) ['author:doe', 'year:2014']))
self.assertFalse(filter_paper(fixtures.doe2013, self.assertFalse(filter_paper(doe_paper,
['author:doee', 'year:2014'])) ['author:doee', 'year:2014']))
if __name__ == '__main__':
unittest.main()

@ -4,7 +4,7 @@ import shutil
import os import os
import fixtures import fixtures
from pubs.repo import (Repository, _base27, BIB_DIR, META_DIR, from pubs.repo import (Repository, _base27,
CiteKeyCollision) CiteKeyCollision)
from pubs.paper import PaperInRepo from pubs.paper import PaperInRepo
from pubs import configs, files from pubs import configs, files
@ -107,3 +107,76 @@ class TestUpdatePaper(TestRepo):
self.repo.doc_dir, 'Turing1950.pdf'))) self.repo.doc_dir, 'Turing1950.pdf')))
self.assertTrue(os.path.exists(os.path.join( self.assertTrue(os.path.exists(os.path.join(
self.repo.doc_dir, 'Doe2003.pdf'))) self.repo.doc_dir, 'Doe2003.pdf')))
class TestSaveLoad(unittest.TestCase):
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.tmpdir, 'bibdata'))
os.makedirs(os.path.join(self.tmpdir, 'meta'))
self.bibfile = os.path.join(self.tmpdir, 'bib.bibyaml')
with open(self.bibfile, 'w') as f:
f.write(BIB)
self.metafile = os.path.join(self.tmpdir, 'meta.meta')
with open(self.metafile, 'w') as f:
f.write(META)
self.dest_bibfile = os.path.join(self.tmpdir, 'written_bib.yaml')
self.dest_metafile = os.path.join(self.tmpdir, 'written_meta.yaml')
def test_load_valid(self):
p = Paper.load(self.bibfile, metapath=self.metafile)
self.assertEqual(fixtures.turing1950, p)
def test_save_fails_with_no_citekey(self):
p = Paper()
with self.assertRaises(ValueError):
p.save(self.dest_bibfile, self.dest_metafile)
def test_save_creates_bib(self):
fixtures.turing1950.save(self.dest_bibfile, self.dest_metafile)
self.assertTrue(os.path.exists(self.dest_bibfile))
def test_save_creates_meta(self):
fixtures.turing1950.save(self.dest_bibfile, self.dest_metafile)
self.assertTrue(os.path.exists(self.dest_metafile))
def test_save_right_bib(self):
fixtures.turing1950.save(self.dest_bibfile, self.dest_metafile)
with open(self.dest_bibfile, 'r') as f:
written = yaml.load(f)
ok = yaml.load(BIB)
self.assertEqual(written, ok)
def test_save_right_meta(self):
fixtures.turing1950.save(self.dest_bibfile, self.dest_metafile)
with open(self.dest_metafile, 'r') as f:
written = yaml.load(f)
ok = yaml.load(META)
self.assertEqual(written, ok)
def tearDown(self):
shutil.rmtree(self.tmpdir)
class TestCopy(unittest.TestCase):
    """Checks that copying a paper gives an equal but independent object."""

    def setUp(self):
        """Build a small paper with a title, a year, one author and a citekey."""
        # NOTE(review): this uses attribute-style access (bibentry.fields /
        # bibentry.persons) and Person, while other tests in this changeset
        # index bibentry like a dict -- confirm this matches the Paper API.
        paper = Paper()
        paper.bibentry.fields['title'] = u'Nice title.'
        paper.bibentry.fields['year'] = u'2013'
        paper.bibentry.persons['author'] = [Person(u'John Doe')]
        paper.citekey = paper.generate_citekey()
        self.orig = paper

    def test_copy_equal(self):
        """A copy compares equal to the paper it was made from."""
        duplicate = self.orig.copy()
        self.assertEqual(duplicate, self.orig)

    def test_copy_can_be_changed(self):
        """Changing a field on the copy leaves the original untouched."""
        duplicate = self.orig.copy()
        duplicate.bibentry.fields['year'] = 2014
        self.assertEqual(self.orig.bibentry.fields['year'], u'2013')
if __name__ == '__main__':
unittest.main()

@ -1,18 +1,22 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import unittest import unittest
import testenv import dotdot
from pubs.commands.tag_cmd import _parse_tags, _tag_groups from pubs.commands.tag_cmd import _parse_tags, _tag_groups
class TestTag(unittest.TestCase): class TestTag(unittest.TestCase):
def test_tag_parsing(self): def test_tag_parsing(self):
self.assertEqual(['+abc', '+def9'], _parse_tags( 'abc+def9')) self.assertEqual(['+abc', '+def9'], _parse_tags([ 'abc+def9']))
self.assertEqual(['+abc', '-def9'], _parse_tags( 'abc-def9')) self.assertEqual(['+abc', '-def9'], _parse_tags([ 'abc-def9']))
self.assertEqual(['-abc', '-def9'], _parse_tags('-abc-def9')) self.assertEqual(['-abc', '-def9'], _parse_tags(['-abc-def9']))
self.assertEqual(['+abc', '-def9'], _parse_tags('+abc-def9')) self.assertEqual(['+abc', '-def9'], _parse_tags(['+abc-def9']))
self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags('-war+math+romance'))) self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags(['-war+math+romance'])))
self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags('+math+romance-war'))) self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags(['+math+romance-war'])))
self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags('math+romance-war'))) self.assertEqual(({'math', 'romance'}, {'war'}), _tag_groups(_parse_tags(['math+romance-war'])))
if __name__ == '__main__':
unittest.main()

@ -2,7 +2,7 @@ import unittest
import re import re
import os import os
import testenv import dotdot
import fake_env import fake_env
from pubs import pubs_cmd from pubs import pubs_cmd
@ -19,7 +19,6 @@ from pubs.commands import init_cmd, import_cmd
class TestFakeInput(unittest.TestCase): class TestFakeInput(unittest.TestCase):
def test_input(self): def test_input(self):
input = fake_env.FakeInput(['yes', 'no']) input = fake_env.FakeInput(['yes', 'no'])
self.assertEqual(input(), 'yes') self.assertEqual(input(), 'yes')
self.assertEqual(input(), 'no') self.assertEqual(input(), 'no')
@ -50,7 +49,7 @@ class CommandTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.fs = fake_env.create_fake_fs([content, filebroker, init_cmd, import_cmd]) self.fs = fake_env.create_fake_fs([content, filebroker, init_cmd, import_cmd])
def execute_cmds(self, cmds, fs=None): def execute_cmds(self, cmds, fs=None, capture_output=True):
""" Execute a list of commands, and capture their output """ Execute a list of commands, and capture their output
A command can be a string, or a tuple of size 2 or 3. A command can be a string, or a tuple of size 2 or 3.
@ -67,16 +66,23 @@ class CommandTestCase(unittest.TestCase):
input = fake_env.FakeInput(cmd[1], [content, uis, beets_ui, p3]) input = fake_env.FakeInput(cmd[1], [content, uis, beets_ui, p3])
input.as_global() input.as_global()
if capture_output:
_, stdout, stderr = fake_env.redirect(pubs_cmd.execute)(cmd[0].split()) _, stdout, stderr = fake_env.redirect(pubs_cmd.execute)(cmd[0].split())
if len(cmd) == 3: if len(cmd) == 3 and capture_output:
actual_out = color.undye(stdout.getvalue()) actual_out = color.undye(stdout.getvalue())
correct_out = color.undye(cmd[2]) correct_out = color.undye(cmd[2])
self.assertEqual(actual_out, correct_out) self.assertEqual(actual_out, correct_out)
else:
pubs_cmd.execute(cmd.split())
else: else:
assert type(cmd) == str if capture_output:
assert isinstance(cmd, str)
_, stdout, stderr = fake_env.redirect(pubs_cmd.execute)(cmd.split()) _, stdout, stderr = fake_env.redirect(pubs_cmd.execute)(cmd.split())
else:
pubs_cmd.execute(cmd.split())
if capture_output:
assert(stderr.getvalue() == '') assert(stderr.getvalue() == '')
outs.append(color.undye(stdout.getvalue())) outs.append(color.undye(stdout.getvalue()))
return outs return outs
@ -171,11 +177,11 @@ class TestUsecase(DataCommandTestCase):
def test_first(self): def test_first(self):
correct = ['Initializing pubs in /paper_first.\n', correct = ['Initializing pubs in /paper_first.\n',
'', '',
'[Page99] L. Page et al. "The PageRank Citation Ranking Bringing Order to the Web" (1999) \n', '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) \n',
'', '',
'', '',
'search network\n', 'search network\n',
'[Page99] L. Page et al. "The PageRank Citation Ranking Bringing Order to the Web" (1999) search network\n' '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) search network\n'
] ]
cmds = ['pubs init -p paper_first/', cmds = ['pubs init -p paper_first/',
@ -236,7 +242,7 @@ class TestUsecase(DataCommandTestCase):
bib2 = re.sub('Lawrence Page', 'Lawrence Ridge', bib1) bib2 = re.sub('Lawrence Page', 'Lawrence Ridge', bib1)
bib3 = re.sub('Page99', 'Ridge07', bib2) bib3 = re.sub('Page99', 'Ridge07', bib2)
line = '[Page99] L. Page et al. "The PageRank Citation Ranking Bringing Order to the Web" (1999) \n' line = '[Page99] Page, Lawrence et al. "The PageRank Citation Ranking: Bringing Order to the Web." (1999) \n'
line1 = re.sub('1999', '2007', line) line1 = re.sub('1999', '2007', line)
line2 = re.sub('L. Page', 'L. Ridge', line1) line2 = re.sub('L. Page', 'L. Ridge', line1)
line3 = re.sub('Page99', 'Ridge07', line2) line3 = re.sub('Page99', 'Ridge07', line2)
@ -258,12 +264,10 @@ class TestUsecase(DataCommandTestCase):
cmds = ['pubs init', cmds = ['pubs init',
('pubs add', [str_fixtures.bibtex_external0]), ('pubs add', [str_fixtures.bibtex_external0]),
'pubs export Page99', 'pubs export Page99',
('pubs export Page99 -f bibtex', []),
'pubs export Page99 -f bibyaml',
] ]
outs = self.execute_cmds(cmds) outs = self.execute_cmds(cmds)
self.assertEqual(endecoder.EnDecoder().decode_bibdata(outs[3]), fixtures.page_bibdata) self.assertEqual(endecoder.EnDecoder().decode_bibdata(outs[2]), fixtures.page_bibdata)
def test_import(self): def test_import(self):
cmds = ['pubs init', cmds = ['pubs init',
@ -304,3 +308,7 @@ class TestUsecase(DataCommandTestCase):
with self.assertRaises(SystemExit): with self.assertRaises(SystemExit):
self.execute_cmds(cmds) self.execute_cmds(cmds)
if __name__ == '__main__':
unittest.main()

@ -0,0 +1,15 @@
@article{10.1371_journal.pone.0038236,
author = {Caroline Lyon AND Chrystopher L. Nehaniv AND Joe Saunders},
journal = {PLoS ONE},
publisher = {Public Library of Science},
title = {Interactive Language Learning by Robots: The Transition from Babbling to Word Forms},
year = {2012},
month = {06},
volume = {7},
url = {http://dx.doi.org/10.1371%2Fjournal.pone.0038236},
pages = {e38236},
abstract = {<p>The advent of humanoid robots has enabled a new approach to investigating the acquisition of language, and we report on the development of robots able to acquire rudimentary linguistic skills. Our work focuses on early stages analogous to some characteristics of a human child of about 6 to 14 months, the transition from babbling to first word forms. We investigate one mechanism among many that may contribute to this process, a key factor being the sensitivity of learners to the statistical distribution of linguistic elements. As well as being necessary for learning word meanings, the acquisition of anchor word forms facilitates the segmentation of an acoustic stream through other mechanisms. In our experiments some salient one-syllable word forms are learnt by a humanoid robot in real-time interactions with naive participants. Words emerge from random syllabic babble through a learning process based on a dialogue between the robot and the human participant, whose speech is perceived by the robot as a stream of phonemes. Numerous ways of representing the speech as syllabic segments are possible. Furthermore, the pronunciation of many words in spontaneous speech is variable. However, in line with research elsewhere, we observe that salient content words are more likely than function words to have consistent canonical representations; thus their relative frequency increases, as does their influence on the learner. Variable pronunciation may contribute to early word form acquisition. The importance of contingent interaction in real-time between teacher and learner is reflected by a reinforcement process, with variable success. The examination of individual cases may be more informative than group results. Nevertheless, word forms are usually produced by the robot after a few minutes of dialogue, employing a simple, real-time, frequency dependent mechanism. 
This work shows the potential of human-robot interaction systems in studies of the dynamics of early language acquisition.</p>},
number = {6},
doi = {10.1371/journal.pone.0038236}
}

@ -1,45 +0,0 @@
entries:
10.1371_journal.pone.0038236:
abstract: <p>The advent of humanoid robots has enabled a new approach to investigating
the acquisition of language, and we report on the development of robots
able to acquire rudimentary linguistic skills. Our work focuses on early
stages analogous to some characteristics of a human child of about 6 to
14 months, the transition from babbling to first word forms. We investigate
one mechanism among many that may contribute to this process, a key factor
being the sensitivity of learners to the statistical distribution of linguistic
elements. As well as being necessary for learning word meanings, the acquisition
of anchor word forms facilitates the segmentation of an acoustic stream
through other mechanisms. In our experiments some salient one-syllable
word forms are learnt by a humanoid robot in real-time interactions with
naive participants. Words emerge from random syllabic babble through a
learning process based on a dialogue between the robot and the human participant,
whose speech is perceived by the robot as a stream of phonemes. Numerous
ways of representing the speech as syllabic segments are possible. Furthermore,
the pronunciation of many words in spontaneous speech is variable. However,
in line with research elsewhere, we observe that salient content words
are more likely than function words to have consistent canonical representations;
thus their relative frequency increases, as does their influence on the
learner. Variable pronunciation may contribute to early word form acquisition.
The importance of contingent interaction in real-time between teacher
and learner is reflected by a reinforcement process, with variable success.
The examination of individual cases may be more informative than group
results. Nevertheless, word forms are usually produced by the robot after
a few minutes of dialogue, employing a simple, real-time, frequency dependent
mechanism. This work shows the potential of human-robot interaction systems
in studies of the dynamics of early language acquisition.</p>
author:
- first: Caroline
last: Saunders
middle: Lyon AND Chrystopher L. Nehaniv AND Joe
doi: 10.1371/journal.pone.0038236
journal: PLoS ONE
month: '06'
number: '6'
pages: e38236
publisher: Public Library of Science
title: 'Interactive Language Learning by Robots: The Transition from Babbling
to Word Forms'
type: article
url: http://dx.doi.org/10.1371%2Fjournal.pone.0038236
volume: '7'
year: '2012'

@ -0,0 +1,15 @@
@article{10.1371/journal.pone.0063400,
author = {Martius, , Georg AND Der, , Ralf AND Ay, , Nihat},
journal = {PLoS ONE},
publisher = {Public Library of Science},
title = {Information Driven Self-Organization of Complex Robotic Behaviors},
year = {2013},
month = {05},
volume = {8},
url = {http://dx.doi.org/10.1371%2Fjournal.pone.0063400},
pages = {e63400},
abstract = {<p>Information theory is a powerful tool to express principles to drive autonomous systems because it is domain invariant and allows for an intuitive interpretation. This paper studies the use of the predictive information (PI), also called excess entropy or effective measure complexity, of the sensorimotor process as a driving force to generate behavior. We study nonlinear and nonstationary systems and introduce the time-local predicting information (TiPI) which allows us to derive exact results together with explicit update rules for the parameters of the controller in the dynamical systems framework. In this way the information principle, formulated at the level of behavior, is translated to the dynamics of the synapses. We underpin our results with a number of case studies with high-dimensional robotic systems. We show the spontaneous cooperativity in a complex physical system with decentralized control. Moreover, a jointly controlled humanoid robot develops a high behavioral variety depending on its physics and the environment it is dynamically embedded into. The behavior can be decomposed into a succession of low-dimensional modes that increasingly explore the behavior space. This is a promising way to avoid the curse of dimensionality which hinders learning systems to scale well.</p>},
number = {5},
doi = {10.1371/journal.pone.0063400}
}

@ -1,36 +0,0 @@
entries:
10.1371journal.pone.0063400:
abstract: <p>Information theory is a powerful tool to express principles to
drive autonomous systems because it is domain invariant and allows for
an intuitive interpretation. This paper studies the use of the predictive
information (PI), also called excess entropy or effective measure complexity,
of the sensorimotor process as a driving force to generate behavior. We
study nonlinear and nonstationary systems and introduce the time-local
predicting information (TiPI) which allows us to derive exact results
together with explicit update rules for the parameters of the controller
in the dynamical systems framework. In this way the information principle,
formulated at the level of behavior, is translated to the dynamics of
the synapses. We underpin our results with a number of case studies with
high-dimensional robotic systems. We show the spontaneous cooperativity
in a complex physical system with decentralized control. Moreover, a jointly
controlled humanoid robot develops a high behavioral variety depending
on its physics and the environment it is dynamically embedded into. The
behavior can be decomposed into a succession of low-dimensional modes
that increasingly explore the behavior space. This is a promising way
to avoid the curse of dimensionality which hinders learning systems to
scale well.</p>
author:
- first: Georg
last: Ay
middle: Martius AND Ralf Der AND Nihat
doi: 10.1371/journal.pone.0063400
journal: PLoS ONE
month: '05'
number: '5'
pages: e63400
publisher: Public Library of Science
title: Information Driven Self-Organization of Complex Robotic Behaviors
type: article
url: http://dx.doi.org/10.1371%2Fjournal.pone.0063400
volume: '8'
year: '2013'

@ -0,0 +1,13 @@
@techreport{Page99,
number = {1999-66},
month = {November},
author = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
note = {Previous number = SIDL-WP-1999-0120},
title = {The PageRank Citation Ranking: Bringing Order to the Web.},
type = {Technical Report},
publisher = {Stanford InfoLab},
year = {1999},
institution = {Stanford InfoLab},
url = {http://ilpubs.stanford.edu:8090/422/},
abstract = {The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.}
}

@ -1,28 +0,0 @@
entries:
Page99:
abstract: The importance of a Web page is an inherently subjective matter,
which depends on the readers interests, knowledge and attitudes. But there
is still much that can be said objectively about the relative importance
of Web pages. This paper describes PageRank, a mathod for rating Web pages
objectively and mechanically, effectively measuring the human interest
and attention devoted to them. We compare PageRank to an idealized random
Web surfer. We show how to efficiently compute PageRank for large numbers
of pages. And, we show how to apply PageRank to search and to user navigation.
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
institution: Stanford InfoLab
month: November
note: Previous number = SIDL-WP-1999-0120
number: 1999-66
publisher: Stanford InfoLab
title: 'The PageRank Citation Ranking: Bringing Order to the Web.'
type: techreport
url: http://ilpubs.stanford.edu:8090/422/
year: '1999'

@ -0,0 +1,6 @@
@article{10.1371/journal.pone.0063400,
author = {Martius, , Georg AND Der, , Ralf AND Ay, , Nihat},
journal = {PLoS ONE},
publisher = {Public Library of Science},
title = {Information Driven Self-Organization of Complex Robotic Behaviors},
}

@ -1,15 +0,0 @@
entries:
journal0063400:
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
journal: PLoS ONE
publisher: Public Library of Science
title: Information Driven Self-Organization of Complex Robotic Behaviors
type: article
Loading…
Cancel
Save