Merge branch 'master' into pre0.8.0
commit 7d8e87a484
12  .travis.yml
@ -16,6 +16,11 @@ matrix:
- os: linux
language: python
python: 3.6
- os: linux
language: python
python: 3.7
dist: xenial
sudo: true
- os: osx
language: generic
python: 2.7
@ -36,8 +41,9 @@ matrix:
# command to install dependencies
install:
- python --version
- pip install -r tests/requirements.txt
- python setup.py install
- export PUBS_TESTS_MODE=ONLINE

# command to run tests
script: python -m unittest discover
script:
- PUBS_TESTS_MODE=MOCK python setup.py test
- PUBS_TESTS_MODE=COLLECT python setup.py test
@ -19,6 +19,12 @@

### Implemented enhancements

- Adds `move` and `link` options for handling of documents during `import` (copy being the default). Makes `copy` the default for document handling during `add`. [(#159)](https://github.com/pubs/pubs/pull/159)

- Support for downloading arXiv references from their ID ([#146](https://github.com/pubs/pubs/issues/146) by [joe-antognini](https://github.com/joe-antognini))

- Better feedback when an error is encountered while adding a reference from a DOI, ISBN or arXiv ID [#155](https://github.com/pubs/pubs/issues/155)

- Better dialog after editing a paper [(#142)](https://github.com/pubs/pubs/issues/142)

- Add a command to open URLs ([#139](https://github.com/pubs/pubs/issues/139) by [ksunden](https://github.com/ksunden))
@ -37,9 +43,12 @@

- Support year ranges in query [(#102)](https://github.com/pubs/pubs/issues/102)

- Tests can now be run with `python setup.py test` [#155](https://github.com/pubs/pubs/issues/155)

### Fixed bugs

- [[#144]](https://github.com/pubs/pubs/issues/144) More robust handling of the `doc_add` options [(#159)](https://github.com/pubs/pubs/pull/159)

- [[#149]](https://github.com/pubs/pubs/issues/149) More robust handling of parsing and citekey errors [(#87)](https://github.com/pubs/pubs/pull/87)

- [[#148]](https://github.com/pubs/pubs/issues/148) Fix compatibility with Pyfakefs 3.7 [(#151)](https://github.com/pubs/pubs/pull/151)
24  dev_requirements.txt Normal file
@ -0,0 +1,24 @@
# if you want to set up your environment for development of the pubs code,
# doing `pip install -r dev_requirements.txt` is the single thing you have to do.
# Alternatively, and perhaps more conveniently, running `python setup.py test`
# will do the same *and* run the tests, but without installing the packages on
# the system.
# Note that if you introduce a new dependency, you need to add it here and, more
# importantly, to the setup.py script so that it is taken into account when
# installing from PyPI.

-e .
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4
feedparser
six

# those are the additional packages required to run the tests
pyfakefs
ddt
mock
pytest # optional (python setup.py test works without it), but possible nonetheless
183  pubs/apis.py
@ -1,27 +1,198 @@
|
||||
"""Interface for Remote Bibliographic APIs"""
|
||||
import re
|
||||
import datetime
|
||||
|
||||
import requests
|
||||
import bibtexparser
|
||||
from bibtexparser.bibdatabase import BibDatabase
|
||||
import feedparser
|
||||
from bs4 import BeautifulSoup

from . import endecoder
|
||||
|
||||
|
||||
def doi2bibtex(doi):
|
||||
class ReferenceNotFoundError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def get_bibentry_from_api(id_str, id_type, try_doi=True, ui=None):
|
||||
"""Return a bibtex string from various ID methods.
|
||||
|
||||
This is a wrapper around functions that will return a bibtex string given
|
||||
one of:
|
||||
|
||||
* DOI
|
||||
* ISBN
|
||||
* arXiv ID
|
||||
|
||||
Args:
|
||||
id_str: A string with the ID.
|
||||
id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
|
||||
try_doi: If a DOI is referenced in the arXiv metadata, try to fetch its BibTeX instead (only used for arXiv IDs).
|
||||
ui: A UI object.
|
||||
|
||||
Returns:
|
||||
A bibtex string.
|
||||
|
||||
Raises:
|
||||
ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
|
||||
apis.ReferenceNotFoundError: if no valid reference could be found.
|
||||
"""
|
||||
|
||||
id_fns = {
|
||||
'doi': doi2bibtex,
|
||||
'isbn': isbn2bibtex,
|
||||
'arxiv': arxiv2bibtex,
|
||||
}
|
||||
|
||||
if id_type not in id_fns.keys():
|
||||
raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')
|
||||
|
||||
bibentry_raw = id_fns[id_type](id_str, try_doi=try_doi, ui=ui)
|
||||
bibentry = endecoder.EnDecoder().decode_bibdata(bibentry_raw)
|
||||
if bibentry is None:
|
||||
raise ReferenceNotFoundError(
|
||||
'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
|
||||
return bibentry
|
||||
|
||||
|
||||
|
||||
def _get_request(url, headers=None):
|
||||
"""GET requests to a url. Return the `requests` object.
|
||||
|
||||
:raise ReferenceNotFoundError: if anything goes bad (connection refused, timeout,
HTTP status error (401, 404, etc.)).
|
||||
"""
|
||||
try:
|
||||
r = requests.get(url, headers=headers)
|
||||
r.raise_for_status()
|
||||
return r
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise ReferenceNotFoundError(e.args)
|
||||
|
||||
|
||||
## DOI support
|
||||
|
||||
def doi2bibtex(doi, **kwargs):
|
||||
"""Return a bibtex string of metadata from a DOI"""
|
||||
|
||||
url = 'http://dx.doi.org/{}'.format(doi)
|
||||
url = 'https://dx.doi.org/{}'.format(doi)
|
||||
headers = {'accept': 'application/x-bibtex'}
|
||||
r = requests.get(url, headers=headers)
|
||||
r = _get_request(url, headers=headers)
|
||||
if r.encoding is None:
|
||||
r.encoding = 'utf8' # Do not rely on guessing from request
|
||||
|
||||
return r.text
|
||||
|
||||
|
||||
def isbn2bibtex(isbn):
|
||||
## ISBN support
|
||||
|
||||
|
||||
def isbn2bibtex(isbn, **kwargs):
|
||||
"""Return a bibtex string of metadata from an ISBN"""
|
||||
|
||||
url = 'http://www.ottobib.com/isbn/{}/bibtex'.format(isbn)
|
||||
r = requests.get(url)
|
||||
url = 'https://www.ottobib.com/isbn/{}/bibtex'.format(isbn)
|
||||
r = _get_request(url)
|
||||
soup = BeautifulSoup(r.text, "html.parser")
|
||||
citation = soup.find("textarea").text
|
||||
|
||||
return citation
|
||||
|
||||
# Note: apparently ottobib.com uses character modifiers for accents instead
|
||||
# of the correct unicode characters. TODO: Should we convert them?
|
||||
|
||||
|
||||
## arXiv support
|
||||
|
||||
_months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
|
||||
'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
|
||||
|
||||
def _is_arxiv_oldstyle(arxiv_id):
|
||||
return re.match(r"(arXiv\:)?[a-z\-]+\/[0-9]+(v[0-9]+)?", arxiv_id) is not None
|
||||
|
||||
def _extract_arxiv_id(entry):
|
||||
pattern = r"http[s]?://arxiv.org/abs/(?P<entry_id>.+)"
|
||||
return re.search(pattern, entry['id']).groupdict()['entry_id']
|
||||
|
||||
|
||||
def arxiv2bibtex(arxiv_id, try_doi=True, ui=None):
|
||||
"""Return a bibtex string of metadata from an arXiv ID
|
||||
|
||||
:param arxiv_id: arXiv id, with or without the `arXiv:` prefix and version
|
||||
suffix (e.g. `v1`). Old and new styles are accepted. Here are
examples of accepted identifiers: `1510.00322`,
|
||||
`arXiv:1510.00322`, `0901.0512`, `arXiv:0901.0512`,
|
||||
`hep-ph/9409201` or `arXiv:hep-ph/9409201`.
|
||||
Note that the `arXiv:` prefix will be automatically
|
||||
removed, and the version suffix automatically added if
|
||||
missing.
|
||||
:param try_doi: if a DOI is referenced in the arXiv metadata,
|
||||
try to download it instead. If that fails for any reason,
|
||||
falls back to the arXiv, with a warning message, if the
|
||||
UI is provided.
|
||||
:param ui: if not None, will display a warning if the doi request
|
||||
fails.
|
||||
"""
|
||||
## handle errors
|
||||
url = 'https://export.arxiv.org/api/query?id_list={}'.format(arxiv_id)
|
||||
try:
|
||||
r = requests.get(url)
|
||||
if r.status_code == 400: # bad request
|
||||
msg = ("the arXiv server returned a bad request error. The "
|
||||
"arXiv id {} is possibly invalid or malformed.".format(arxiv_id))
|
||||
raise ReferenceNotFoundError(msg)
|
||||
r.raise_for_status() # raise an exception for HTTP errors:
|
||||
# 401, 404, 400 if `ui` is None, etc.
|
||||
except requests.exceptions.RequestException as e:
|
||||
msg = ("connection error while retrieving arXiv data for "
|
||||
"'{}': {}".format(arxiv_id, e))
|
||||
raise ReferenceNotFoundError(msg)
|
||||
|
||||
feed = feedparser.parse(r.text)
|
||||
if len(feed.entries) == 0: # no results.
|
||||
msg = "no results for arXiv id {}".format(arxiv_id)
|
||||
raise ReferenceNotFoundError(msg)
|
||||
if len(feed.entries) > 1: # I don't know how that could happen, but let's
|
||||
# be ready for it.
|
||||
results = '\n'.join('{}. {}'.format(i, entry['title'])
|
||||
for entry in feed.entries)
|
||||
msg = ("multiple results for arXiv id {}:\n{}\nThis is unexpected. "
|
||||
"Please submit an issue at "
|
||||
"https://github.com/pubs/pubs/issues").format(arxiv_id, choices)
|
||||
raise ReferenceNotFoundError(msg)
|
||||
|
||||
entry = feed.entries[0]
|
||||
|
||||
## try to return a doi instead of the arXiv reference
|
||||
if try_doi and 'arxiv_doi' in entry:
|
||||
try:
|
||||
return doi2bibtex(entry['arxiv_doi'])
|
||||
except ReferenceNotFoundError as e:
|
||||
if ui is not None:
|
||||
ui.warning(str(e))
|
||||
|
||||
## create a bibentry from the arXiv response.
|
||||
db = BibDatabase()
|
||||
entry_id = _extract_arxiv_id(entry)
|
||||
author_str = ' and '.join(
|
||||
[author['name'] for author in entry['authors']])
|
||||
db.entries = [{
|
||||
'ENTRYTYPE': 'article',
|
||||
'ID': entry_id,
|
||||
'author': author_str,
|
||||
'title': entry['title'],
|
||||
'year': str(entry['published_parsed'].tm_year),
|
||||
'month': _months[entry['published_parsed'].tm_mon-1],
|
||||
'eprint': entry_id,
|
||||
'eprinttype': 'arxiv',
|
||||
'date': entry['published'], # not really standard, but a resolution more
|
||||
# granular than months is increasingly relevant.
|
||||
'url': entry['link'],
|
||||
'urldate': datetime.datetime.utcnow().isoformat() + 'Z' # can't hurt.
|
||||
}]
|
||||
# we don't add eprintclass for old-style ids, as it is in the id already.
|
||||
if not _is_arxiv_oldstyle(entry_id):
|
||||
db.entries[0]['eprintclass'] = entry['arxiv_primary_category']['term']
|
||||
if 'arxiv_doi' in entry:
|
||||
db.entries[0]['arxiv_doi'] = entry['arxiv_doi']
|
||||
|
||||
bibtex = bibtexparser.dumps(db)
|
||||
return bibtex
|
||||
|
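For orientation, here is a minimal sketch of how the new helpers above are meant to be called (illustrative only: the arXiv ID comes from the docstring example, and the `pubs.apis` import path is assumed from the file location):

    from pubs import apis

    try:
        # 'arxiv' routes to arxiv2bibtex(); 'doi' and 'isbn' are also accepted
        entry = apis.get_bibentry_from_api('1510.00322', 'arxiv')
        print(entry)
    except apis.ReferenceNotFoundError as err:
        # raised on connection problems, HTTP errors, or unknown identifiers
        print('lookup failed: {}'.format(err))
    except ValueError as err:
        # raised when id_type is not 'doi', 'isbn', or 'arxiv'
        print(err)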
18  pubs/command_utils.py Normal file
@ -0,0 +1,18 @@
|
||||
"""Contains code that is reused over commands, like argument definition
|
||||
or help messages.
|
||||
"""
|
||||
|
||||
|
||||
def add_doc_copy_arguments(parser, copy=True):
|
||||
doc_add_group = parser.add_mutually_exclusive_group()
|
||||
doc_add_group.add_argument(
|
||||
'-L', '--link', action='store_const', dest='doc_copy', const='link',
|
||||
default=None,
|
||||
help="don't copy document files, just create a link.")
|
||||
if copy:
|
||||
doc_add_group.add_argument(
|
||||
'-C', '--copy', action='store_const', dest='doc_copy', const='copy',
|
||||
help="copy document (keep source).")
|
||||
doc_add_group.add_argument(
|
||||
'-M', '--move', action='store_const', dest='doc_copy', const='move',
|
||||
help="move document (copy and remove source).")
|
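A quick sketch of what the helper above does when attached to a bare argparse parser (standalone illustration, not part of the commit):

    import argparse
    from pubs.command_utils import add_doc_copy_arguments

    parser = argparse.ArgumentParser(prog='demo')
    add_doc_copy_arguments(parser)  # adds --link, --copy and --move

    print(parser.parse_args([]).doc_copy)          # None: fall back to the config value
    print(parser.parse_args(['--move']).doc_copy)  # 'move'
    # '--move --link' would make argparse exit with an error, since the three
    # options live in a mutually exclusive group.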
@ -6,9 +6,11 @@ from . import add_cmd
|
||||
from . import rename_cmd
|
||||
from . import remove_cmd
|
||||
from . import list_cmd
|
||||
from . import edit_cmd
|
||||
from . import tag_cmd
|
||||
from . import statistics_cmd
|
||||
# doc
|
||||
from . import doc_cmd
|
||||
from . import tag_cmd
|
||||
from . import note_cmd
|
||||
# bulk
|
||||
from . import export_cmd
|
||||
@ -16,5 +18,3 @@ from . import import_cmd
|
||||
# bonus
|
||||
from . import websearch_cmd
|
||||
from . import url_cmd
|
||||
|
||||
from . import edit_cmd
|
||||
|
@ -12,6 +12,7 @@ from .. import apis
|
||||
from .. import pretty
|
||||
from .. import utils
|
||||
from .. import endecoder
|
||||
from ..command_utils import add_doc_copy_arguments
|
||||
from ..completion import CommaSeparatedTagsCompletion
|
||||
|
||||
|
||||
@ -26,37 +27,39 @@ def parser(subparsers, conf):
|
||||
parser = subparsers.add_parser('add', help='add a paper to the repository')
|
||||
parser.add_argument('bibfile', nargs='?', default=None,
|
||||
help='bibtex file')
|
||||
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
|
||||
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
id_arg = parser.add_mutually_exclusive_group()
|
||||
id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
|
||||
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
|
||||
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
|
||||
default=None
|
||||
).completer = CommaSeparatedTagsCompletion(conf)
|
||||
parser.add_argument('-k', '--citekey', help='citekey associated with the paper;\nif not provided, one will be generated automatically.',
|
||||
default=None, type=p3.u_maybe)
|
||||
parser.add_argument('-L', '--link', action='store_false', dest='copy', default=True,
|
||||
help="don't copy document files, just create a link.")
|
||||
parser.add_argument('-M', '--move', action='store_true', dest='move', default=False,
|
||||
help="move document instead of of copying (ignored if --link).")
|
||||
add_doc_copy_arguments(parser)
|
||||
return parser
|
||||
|
||||
|
||||
def bibentry_from_editor(conf, ui, rp):
|
||||
def bibentry_from_editor(conf, ui):
|
||||
|
||||
again = True
|
||||
bibstr = templates.add_bib
|
||||
bibentry_raw = templates.add_bib
|
||||
decoder = endecoder.EnDecoder()
|
||||
|
||||
while again:
|
||||
try:
|
||||
bibstr = ui.editor_input(initial=bibstr, suffix='.bib')
|
||||
if bibstr == templates.add_bib:
|
||||
bibentry_raw = ui.editor_input(initial=bibentry_raw, suffix='.bib')
|
||||
if bibentry_raw == templates.add_bib:
|
||||
again = ui.input_yn(
|
||||
question='Bibfile not edited. Edit again ?',
|
||||
default='y')
|
||||
if not again:
|
||||
ui.exit(0)
|
||||
else:
|
||||
bibentry = rp.databroker.verify(bibstr)
|
||||
bibentry = decoder.decode_bibdata(bibentry_raw)
|
||||
bibstruct.verify_bibdata(bibentry)
|
||||
# REFACTOR Generate citykey
|
||||
# REFACTOR Generate citekey
|
||||
again = False
|
||||
|
||||
except endecoder.EnDecoder.BibDecodingError:
|
||||
@ -82,30 +85,29 @@ def command(conf, args):
|
||||
citekey = args.citekey
|
||||
|
||||
rp = repo.Repository(conf)
|
||||
decoder = endecoder.EnDecoder()
|
||||
|
||||
# get bibtex entry
|
||||
if bibfile is None:
|
||||
if args.doi is None and args.isbn is None:
|
||||
bibentry = bibentry_from_editor(conf, ui, rp)
|
||||
if args.doi is None and args.isbn is None and args.arxiv is None:
|
||||
bibentry = bibentry_from_editor(conf, ui)
|
||||
else:
|
||||
if args.doi is not None:
|
||||
bibentry_raw = apis.doi2bibtex(args.doi)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
if bibentry is None:
|
||||
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
|
||||
if args.isbn is None:
|
||||
ui.exit(1)
|
||||
if args.isbn is not None:
|
||||
bibentry_raw = apis.isbn2bibtex(args.isbn)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
if bibentry is None:
|
||||
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
|
||||
ui.exit(1)
|
||||
# TODO distinguish between cases, offer to open the error page in a webbrowser.
|
||||
# TODO offer to confirm/change citekey
|
||||
bibentry = None
|
||||
try:
|
||||
if args.doi is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.doi, 'doi', ui=ui)
|
||||
elif args.isbn is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', ui=ui)
|
||||
# TODO distinguish between cases, offer to open the error page in a webbrowser.
|
||||
# TODO offer to confirm/change citekey
|
||||
elif args.arxiv is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', ui=ui)
|
||||
except apis.ReferenceNotFoundError as e:
|
||||
ui.error(str(e))
|
||||
ui.exit(1)
|
||||
else:
|
||||
bibentry_raw = content.get_content(bibfile, ui=ui)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
bibentry = decoder.decode_bibdata(bibentry_raw)
|
||||
if bibentry is None:
|
||||
ui.error('invalid bibfile {}.'.format(bibfile))
|
||||
|
||||
@ -136,25 +138,20 @@ def command(conf, args):
|
||||
'{}, using {} instead.').format(bib_docfile, docfile))
|
||||
|
||||
# create the paper
|
||||
copy = args.copy
|
||||
if copy is None:
|
||||
copy = conf['main']['doc_add'] in ('copy', 'move')
|
||||
move = args.move
|
||||
if move is None:
|
||||
move = conf['main']['doc_add'] == 'move'
|
||||
doc_add = args.doc_copy
|
||||
if doc_add is None:
|
||||
doc_add = conf['main']['doc_add']
|
||||
|
||||
rp.push_paper(p)
|
||||
ui.message('added to pubs:\n{}'.format(pretty.paper_oneliner(p)))
|
||||
if docfile is not None:
|
||||
rp.push_doc(p.citekey, docfile, copy=copy or args.move)
|
||||
if copy:
|
||||
if move:
|
||||
content.remove_file(docfile)
|
||||
rp.push_doc(p.citekey, docfile, copy=(doc_add in ('copy', 'move')))
|
||||
if doc_add == 'move' and content.content_type(docfile) != 'url':
|
||||
content.remove_file(docfile)
|
||||
|
||||
if copy:
|
||||
if move:
|
||||
ui.message('{} was moved to the pubs repository.'.format(docfile))
|
||||
else:
|
||||
if doc_add == 'move':
|
||||
ui.message('{} was moved to the pubs repository.'.format(docfile))
|
||||
elif doc_add == 'copy':
|
||||
ui.message('{} was copied to the pubs repository.'.format(docfile))
|
||||
|
||||
rp.close()
|
||||
|
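With the argument changes above, document handling for `add` is now expressed through the shared flags; for example (illustrative command lines, identifiers taken from elsewhere in this commit):

    pubs add -X 1510.00322 -d article.pdf --move
    pubs add -D 10.1007/BF01700692 -d article.pdf --link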
@ -7,10 +7,11 @@ from .. import repo
|
||||
from .. import endecoder
|
||||
from .. import bibstruct
|
||||
from .. import color
|
||||
from .. import content
|
||||
from ..paper import Paper
|
||||
|
||||
from ..uis import get_ui
|
||||
from ..content import system_path, read_text_file
|
||||
from ..command_utils import add_doc_copy_arguments
|
||||
|
||||
|
||||
_ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore."
|
||||
@ -18,18 +19,22 @@ _IGNORING_MSG = " Ignoring it."
|
||||
|
||||
|
||||
def parser(subparsers, conf):
|
||||
parser = subparsers.add_parser('import',
|
||||
help='import paper(s) to the repository')
|
||||
parser.add_argument('bibpath',
|
||||
help='path to bibtex, bibtexml or bibyaml file (or directory)')
|
||||
parser.add_argument('-L', '--link', action='store_false', dest='copy', default=True,
|
||||
help="don't copy document files, just create a link.")
|
||||
parser.add_argument('keys', nargs='*',
|
||||
help="one or several keys to import from the file")
|
||||
parser.add_argument('-O', '--overwrite', action='store_true', default=False,
|
||||
help="Overwrite keys already in the database")
|
||||
parser.add_argument('-i', '--ignore-malformed', action='store_true', default=False,
|
||||
help="Ignore malformed and unreadable files and entries")
|
||||
parser = subparsers.add_parser(
|
||||
'import',
|
||||
help='import paper(s) to the repository')
|
||||
parser.add_argument(
|
||||
'bibpath',
|
||||
help='path to bibtex, bibtexml or bibyaml file (or directory)')
|
||||
parser.add_argument(
|
||||
'keys', nargs='*',
|
||||
help="one or several keys to import from the file")
|
||||
parser.add_argument(
|
||||
'-O', '--overwrite', action='store_true', default=False,
|
||||
help="Overwrite keys already in the database")
|
||||
parser.add_argument(
|
||||
'-i', '--ignore-malformed', action='store_true', default=False,
|
||||
help="Ignore malformed and unreadable files and entries")
|
||||
add_doc_copy_arguments(parser, copy=False)
|
||||
return parser
|
||||
|
||||
|
||||
@ -90,9 +95,7 @@ def command(conf, args):
|
||||
|
||||
ui = get_ui()
|
||||
bibpath = args.bibpath
|
||||
copy = args.copy
|
||||
if copy is None:
|
||||
copy = conf['main']['doc_add'] in ('copy', 'move')
|
||||
doc_import = args.doc_copy or 'copy'
|
||||
|
||||
rp = repo.Repository(conf)
|
||||
# Extract papers from bib
|
||||
@ -106,7 +109,9 @@ def command(conf, args):
|
||||
if docfile is None:
|
||||
ui.warning("No file for {}.".format(p.citekey))
|
||||
else:
|
||||
rp.push_doc(p.citekey, docfile, copy=copy)
|
||||
# FIXME should move the file if configured to do so.
|
||||
rp.push_doc(p.citekey, docfile,
|
||||
copy=(doc_import in ('copy', 'move')))
|
||||
if doc_import == 'move' and content.content_type(docfile) != 'url':
|
||||
content.remove_file(docfile)
|
||||
|
||||
rp.close()
|
||||
|
33  pubs/commands/statistics_cmd.py Normal file
@ -0,0 +1,33 @@
|
||||
from ..repo import Repository
|
||||
from ..uis import get_ui
|
||||
from .. import color
|
||||
|
||||
|
||||
def parser(subparsers, conf):
|
||||
parser = subparsers.add_parser(
|
||||
'statistics',
|
||||
help="show statistics on the repository.")
|
||||
return parser
|
||||
|
||||
|
||||
def command(conf, args):
|
||||
ui = get_ui()
|
||||
rp = Repository(conf)
|
||||
papers = list(rp.all_papers())
|
||||
|
||||
paper_count = len(papers)
|
||||
doc_count = sum([0 if p.docpath is None else 1 for p in papers])
|
||||
tag_count = len(list(rp.get_tags()))
|
||||
papers_with_tags = sum([1 if p.tags else 0 for p in papers])
|
||||
|
||||
ui.message(color.dye_out('Repository statistics:', 'bold'))
|
||||
ui.message('Total papers: {}, {} ({}) have a document attached'.format(
|
||||
color.dye_out('{:d}'.format(paper_count), 'bgreen'),
|
||||
color.dye_out('{:d}'.format(doc_count), 'bold'),
|
||||
'{:.0f}%'.format(100. * doc_count / paper_count),
|
||||
))
|
||||
ui.message('Total tags: {}, {} ({}) of papers have at least one tag'.format(
|
||||
color.dye_out('{:d}'.format(tag_count), 'bgreen'),
|
||||
color.dye_out('{:d}'.format(papers_with_tags), 'bold'),
|
||||
'{:.0f}%'.format(100. * papers_with_tags / paper_count),
|
||||
))
|
@ -11,7 +11,7 @@ docsdir = string(default="docsdir://")

# Specify if a document should be copied or moved in the docdir, or only
# linked when adding a publication.
doc_add = option('copy', 'move', 'link', default='move')
doc_add = option('copy', 'move', 'link', default='copy')

# the command to use when opening document files
open_cmd = string(default=None)
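In a user's pubs configuration this corresponds to a single line in the main section, e.g. (illustrative; 'link' is one of the three allowed values):

    [main]
    doc_add = link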
@ -79,16 +79,6 @@ class DataBroker(object):
|
||||
def listing(self, filestats=True):
|
||||
return self.filebroker.listing(filestats=filestats)
|
||||
|
||||
def verify(self, bibdata_raw):
|
||||
"""Will return None if bibdata_raw can't be decoded"""
|
||||
try:
|
||||
if bibdata_raw.startswith('\ufeff'):
|
||||
# remove BOM, because bibtexparser does not support it.
|
||||
bibdata_raw = bibdata_raw[1:]
|
||||
return self.endecoder.decode_bibdata(bibdata_raw)
|
||||
except ValueError as e:
|
||||
return None
|
||||
|
||||
# docbroker
|
||||
|
||||
def in_docsdir(self, docpath):
|
||||
|
@ -163,9 +163,6 @@ class DataCache(object):
|
||||
def listing(self, filestats=True):
|
||||
return self.databroker.listing(filestats=filestats)
|
||||
|
||||
def verify(self, bibdata_raw):
|
||||
return self.databroker.verify(bibdata_raw)
|
||||
|
||||
# docbroker
|
||||
|
||||
def in_docsdir(self, docpath):
|
||||
|
@ -1,9 +1,16 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import copy
|
||||
import logging
|
||||
|
||||
# both needed to intercept exceptions.
|
||||
import pyparsing
|
||||
import bibtexparser
|
||||
|
||||
try:
|
||||
import bibtexparser as bp
|
||||
# don't let bibtexparser display stuff
|
||||
bp.bparser.logger.setLevel(level=logging.CRITICAL)
|
||||
except ImportError:
|
||||
print("error: you need to install bibterxparser; try running 'pip install "
|
||||
"bibtexparser'.")
|
||||
@ -68,14 +75,14 @@ class EnDecoder(object):
|
||||
|
||||
class BibDecodingError(Exception):
|
||||
|
||||
message = "Could not parse provided bibdata:\n---\n{}\n---"
|
||||
|
||||
def __init__(self, bibdata):
|
||||
def __init__(self, error_msg, bibdata):
|
||||
"""
|
||||
:param error_msg: specific message about what went wrong
|
||||
:param bibdata: the data that was unsuccessfully decoded.
|
||||
"""
|
||||
super(Exception, self).__init__(error_msg) # make `str(self)` work.
|
||||
self.data = bibdata
|
||||
|
||||
def __str__(self):
|
||||
return self.message.format(self.data)
|
||||
|
||||
bwriter = bp.bwriter.BibTexWriter()
|
||||
bwriter.display_order = BIBFIELD_ORDER
|
||||
|
||||
@ -117,10 +124,12 @@ class EnDecoder(object):
|
||||
|
||||
If the decoding fails, raises a BibDecodingError.
|
||||
"""
|
||||
if len(bibdata) == 0:
|
||||
error_msg = 'parsing error: the provided string has length zero.'
|
||||
raise self.BibDecodingError(error_msg, bibdata)
|
||||
try:
|
||||
entries = bp.bparser.BibTexParser(
|
||||
bibdata, common_strings=True,
|
||||
customization=customizations,
|
||||
bibdata, common_strings=True, customization=customizations,
|
||||
homogenize_fields=True).get_entry_dict()
|
||||
|
||||
# Remove id from bibtexparser attribute which is stored as citekey
|
||||
@ -131,8 +140,18 @@ class EnDecoder(object):
|
||||
entries[e][TYPE_KEY] = t
|
||||
if len(entries) > 0:
|
||||
return entries
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
raise self.BibDecodingError(bibdata)
|
||||
# TODO: filter exceptions from pyparsing and pass reason upstream
|
||||
except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
|
||||
error_msg = self._format_parsing_error(e)
|
||||
raise self.BibDecodingError(error_msg, bibdata)
|
||||
except bibtexparser.bibdatabase.UndefinedString as e:
|
||||
error_msg = 'parsing error: undefined string in provided data: {}'.format(e)
|
||||
raise self.BibDecodingError(error_msg, bibdata)
|
||||
|
||||
|
||||
@classmethod
|
||||
def _format_parsing_error(cls, e):
|
||||
"""Transform a pyparsing exception into an error message
|
||||
|
||||
Does a best effort to be useful, but might need to be improved.
|
||||
"""
|
||||
return '{}\n{}^\n{}'.format(e.line, (e.column - 1) * ' ', e)
|
||||
|
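A small sketch of how the reworked exception is meant to be consumed (assuming `from pubs import endecoder`; the comments paraphrase the docstrings above):

    decoder = endecoder.EnDecoder()
    try:
        decoder.decode_bibdata('')   # empty input is now rejected up front
    except endecoder.EnDecoder.BibDecodingError as err:
        print(err)       # a message describing what went wrong
        print(err.data)  # the bibdata that could not be decoded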
@ -20,9 +20,11 @@ CORE_CMDS = collections.OrderedDict([
|
||||
('rename', commands.rename_cmd),
|
||||
('remove', commands.remove_cmd),
|
||||
('list', commands.list_cmd),
|
||||
('edit', commands.edit_cmd),
|
||||
('tag', commands.tag_cmd),
|
||||
('statistics', commands.statistics_cmd),
|
||||
|
||||
('doc', commands.doc_cmd),
|
||||
('tag', commands.tag_cmd),
|
||||
('note', commands.note_cmd),
|
||||
|
||||
('export', commands.export_cmd),
|
||||
@ -30,7 +32,6 @@ CORE_CMDS = collections.OrderedDict([
|
||||
|
||||
('websearch', commands.websearch_cmd),
|
||||
('url', commands.url_cmd),
|
||||
('edit', commands.edit_cmd),
|
||||
])
|
||||
|
||||
|
||||
|
@ -56,6 +56,10 @@ or an ISBN (dashes are ignored):

    pubs add -I 978-0822324669 -d article.pdf

or an arXiv ID (automatically downloading the arXiv article is in the works):

    pubs add -X math/9501234 -d article.pdf


## References always up-to-date

@ -1,6 +0,0 @@
pyyaml
bibtexparser>=1.0
python-dateutil
requests
configobj
beautifulsoup4
14  setup.py
@ -1,10 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
import unittest
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
with open('pubs/version.py') as f:
|
||||
exec(f.read()) # defines __version__
|
||||
|
||||
def pubs_test_suite():
|
||||
test_loader = unittest.TestLoader()
|
||||
test_suite = test_loader.discover('tests', pattern='test_*.py')
|
||||
return test_suite
|
||||
|
||||
setup(
|
||||
name='pubs',
|
||||
version=__version__,
|
||||
@ -26,9 +32,8 @@ setup(
|
||||
],
|
||||
},
|
||||
|
||||
install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil',
|
||||
'requests', 'configobj', 'beautifulsoup4'],
|
||||
tests_require=['pyfakefs>=2.7', 'mock'],
|
||||
install_requires=['pyyaml', 'bibtexparser>=1.0', 'python-dateutil', 'six',
|
||||
'requests', 'configobj', 'beautifulsoup4', 'feedparser'],
|
||||
extras_require={'autocompletion': ['argcomplete'],
|
||||
},
|
||||
|
||||
@ -41,6 +46,9 @@ setup(
|
||||
'Intended Audience :: Science/Research',
|
||||
],
|
||||
|
||||
test_suite= 'tests',
|
||||
tests_require=['pyfakefs>=3.4', 'mock', 'ddt'],
|
||||
|
||||
# in order to avoid 'zipimport.ZipImportError: bad local file header'
|
||||
zip_safe=False,
|
||||
|
||||
|
106  tests/mock_requests.py Normal file
@ -0,0 +1,106 @@
|
||||
"""
|
||||
Mock the `requests.get` function, and handle collecting data to do so.
|
||||
|
||||
Three modes are available, and controlled via the `PUBS_TESTS_MODE` environment
variable. To modify the variable, under Linux or macOS, do one of:
$ export PUBS_TESTS_MODE=MOCK
$ export PUBS_TESTS_MODE=COLLECT
$ export PUBS_TESTS_MODE=ONLINE

The MOCK mode is the default one, active even if PUBS_TESTS_MODE has not been
set. It uses saved data to run the pubs unit tests that rely on the `requests.get`
function without the need of an internet connection (it is also much faster).
The prefetched data is saved in the `test_apis_data.json` file.

The COLLECT mode does real GET requests, and updates the `test_apis_data.json`
file. It is needed if you add or modify the tests relying on `requests.get`.

The ONLINE mode bypasses all this and uses the original `requests.get` without
accessing or updating the `test_apis_data.json` data. It might be useful when
running tests on Travis for instance.
"""
|
||||
|
||||
|
||||
import os
|
||||
import json
|
||||
import mock
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
_orgininal_requests_get = requests.get
|
||||
_collected_responses = []
|
||||
_data_filepath = os.path.join(os.path.dirname(__file__), 'test_apis_data.json')
|
||||
|
||||
|
||||
class MockingResponse:
|
||||
def __init__(self, text, status_code=200, error_msg=None):
|
||||
self.text = text
|
||||
self.status_code = status_code
|
||||
self.error_msg = error_msg
|
||||
self.encoding = 'utf8'
|
||||
|
||||
def raise_for_status(self):
|
||||
if self.status_code != 200:
|
||||
raise requests.exceptions.RequestException(self.error_msg)
|
||||
|
||||
|
||||
def intercept_text(text):
|
||||
try:
|
||||
if '10.1103/PhysRevD.89.084044' in text:
|
||||
# replace with wrong DOI
|
||||
text = text.replace('PhysRevD', 'INVALIDDOI')
|
||||
except TypeError:
|
||||
if b'10.1103/PhysRevD.89.084044' in text:
|
||||
# replace with wrong DOI
|
||||
text = text.replace(b'PhysRevD', b'INVALIDDOI')
|
||||
|
||||
return text
|
||||
|
||||
|
||||
mode = os.environ.get('PUBS_TESTS_MODE', 'MOCK')
|
||||
|
||||
if mode == 'MOCK':
|
||||
|
||||
with open(os.path.join(_data_filepath), 'r') as fd:
|
||||
_collected_responses = json.load(fd)
|
||||
|
||||
def mock_requests_get(*args, **kwargs):
|
||||
for args2, kwargs2, text, status_code, error_msg in _collected_responses:
|
||||
if list(args) == list(args2) and kwargs == kwargs2:
|
||||
return MockingResponse(text, status_code, error_msg)
|
||||
raise KeyError(('No stub data found for requests.get({}, {}).\n You may'
|
||||
' need to update the mock data. Look at the '
|
||||
'tests/mock_requests.py file for an explanation').format(args, kwargs))
|
||||
|
||||
elif mode == 'COLLECT':
|
||||
|
||||
def mock_requests_get(*args, **kwargs):
|
||||
text, status_code, error_msg = None, None, None
|
||||
try:
|
||||
r = _orgininal_requests_get(*args, **kwargs)
|
||||
text, status_code = r.text, r.status_code
|
||||
r.raise_for_status()
|
||||
except requests.exceptions.RequestException as e:
|
||||
error_msg = str(e)
|
||||
|
||||
text = intercept_text(text)
|
||||
_collected_responses.append((args, kwargs, text, status_code, error_msg))
|
||||
_save_collected_responses() # yes, we save every time, because it's not
|
||||
# clear how to run once after all the tests
|
||||
# have run. If you figure it out...
|
||||
|
||||
return MockingResponse(text, status_code, error_msg)
|
||||
|
||||
def _save_collected_responses():
|
||||
with open(os.path.join(_data_filepath), 'w') as fd:
|
||||
json.dump(sorted(_collected_responses), fd, indent=2)
|
||||
|
||||
elif mode == 'ONLINE':
|
||||
def mock_requests_get(*args, **kwargs):
|
||||
# with mock.patch('requests.Response.text', new_callable=mock.PropertyMock) as mock_text:
|
||||
r = _orgininal_requests_get(*args, **kwargs)
|
||||
r._content = intercept_text(r.content)
|
||||
# print(r.content.__class__)
|
||||
# mock_text.return_value = intercept_text(r.text)
|
||||
return r
|
@ -1,5 +0,0 @@
# those are the additional packages required to run the tests
six
pyfakefs
ddt
mock
@ -3,24 +3,36 @@
|
||||
from __future__ import unicode_literals
|
||||
import unittest
|
||||
|
||||
import mock
|
||||
|
||||
|
||||
import dotdot
|
||||
|
||||
from pubs.p3 import ustr
|
||||
from pubs.endecoder import EnDecoder
|
||||
from pubs.apis import doi2bibtex, isbn2bibtex
|
||||
from pubs.apis import ReferenceNotFoundError, arxiv2bibtex, doi2bibtex, isbn2bibtex, _is_arxiv_oldstyle, _extract_arxiv_id
|
||||
|
||||
from pubs import apis
|
||||
|
||||
import mock_requests
|
||||
|
||||
|
||||
class TestDOI2Bibtex(unittest.TestCase):
|
||||
class APITests(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.endecoder = EnDecoder()
|
||||
|
||||
def test_unicode(self):
|
||||
|
||||
class TestDOI2Bibtex(APITests):
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_unicode(self, reqget):
|
||||
bib = doi2bibtex('10.1007/BF01700692')
|
||||
self.assertIsInstance(bib, ustr)
|
||||
self.assertIn('Kurt Gödel', bib)
|
||||
|
||||
def test_parses_to_bibtex(self):
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_parses_to_bibtex(self, reqget):
|
||||
bib = doi2bibtex('10.1007/BF01700692')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
self.assertEqual(len(b), 1)
|
||||
@ -30,23 +42,22 @@ class TestDOI2Bibtex(unittest.TestCase):
|
||||
'Über formal unentscheidbare Sätze der Principia '
|
||||
'Mathematica und verwandter Systeme I')
|
||||
|
||||
def test_parse_fails_on_incorrect_DOI(self):
|
||||
bib = doi2bibtex('999999')
|
||||
with self.assertRaises(EnDecoder.BibDecodingError):
|
||||
self.endecoder.decode_bibdata(bib)
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_retrieve_fails_on_incorrect_DOI(self, reqget):
|
||||
with self.assertRaises(apis.ReferenceNotFoundError):
|
||||
doi2bibtex('999999')
|
||||
|
||||
|
||||
class TestISBN2Bibtex(unittest.TestCase):
|
||||
class TestISBN2Bibtex(APITests):
|
||||
|
||||
def setUp(self):
|
||||
self.endecoder = EnDecoder()
|
||||
|
||||
def test_unicode(self):
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_unicode(self, reqget):
|
||||
bib = isbn2bibtex('9782081336742')
|
||||
self.assertIsInstance(bib, ustr)
|
||||
self.assertIn('Poincaré, Henri', bib)
|
||||
|
||||
def test_parses_to_bibtex(self):
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_parses_to_bibtex(self, reqget):
|
||||
bib = isbn2bibtex('9782081336742')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
self.assertEqual(len(b), 1)
|
||||
@ -54,11 +65,97 @@ class TestISBN2Bibtex(unittest.TestCase):
|
||||
self.assertEqual(entry['author'][0], 'Poincaré, Henri')
|
||||
self.assertEqual(entry['title'], 'La science et l\'hypothèse')
|
||||
|
||||
def test_parse_fails_on_incorrect_ISBN(self):
|
||||
bib = doi2bibtex('9' * 13)
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_retrieve_fails_on_incorrect_ISBN(self, reqget):
|
||||
bib = isbn2bibtex('9' * 13)
|
||||
with self.assertRaises(EnDecoder.BibDecodingError):
|
||||
self.endecoder.decode_bibdata(bib)
|
||||
|
||||
|
||||
# Note: apparently ottobib.com uses character modifiers for accents instead
|
||||
# of the correct unicode characters. TODO: Should we convert them?
|
||||
class TestArxiv2Bibtex(APITests):
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_new_style(self, reqget):
|
||||
bib = arxiv2bibtex('astro-ph/9812133')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
self.assertEqual(len(b), 1)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
|
||||
self.assertEqual(entry['year'], '1999')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_parses_to_bibtex_with_doi(self, reqget):
|
||||
bib = arxiv2bibtex('astro-ph/9812133')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
self.assertEqual(len(b), 1)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertEqual(entry['author'][0], 'Perlmutter, S.')
|
||||
self.assertEqual(entry['year'], '1999')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_parses_to_bibtex_without_doi(self, reqget):
|
||||
bib = arxiv2bibtex('math/0211159')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
self.assertEqual(len(b), 1)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertEqual(entry['author'][0], 'Perelman, Grisha')
|
||||
self.assertEqual(entry['year'], '2002')
|
||||
self.assertEqual(
|
||||
entry['title'],
|
||||
'The entropy formula for the Ricci flow and its geometric applications')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_arxiv_wrong_id(self, reqget):
|
||||
with self.assertRaises(ReferenceNotFoundError):
|
||||
bib = arxiv2bibtex('INVALIDID')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_arxiv_wrong_doi(self, reqget):
|
||||
bib = arxiv2bibtex('1312.2021')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertEqual(entry['arxiv_doi'], '10.1103/INVALIDDOI.89.084044')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_arxiv_good_doi(self, reqget):
|
||||
"""Get the DOI bibtex instead of the arXiv one if possible"""
|
||||
bib = arxiv2bibtex('1710.08557')
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertTrue(not 'arxiv_doi' in entry)
|
||||
self.assertEqual(entry['doi'], '10.1186/s12984-017-0305-3')
|
||||
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological and robotic grasp and manipulation')
|
||||
|
||||
@mock.patch('pubs.apis.requests.get', side_effect=mock_requests.mock_requests_get)
|
||||
def test_arxiv_good_doi_force_arxiv(self, reqget):
|
||||
bib = arxiv2bibtex('1710.08557', try_doi=False)
|
||||
b = self.endecoder.decode_bibdata(bib)
|
||||
entry = b[list(b)[0]]
|
||||
self.assertEqual(entry['arxiv_doi'], '10.1186/s12984-017-0305-3')
|
||||
self.assertEqual(entry['title'].lower(), 'on neuromechanical approaches for the study of biological grasp and\nmanipulation')
|
||||
|
||||
|
||||
class TestArxiv2BibtexLocal(unittest.TestCase):
|
||||
"""Test arXiv 2 Bibtex connection; those tests don't require a connection"""
|
||||
|
||||
def test_oldstyle_pattern(self):
|
||||
"""Test that we can accurately differentiate between old and new style arXiv ids."""
|
||||
# old-style arXiv ids
|
||||
for arxiv_id in ['cs/9301113', 'math/9201277v3', 'astro-ph/9812133',
|
||||
'cond-mat/0604612', 'hep-ph/0702007v10', 'arXiv:physics/9403001'
|
||||
]:
|
||||
self.assertTrue(_is_arxiv_oldstyle(arxiv_id))
|
||||
# new-style arXiv ids
|
||||
for arxiv_id in ['1808.00954', 'arXiv:1808.00953', '1808.0953',
|
||||
'1808.00954v1', 'arXiv:1808.00953v2', '1808.0953v42']:
|
||||
self.assertFalse(_is_arxiv_oldstyle(arxiv_id))
|
||||
|
||||
def test_extract_id(self):
|
||||
"""Test that ids are correctly extracted"""
|
||||
self.assertEqual(_extract_arxiv_id({'id': "http://arxiv.org/abs/0704.0010v1"}), "0704.0010v1")
|
||||
self.assertEqual(_extract_arxiv_id({'id': "https://arxiv.org/abs/0704.0010v1"}), "0704.0010v1")
|
||||
self.assertEqual(_extract_arxiv_id({'id': "https://arxiv.org/abs/astro-ph/9812133v2"}), "astro-ph/9812133v2")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(verbosity=2)
|
||||
|
189  tests/test_apis_data.json Normal file
File diff suppressed because one or more lines are too long
@ -23,6 +23,11 @@ def compare_yaml_str(s1, s2):
|
||||
|
||||
class TestEnDecode(unittest.TestCase):
|
||||
|
||||
def test_decode_emptystring(self):
|
||||
decoder = endecoder.EnDecoder()
|
||||
with self.assertRaises(decoder.BibDecodingError):
|
||||
entry = decoder.decode_bibdata('')
|
||||
|
||||
def test_encode_bibtex_is_unicode(self):
|
||||
decoder = endecoder.EnDecoder()
|
||||
entry = decoder.decode_bibdata(bibtex_raw0)
|
||||
@ -52,6 +57,18 @@ class TestEnDecode(unittest.TestCase):
|
||||
|
||||
self.assertEqual(bibraw1, bibraw2)
|
||||
|
||||
def test_endecode_bibtex_BOM(self):
|
||||
"""Test that bibtexparser if fine with BOM-prefixed data"""
|
||||
decoder = endecoder.EnDecoder()
|
||||
bom_str = '\ufeff'
|
||||
|
||||
entry_1 = decoder.decode_bibdata(bibtex_raw0)
|
||||
bibraw_1 = decoder.encode_bibdata(entry_1)
|
||||
entry_2 = decoder.decode_bibdata(bom_str + bibraw_1)
|
||||
bibraw_2 = decoder.encode_bibdata(entry_2)
|
||||
|
||||
self.assertEqual(bibraw_1, bibraw_2)
|
||||
|
||||
def test_endecode_bibtex_converts_month_string(self):
|
||||
"""Test if `month=dec` is correctly recognized and transformed into
|
||||
`month={December}`"""
|
||||
|
@ -7,7 +7,7 @@ from pubs.events import Event
|
||||
_output = None
|
||||
|
||||
|
||||
class TestEvent(Event):
|
||||
class StringEvent(Event):
|
||||
def __init__(self, string):
|
||||
self.string = string
|
||||
|
||||
@ -34,20 +34,20 @@ class SpecificInfo(Info):
|
||||
self.specific = specific
|
||||
|
||||
|
||||
@TestEvent.listen(12, 15)
|
||||
def display(TestEventInstance, nb1, nb2):
|
||||
@StringEvent.listen(12, 15)
|
||||
def display(StringEventInstance, nb1, nb2):
|
||||
_output.append("%s %s %s"
|
||||
% (TestEventInstance.string, nb1, nb2))
|
||||
% (StringEventInstance.string, nb1, nb2))
|
||||
|
||||
|
||||
@TestEvent.listen()
|
||||
def hello_word(TestEventInstance):
|
||||
@StringEvent.listen()
|
||||
def hello_word(StringEventInstance):
|
||||
_output.append('Helloword')
|
||||
|
||||
|
||||
@TestEvent.listen()
|
||||
def print_it(TestEventInstance):
|
||||
TestEventInstance.print_one()
|
||||
@StringEvent.listen()
|
||||
def print_it(StringEventInstance):
|
||||
StringEventInstance.print_one()
|
||||
|
||||
|
||||
@AddEvent.listen()
|
||||
@ -56,7 +56,7 @@ def do_it(AddEventInstance):
|
||||
|
||||
|
||||
@Info.listen()
|
||||
def test_info_instance(infoevent):
|
||||
def collect_info_instance(infoevent):
|
||||
_output.append(infoevent.info)
|
||||
if isinstance(infoevent, SpecificInfo):
|
||||
_output.append(infoevent.specific)
|
||||
@ -68,9 +68,9 @@ class TestEvents(unittest.TestCase):
|
||||
global _output
|
||||
_output = []
|
||||
|
||||
def test_listen_TestEvent(self):
|
||||
def test_listen_StringEvent(self):
|
||||
# using the callback system
|
||||
myevent = TestEvent('abcdefghijklmnopqrstuvwxyz')
|
||||
myevent = StringEvent('abcdefghijklmnopqrstuvwxyz')
|
||||
myevent.send() # this one call three function
|
||||
correct = ['abcdefghijklmnopqrstuvwxyz 12 15',
|
||||
'Helloword',
|
||||
|
@ -292,9 +292,66 @@ class TestAdd(URLContentTestCase):
|
||||
'pubs add data/pagerank.bib --link -d data/pagerank.pdf',
|
||||
]
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(os.listdir(
|
||||
os.path.join(self.default_pubs_dir, 'doc')),
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
[])
|
||||
self.assertTrue(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_doc_nocopy_from_config_does_not_copy(self):
|
||||
self.execute_cmds(['pubs init'])
|
||||
config = conf.load_conf()
|
||||
config['main']['doc_add'] = 'link'
|
||||
conf.save_conf(config)
|
||||
cmds = ['pubs add data/pagerank.bib -d data/pagerank.pdf']
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
[])
|
||||
self.assertTrue(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_doc_copy(self):
|
||||
cmds = ['pubs init',
|
||||
'pubs add data/pagerank.bib --copy -d data/pagerank.pdf',
|
||||
]
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
['Page99.pdf'])
|
||||
self.assertTrue(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_doc_copy_from_config(self):
|
||||
self.execute_cmds(['pubs init'])
|
||||
config = conf.load_conf()
|
||||
config['main']['doc_add'] = 'copy'
|
||||
conf.save_conf(config)
|
||||
cmds = ['pubs add data/pagerank.bib -d data/pagerank.pdf']
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
['Page99.pdf'])
|
||||
self.assertTrue(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_doc_move(self):
|
||||
cmds = ['pubs init',
|
||||
'pubs add data/pagerank.bib --move -d data/pagerank.pdf',
|
||||
]
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
['Page99.pdf'])
|
||||
self.assertFalse(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_doc_move_from_config(self):
|
||||
self.execute_cmds(['pubs init'])
|
||||
config = conf.load_conf()
|
||||
config['main']['doc_add'] = 'move'
|
||||
conf.save_conf(config)
|
||||
cmds = ['pubs add data/pagerank.bib -d data/pagerank.pdf']
|
||||
self.execute_cmds(cmds)
|
||||
self.assertEqual(
|
||||
os.listdir(os.path.join(self.default_pubs_dir, 'doc')),
|
||||
['Page99.pdf'])
|
||||
self.assertFalse(os.path.exists('data/pagerank.pdf'))
|
||||
|
||||
def test_add_move_removes_doc(self):
|
||||
cmds = ['pubs init',
|
||||
@ -925,6 +982,21 @@ class TestUsecase(DataCommandTestCase):
|
||||
self.assertFalse(os.path.isfile(self.default_conf_path))
|
||||
self.assertTrue(os.path.isfile(alt_conf))
|
||||
|
||||
def test_statistics(self):
|
||||
cmds = ['pubs init',
|
||||
'pubs add data/pagerank.bib',
|
||||
'pubs add -d data/turing-mind-1950.pdf data/turing1950.bib',
|
||||
'pubs add data/martius.bib',
|
||||
'pubs add data/10.1371%2Fjournal.pone.0038236.bib',
|
||||
'pubs tag Page99 A+B',
|
||||
'pubs tag turing1950computing C',
|
||||
'pubs statistics',
|
||||
]
|
||||
out = self.execute_cmds(cmds)
|
||||
lines = out[-1].splitlines()
|
||||
self.assertEqual(lines[0], 'Repository statistics:')
|
||||
self.assertEqual(lines[1], 'Total papers: 4, 1 (25%) have a document attached')
|
||||
self.assertEqual(lines[2], 'Total tags: 3, 2 (50%) of papers have at least one tag')
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
|