Allow BibTeX to be added from an arXiv ID.

This allows the user to add a reference via an arXiv ID, similar to how a reference can be added from a DOI or ISBN. If the arXiv server reports a DOI for the arXiv ID, that DOI is used to retrieve the entry. If it does not (for example, because the paper is unpublished), a BibTeX entry is generated automatically from the reference's metadata. Note one potential issue with this addition: if a paper is added before it is published (i.e., while no DOI is associated with it) and the paper is later published, the updated information will have to be added manually.
2018-07-23 18:23:36 -06:00 · 2018-07-23 18:23:36 -06:00 · f3c83668f9
commit f3c83668f9
parent c513870132
4 changed files with 80 additions and 4 deletions
--- a/pubs/apis.py
+++ b/pubs/apis.py
@ -1,7 +1,9 @@
 """Interface for Remote Bibliographic APIs"""

 import requests
+import feedparser
 from bs4 import BeautifulSoup
+from uis import get_ui


 def doi2bibtex(doi):
@ -25,3 +27,32 @@ def isbn2bibtex(isbn):
    citation = soup.find("textarea").text

    return citation
+
+
+def arxiv2bibtex(arxiv_id):
+    """Return a bibtex string of metadata from an arXiv ID.
+
+    The arXiv export API is queried for ``arxiv_id``. When the returned
+    entry carries an ``arxiv_doi`` field, the lookup is delegated to
+    ``doi2bibtex``; otherwise a ``@misc`` entry is assembled from the
+    entry's authors, title and publication year.
+
+    :param arxiv_id: arXiv identifier, appended verbatim to the query URL.
+    :returns: a bibtex string, or an empty string (after reporting the
+        problem through the ui) when the feed holds no usable entry.
+    """
+
+    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
+    r = requests.get(url)
+    feed = feedparser.parse(r.text)
+
+    # Guard both failure shapes: a feed without any entry (indexing it would
+    # raise IndexError) and an entry that lacks the expected metadata.
+    entry = feed.entries[0] if feed.entries else None
+    if entry is None or 'title' not in entry:
+        ui = get_ui()
+        ui.error('malformed arXiv ID: {}'.format(arxiv_id))
+        # Stop here: falling through would fail on the missing fields.
+        # NOTE(review): an empty string is returned so the result stays a
+        # str; confirm callers treat it as a failed lookup.
+        return ''
+
+    if 'arxiv_doi' in entry:
+        return doi2bibtex(entry['arxiv_doi'])
+
+    # No DOI is known: create a bibentry from the metadata.
+    authors = ' and '.join(author['name'] for author in entry['authors'])
+    # Collapse line breaks and runs of whitespace inside the title, not only
+    # newlines at its edges, so a wrapped title becomes a single line.
+    title = ' '.join(entry['title'].split())
+    lines = [
+        '@misc{{{},'.format(arxiv_id),
+        'Author = {{{}}},'.format(authors),
+        'Title = {{{}}},'.format(title),
+        'Year = {{{}}},'.format(entry['published_parsed'].tm_year),
+        'Eprint = {{arXiv:{}}},'.format(arxiv_id),
+        '}',
+    ]
+    return '\n'.join(lines)
--- a/pubs/commands/add_cmd.py
+++ b/pubs/commands/add_cmd.py
@ -27,6 +27,7 @@ def parser(subparsers, conf):
                        help='bibtex file')
    parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
    parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
+    parser.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
    parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
    parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
                        default=None
@ -67,6 +68,19 @@ def bibentry_from_editor(conf, ui, rp):
    return bibentry


+def api_call(fn, arg, rp=None, ui=None):
+    """Call a remote API function and verify the bibentry it returns.
+
+    :param fn: The API function to call.
+    :param arg: The argument to give the API call.
+    :param rp: Optional repository. When given, the raw result of ``fn`` is
+        passed through ``rp.databroker.verify``; when omitted, the raw
+        result is returned as is.
+    :param ui: Optional ui used to report a failed lookup.
+    :returns: the bibentry, or None when it could not be obtained.
+    """
+    bibentry = fn(arg)
+    if rp is not None:
+        bibentry = rp.databroker.verify(bibentry)
+    # Report the failure before returning; placed after the return statement
+    # this check could never run.
+    if bibentry is None and ui is not None:
+        ui.error('invalid identifier {} or unable to retrieve bibfile from it.'.format(arg))
+    return bibentry
+
+
 def command(conf, args):
    """
    :param bibfile: bibtex file (in .bib, .bibml or .yaml format.
@ -86,21 +100,26 @@ def command(conf, args):
        if args.doi is None and args.isbn is None:
            bibentry = bibentry_from_editor(conf, ui, rp)
        else:
+            bibentry = None
            if args.doi is not None:
                bibentry_raw = apis.doi2bibtex(args.doi)
                bibentry = rp.databroker.verify(bibentry_raw)
                if bibentry is None:
                    ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
-                    if args.isbn is None:
-                        ui.exit(1)
            if args.isbn is not None:
                bibentry_raw = apis.isbn2bibtex(args.isbn)
                bibentry = rp.databroker.verify(bibentry_raw)
                if bibentry is None:
                    ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
-                    ui.exit(1)
                # TODO distinguish between cases, offer to open the error page in a webbrowser.
                # TODO offer to confirm/change citekey
+            if args.arxiv is not None:
+                bibentry_raw = apis.arxiv2bibtex(args.arxiv)
+                bibentry = rp.databroker.verify(bibentry_raw)
+                if bibentry is None:
+                    ui.error('invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv_id))
+            if bibentry is None:
+                ui.exit(1)
    else:
        bibentry_raw = content.get_content(bibfile, ui=ui)
        bibentry = rp.databroker.verify(bibentry_raw)
--- a/requirements.txt
+++ b/requirements.txt
@ -4,3 +4,4 @@ python-dateutil
 requests
 configobj
 beautifulsoup4
+feedparser
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@ -7,7 +7,7 @@ import dotdot

 from pubs.p3 import ustr
 from pubs.endecoder import EnDecoder
-from pubs.apis import doi2bibtex, isbn2bibtex
+from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex


 class TestDOI2Bibtex(unittest.TestCase):
@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
            self.endecoder.decode_bibdata(bib)


+class TestArxiv2Bibtex(unittest.TestCase):
+    """Check that arxiv2bibtex output decodes to a single bibtex entry."""
+
+    def setUp(self):
+        self.endecoder = EnDecoder()
+
+    def _single_entry(self, arxiv_id):
+        """Fetch ``arxiv_id``, decode the result and return its only entry."""
+        decoded = self.endecoder.decode_bibdata(arxiv2bibtex(arxiv_id))
+        self.assertEqual(len(decoded), 1)
+        citekey = next(iter(decoded))
+        return decoded[citekey]
+
+    def test_parses_to_bibtex_with_doi(self):
+        # Per the test name, this ID is expected to have a DOI on arXiv.
+        entry = self._single_entry('astro-ph/9812133')
+        self.assertEqual(entry['author'][0], 'Perlmutter, S.')
+        self.assertEqual(entry['year'], '1999')
+
+    def test_parses_to_bibtex_without_doi(self):
+        # Per the test name, this ID is expected to have no DOI on arXiv.
+        entry = self._single_entry('math/0211159')
+        expected_title = (
+            'The entropy formula for the Ricci flow and its geometric applications')
+        self.assertEqual(entry['author'][0], 'Perelman, Grisha')
+        self.assertEqual(entry['year'], '2002')
+        self.assertEqual(entry['title'], expected_title)
+
+
 # Note: apparently ottobib.com uses character modifiers for accents instead
 # of the correct unicode characters. TODO: Should we convert them?