Allow BibTeX to be added from an arXiv ID.

This allows the user to add a reference via an arXiv ID similarly to how a reference can be added
from a DOI or ISBN.  If the arXiv ID has a DOI associated with it (according to the arXiv server),
the DOI will be used.  If it does not (perhaps the paper is unpublished), then a bibtex entry will
automatically be generated from the reference's metadata.

Note that a potential issue with this addition is that if a paper is added before it is published
(i.e., there is no DOI associated with it), and the paper is later published, the updated
information will have to be manually added.
main
Joe Antognini 7 years ago
parent c513870132
commit f3c83668f9

@ -1,7 +1,9 @@
"""Interface for Remote Bibliographic APIs"""
import requests
import feedparser
from bs4 import BeautifulSoup
from uis import get_ui
def doi2bibtex(doi):
@ -25,3 +27,32 @@ def isbn2bibtex(isbn):
citation = soup.find("textarea").text
return citation
def arxiv2bibtex(arxiv_id):
    """Return a bibtex string of metadata from an arXiv ID.

    The arXiv export API is queried for ``arxiv_id`` and the response is
    parsed as a feed.  If the first entry carries an ``arxiv_doi`` field,
    the bibtex is obtained through ``doi2bibtex``; otherwise a ``@misc``
    entry is assembled from the entry's authors, title, publication year
    and the arXiv ID itself (which is also used as the citekey).

    :param arxiv_id: arXiv identifier, e.g. ``'math/0211159'``.
    :returns: a bibtex string, or ``None`` when the response contains no
        entry or an entry without a title.  In that case an error is
        reported through the UI first.
    """
    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
    r = requests.get(url)
    feed = feedparser.parse(r.text)

    # Guard against an empty feed (would be an IndexError) and against an
    # entry without a title, which is treated as a malformed ID.
    entry = feed.entries[0] if feed.entries else None
    if entry is None or 'title' not in entry:
        ui = get_ui()
        ui.error('malformed arXiv ID: {}'.format(arxiv_id))
        # Stop here rather than building bibtex from an unusable entry.
        # NOTE(review): callers pass the result to a verifier and test it
        # for None -- confirm the verifier accepts a None input.
        return None

    if 'arxiv_doi' in entry:
        return doi2bibtex(entry['arxiv_doi'])

    # No DOI: create a bibentry from the metadata.
    authors = ' and '.join(author['name'] for author in entry['authors'])
    # Collapse all runs of whitespace so a title wrapped over several lines
    # becomes a single-line bibtex field.
    title = ' '.join(entry['title'].split())
    lines = [
        '@misc{{{},'.format(arxiv_id),
        'Author = {{{}}},'.format(authors),
        'Title = {{{}}},'.format(title),
        'Year = {{{}}},'.format(entry['published_parsed'].tm_year),
        'Eprint = {{arXiv:{}}},'.format(arxiv_id),
        '}',
    ]
    return '\n'.join(lines)

@ -27,6 +27,7 @@ def parser(subparsers, conf):
help='bibtex file')
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
default=None
@ -67,6 +68,19 @@ def bibentry_from_editor(conf, ui, rp):
return bibentry
def api_call(fn, arg):
    """Calls the appropriate API command.

    The raw result of ``fn(arg)`` is handed to ``rp.databroker.verify`` and
    the verifier's result is returned unchanged.

    :param fn: The API function to call.
    :param arg: The argument to give the API call.
    :returns: whatever ``rp.databroker.verify`` returns for the raw entry.
    """
    # NOTE(review): `rp` is neither a parameter nor a local of this function;
    # it has to be resolvable from an outer scope at call time -- confirm.
    bibentry_raw = fn(arg)
    # The statements that used to follow the return were unreachable and
    # referenced names (`ui`, `args`) not defined here; they were dropped.
    return rp.databroker.verify(bibentry_raw)
def command(conf, args):
"""
:param bibfile: bibtex file (in .bib, .bibml or .yaml format).
@ -86,21 +100,26 @@ def command(conf, args):
if args.doi is None and args.isbn is None:
bibentry = bibentry_from_editor(conf, ui, rp)
else:
bibentry = None
if args.doi is not None:
bibentry_raw = apis.doi2bibtex(args.doi)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
if args.isbn is None:
ui.exit(1)
if args.isbn is not None:
bibentry_raw = apis.isbn2bibtex(args.isbn)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
ui.exit(1)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
if args.arxiv is not None:
    # Fetch the raw bibtex for the arXiv ID and let the databroker verify it.
    bibentry_raw = apis.arxiv2bibtex(args.arxiv)
    bibentry = rp.databroker.verify(bibentry_raw)
    if bibentry is None:
        # The option is registered as `--arxiv`, so the parsed attribute is
        # `args.arxiv`; `args.arxiv_id` does not exist on the namespace.
        ui.error('invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv))
if bibentry is None:
ui.exit(1)
else:
bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw)

@ -4,3 +4,4 @@ python-dateutil
requests
configobj
beautifulsoup4
feedparser

@ -7,7 +7,7 @@ import dotdot
from pubs.p3 import ustr
from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex
from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex
class TestDOI2Bibtex(unittest.TestCase):
@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
self.endecoder.decode_bibdata(bib)
class TestArxiv2Bibtex(unittest.TestCase):
    """Tests for ``arxiv2bibtex``: decode its output and check key fields."""

    def setUp(self):
        self.endecoder = EnDecoder()

    def _decode_single_entry(self, arxiv_id):
        """Fetch bibtex for ``arxiv_id``, decode it, and return its only entry."""
        decoded = self.endecoder.decode_bibdata(arxiv2bibtex(arxiv_id))
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        return decoded[first_key]

    def test_parses_to_bibtex_with_doi(self):
        entry = self._decode_single_entry('astro-ph/9812133')
        self.assertEqual(entry['author'][0], 'Perlmutter, S.')
        self.assertEqual(entry['year'], '1999')

    def test_parses_to_bibtex_without_doi(self):
        entry = self._decode_single_entry('math/0211159')
        self.assertEqual(entry['author'][0], 'Perelman, Grisha')
        self.assertEqual(entry['year'], '2002')
        expected_title = (
            'The entropy formula for the Ricci flow and its geometric applications')
        self.assertEqual(entry['title'], expected_title)
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them?

Loading…
Cancel
Save