Merge pull request #146 from joe-antognini/jma/arxiv

Arxiv support for pubs add.
main
Fabien C. Y. Benureau 7 years ago committed by GitHub
commit 20c5ed7446
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,9 +1,55 @@
"""Interface for Remote Bibliographic APIs"""
import requests
import bibtexparser
from bibtexparser.bibdatabase import BibDatabase
import feedparser
from bs4 import BeautifulSoup
class ReferenceNotFoundException(Exception):
    """Raised when no bibliographic reference could be retrieved for an ID.

    Handlers read the human-readable error text through the `message`
    attribute. Python 3 exceptions do not provide that attribute on their
    own, so it is exposed here explicitly to avoid an `AttributeError` in
    the error path.
    """

    @property
    def message(self):
        """Return the first constructor argument, or '' if none was given."""
        return self.args[0] if self.args else ''
def get_bibentry_from_api(id_str, id_type, rp):
    """Fetch a bibtex string for an ID and return it once verified.

    This is a wrapper around functions that will return a bibtex string given
    one of:

        * DOI
        * ISBN
        * arXiv ID

    The fetched string is then passed through `rp.databroker.verify`.

    Args:
        id_str: A string with the ID.
        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
        rp: A `Repository` object.

    Returns:
        The value returned by `rp.databroker.verify` for the fetched bibtex
        string.

    Raises:
        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
        ReferenceNotFoundException: if `rp.databroker.verify` returns `None`
            for the fetched bibtex string.
    """
    # Dispatch table: ID type name -> function that fetches the bibtex string.
    id_fns = {
        'doi': doi2bibtex,
        'isbn': isbn2bibtex,
        'arxiv': arxiv2bibtex,
    }

    if id_type not in id_fns:
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

    bibentry_raw = id_fns[id_type](id_str)
    bibentry = rp.databroker.verify(bibentry_raw)
    if bibentry is None:
        raise ReferenceNotFoundException(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
    return bibentry
def doi2bibtex(doi):
"""Return a bibtex string of metadata from a DOI"""
@ -25,3 +71,32 @@ def isbn2bibtex(isbn):
citation = soup.find("textarea").text
return citation
def arxiv2bibtex(arxiv_id):
    """Return a bibtex string of metadata from an arXiv ID.

    The arXiv export API is queried for `arxiv_id` and the first entry of the
    returned feed is used. If that entry carries an `arxiv_doi` field, the
    bibtex is obtained from `doi2bibtex` for that DOI; otherwise an `article`
    entry is assembled from the feed's authors, title and publication year.

    Args:
        arxiv_id: A string with the arXiv ID.

    Returns:
        A bibtex string.

    Raises:
        ReferenceNotFoundException: if the feed contains no entry, or its
            first entry has no title.
    """
    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
    r = requests.get(url)
    feed = feedparser.parse(r.text)

    # An empty feed would otherwise surface as an IndexError, which callers
    # that only handle ReferenceNotFoundException would not catch.
    if not feed.entries:
        raise ReferenceNotFoundException('arXiv ID not found.')

    entry = feed.entries[0]
    if 'title' not in entry:
        raise ReferenceNotFoundException('arXiv ID not found.')

    if 'arxiv_doi' in entry:
        return doi2bibtex(entry['arxiv_doi'])

    # No DOI available: create a bibentry from the feed metadata.
    db = BibDatabase()
    author_str = ' and '.join(author['name'] for author in entry['authors'])
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': arxiv_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'Eprint': arxiv_id,
    }]
    return bibtexparser.dumps(db)

@ -26,8 +26,10 @@ def parser(subparsers, conf):
parser = subparsers.add_parser('add', help='add a paper to the repository')
parser.add_argument('bibfile', nargs='?', default=None,
help='bibtex file')
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg = parser.add_mutually_exclusive_group()
id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
default=None
@ -85,24 +87,22 @@ def command(conf, args):
# get bibtex entry
if bibfile is None:
if args.doi is None and args.isbn is None:
if args.doi is None and args.isbn is None and args.arxiv is None:
bibentry = bibentry_from_editor(conf, ui, rp)
else:
bibentry = None
try:
if args.doi is not None:
bibentry_raw = apis.doi2bibtex(args.doi)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
if args.isbn is None:
ui.exit(1)
if args.isbn is not None:
bibentry_raw = apis.isbn2bibtex(args.isbn)
bibentry = rp.databroker.verify(bibentry_raw)
if bibentry is None:
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
ui.exit(1)
bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp)
elif args.isbn is not None:
bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
elif args.arxiv is not None:
bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp)
except apis.ReferenceNotFoundException as e:
ui.error(e.message)
ui.exit(1)
else:
bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw)

@ -4,3 +4,4 @@ python-dateutil
requests
configobj
beautifulsoup4
feedparser

@ -7,7 +7,7 @@ import dotdot
from pubs.p3 import ustr
from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex
from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex
class TestDOI2Bibtex(unittest.TestCase):
@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
self.endecoder.decode_bibdata(bib)
class TestArxiv2Bibtex(unittest.TestCase):
    """Checks that `arxiv2bibtex` output decodes to a single expected entry."""

    def setUp(self):
        self.endecoder = EnDecoder()

    def _decode_single_entry(self, arxiv_id):
        # Fetch the bibtex for `arxiv_id`, decode it, and return the only
        # entry after asserting that exactly one was produced.
        decoded = self.endecoder.decode_bibdata(arxiv2bibtex(arxiv_id))
        self.assertEqual(len(decoded), 1)
        first_key = next(iter(decoded))
        return decoded[first_key]

    def test_parses_to_bibtex_with_doi(self):
        paper = self._decode_single_entry('astro-ph/9812133')
        self.assertEqual(paper['author'][0], 'Perlmutter, S.')
        self.assertEqual(paper['year'], '1999')

    def test_parses_to_bibtex_without_doi(self):
        paper = self._decode_single_entry('math/0211159')
        self.assertEqual(paper['author'][0], 'Perelman, Grisha')
        self.assertEqual(paper['year'], '2002')
        expected_title = (
            'The entropy formula for the Ricci flow and its geometric applications')
        self.assertEqual(paper['title'], expected_title)
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them?

Loading…
Cancel
Save