Merge pull request #146 from joe-antognini/jma/arxiv

Arxiv support for pubs add.
main
Fabien C. Y. Benureau 7 years ago committed by GitHub
commit 20c5ed7446
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,9 +1,55 @@
"""Interface for Remote Bibliographic APIs""" """Interface for Remote Bibliographic APIs"""
import requests import requests
import bibtexparser
from bibtexparser.bibdatabase import BibDatabase
import feedparser
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
class ReferenceNotFoundException(Exception):
    """Raised when no bibliographic entry can be obtained for an identifier."""
def get_bibentry_from_api(id_str, id_type, rp):
    """Fetch a bibtex entry for an identifier and validate it.

    This is a wrapper around the functions that return a bibtex string given
    one of:

    * DOI
    * ISBN
    * arXiv ID

    The raw bibtex string is then passed through `rp.databroker.verify`.

    Args:
        id_str: A string with the ID.
        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
        rp: A `Repository` object; only its `databroker.verify` is used here.

    Returns:
        Whatever `rp.databroker.verify` returns for the fetched bibtex string
        (presumably the parsed/validated bib entry -- confirm against the
        databroker implementation).

    Raises:
        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
        ReferenceNotFoundException: if `rp.databroker.verify` returns None for
            the fetched data.
    """
    # Dispatch table: ID type name -> function returning a raw bibtex string.
    id_fns = {
        'doi': doi2bibtex,
        'isbn': isbn2bibtex,
        'arxiv': arxiv2bibtex,
    }

    if id_type not in id_fns:
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

    bibentry_raw = id_fns[id_type](id_str)
    bibentry = rp.databroker.verify(bibentry_raw)
    if bibentry is None:
        raise ReferenceNotFoundException(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
    return bibentry
def doi2bibtex(doi): def doi2bibtex(doi):
"""Return a bibtex string of metadata from a DOI""" """Return a bibtex string of metadata from a DOI"""
@ -25,3 +71,32 @@ def isbn2bibtex(isbn):
citation = soup.find("textarea").text citation = soup.find("textarea").text
return citation return citation
def arxiv2bibtex(arxiv_id):
    """Return a bibtex string of metadata from an arXiv ID.

    The arXiv export API is queried for `arxiv_id`. If the first returned
    entry carries an `arxiv_doi` field, the bibtex is obtained through
    `doi2bibtex`; otherwise a minimal `article` entry is built from the feed
    metadata (authors, title, publication year, eprint ID).

    Args:
        arxiv_id: A string with the arXiv ID.

    Returns:
        A bibtex string.

    Raises:
        ReferenceNotFoundException: if the feed contains no entries, or its
            first entry has no title.
    """
    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
    r = requests.get(url)
    feed = feedparser.parse(r.text)

    # Guard against an empty feed: indexing `feed.entries[0]` directly would
    # raise an IndexError that callers of this module do not handle.
    if not feed.entries or 'title' not in feed.entries[0]:
        raise ReferenceNotFoundException('arXiv ID not found.')
    entry = feed.entries[0]

    if 'arxiv_doi' in entry:
        # Delegate to the DOI lookup when the entry provides a DOI.
        return doi2bibtex(entry['arxiv_doi'])

    # Create a bibentry from the metadata.
    db = BibDatabase()
    author_str = ' and '.join(author['name'] for author in entry['authors'])
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': arxiv_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'Eprint': arxiv_id,
    }]
    return bibtexparser.dumps(db)

@ -26,8 +26,10 @@ def parser(subparsers, conf):
parser = subparsers.add_parser('add', help='add a paper to the repository') parser = subparsers.add_parser('add', help='add a paper to the repository')
parser.add_argument('bibfile', nargs='?', default=None, parser.add_argument('bibfile', nargs='?', default=None,
help='bibtex file') help='bibtex file')
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) id_arg = parser.add_mutually_exclusive_group()
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None) parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas', parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
default=None default=None
@ -85,24 +87,22 @@ def command(conf, args):
# get bibtex entry # get bibtex entry
if bibfile is None: if bibfile is None:
if args.doi is None and args.isbn is None: if args.doi is None and args.isbn is None and args.arxiv is None:
bibentry = bibentry_from_editor(conf, ui, rp) bibentry = bibentry_from_editor(conf, ui, rp)
else: else:
if args.doi is not None: bibentry = None
bibentry_raw = apis.doi2bibtex(args.doi) try:
bibentry = rp.databroker.verify(bibentry_raw) if args.doi is not None:
if bibentry is None: bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp)
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) elif args.isbn is not None:
if args.isbn is None: bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp)
ui.exit(1) # TODO distinguish between cases, offer to open the error page in a webbrowser.
if args.isbn is not None: # TODO offer to confirm/change citekey
bibentry_raw = apis.isbn2bibtex(args.isbn) elif args.arxiv is not None:
bibentry = rp.databroker.verify(bibentry_raw) bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp)
if bibentry is None: except apis.ReferenceNotFoundException as e:
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) ui.error(e.message)
ui.exit(1) ui.exit(1)
# TODO distinguish between cases, offer to open the error page in a webbrowser.
# TODO offer to confirm/change citekey
else: else:
bibentry_raw = content.get_content(bibfile, ui=ui) bibentry_raw = content.get_content(bibfile, ui=ui)
bibentry = rp.databroker.verify(bibentry_raw) bibentry = rp.databroker.verify(bibentry_raw)

@ -4,3 +4,4 @@ python-dateutil
requests requests
configobj configobj
beautifulsoup4 beautifulsoup4
feedparser

@ -7,7 +7,7 @@ import dotdot
from pubs.p3 import ustr from pubs.p3 import ustr
from pubs.endecoder import EnDecoder from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex
class TestDOI2Bibtex(unittest.TestCase): class TestDOI2Bibtex(unittest.TestCase):
@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
self.endecoder.decode_bibdata(bib) self.endecoder.decode_bibdata(bib)
class TestArxiv2Bibtex(unittest.TestCase):
    """Check that `arxiv2bibtex` output decodes to a single expected entry."""

    def setUp(self):
        self.endecoder = EnDecoder()

    def _fetch_single_entry(self, arxiv_id):
        """Fetch bibtex for `arxiv_id`, decode it and return its only entry."""
        raw_bibtex = arxiv2bibtex(arxiv_id)
        decoded = self.endecoder.decode_bibdata(raw_bibtex)
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        return decoded[first_key]

    def test_parses_to_bibtex_with_doi(self):
        entry = self._fetch_single_entry('astro-ph/9812133')
        self.assertEqual(entry['author'][0], 'Perlmutter, S.')
        self.assertEqual(entry['year'], '1999')

    def test_parses_to_bibtex_without_doi(self):
        entry = self._fetch_single_entry('math/0211159')
        self.assertEqual(entry['author'][0], 'Perelman, Grisha')
        self.assertEqual(entry['year'], '2002')
        expected_title = (
            'The entropy formula for the Ricci flow and its geometric applications')
        self.assertEqual(entry['title'], expected_title)
# Note: apparently ottobib.com uses character modifiers for accents instead # Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct unicode characters. TODO: Should we convert them? # of the correct unicode characters. TODO: Should we convert them?

Loading…
Cancel
Save