Merge pull request #146 from joe-antognini/jma/arxiv
Arxiv support for pubs add.
This commit is contained in:
commit
20c5ed7446
75
pubs/apis.py
75
pubs/apis.py
@ -1,9 +1,55 @@
|
||||
"""Interface for Remote Bibliographic APIs"""
|
||||
|
||||
import requests
|
||||
import bibtexparser
|
||||
from bibtexparser.bibdatabase import BibDatabase
|
||||
import feedparser
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class ReferenceNotFoundException(Exception):
    """Raised when a bibliographic reference cannot be found or retrieved.

    The first positional argument is kept in the `message` attribute so
    that error handlers reading `e.message` work on Python 3 as well, where
    built-in exceptions no longer provide that attribute.
    """

    def __init__(self, *args):
        super(ReferenceNotFoundException, self).__init__(*args)
        # Mirror the Python 2 behaviour: `message` is the first argument,
        # or an empty string when the exception is raised without one.
        self.message = args[0] if args else ''
|
||||
|
||||
|
||||
def get_bibentry_from_api(id_str, id_type, rp):
    """Fetch a bibtex entry from a remote API and verify it.

    This is a wrapper around the functions that return a bibtex string given
    one of:

        * DOI
        * ISBN
        * arXiv ID

    The raw bibtex string is then passed through `rp.databroker.verify`.

    Args:
        id_str: A string with the ID.
        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
        rp: A `Repository` object; only `rp.databroker.verify` is used here.

    Returns:
        The value returned by `rp.databroker.verify` for the fetched bibtex
        (presumably the decoded bibliographic entry -- confirm against the
        databroker implementation). Never `None`.

    Raises:
        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
        ReferenceNotFoundException: if verification of the fetched bibtex
            returns `None`.
    """
    # Dispatch table: ID type -> function returning a raw bibtex string.
    id_fns = {
        'doi': doi2bibtex,
        'isbn': isbn2bibtex,
        'arxiv': arxiv2bibtex,
    }

    if id_type not in id_fns:
        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')

    bibentry_raw = id_fns[id_type](id_str)
    bibentry = rp.databroker.verify(bibentry_raw)
    if bibentry is None:
        # verify() signals an unusable bibtex string by returning None.
        raise ReferenceNotFoundException(
            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
    return bibentry
|
||||
|
||||
|
||||
def doi2bibtex(doi):
|
||||
"""Return a bibtex string of metadata from a DOI"""
|
||||
|
||||
@ -25,3 +71,32 @@ def isbn2bibtex(isbn):
|
||||
citation = soup.find("textarea").text
|
||||
|
||||
return citation
|
||||
|
||||
|
||||
def arxiv2bibtex(arxiv_id):
    """Return a bibtex string of metadata from an arXiv ID.

    The arXiv export API is queried for `arxiv_id`. If the returned entry
    carries a DOI (`arxiv_doi`), the bibtex is obtained through
    `doi2bibtex`; otherwise a minimal `article` entry is built from the
    feed metadata (authors, title, publication year, eprint ID).

    Args:
        arxiv_id: A string with the arXiv ID.

    Returns:
        A bibtex string.

    Raises:
        ReferenceNotFoundException: if the feed contains no entry, or the
            first entry has no title.
    """
    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
    r = requests.get(url)
    feed = feedparser.parse(r.text)

    # Guard against an empty feed: indexing entries[0] unconditionally
    # would raise IndexError instead of the documented exception.
    if not feed.entries or 'title' not in feed.entries[0]:
        raise ReferenceNotFoundException('arXiv ID not found.')
    entry = feed.entries[0]

    if 'arxiv_doi' in entry:
        # Prefer the DOI route when one is attached to the arXiv entry.
        return doi2bibtex(entry['arxiv_doi'])

    # No DOI: create a bibentry from the feed metadata.
    author_str = ' and '.join(author['name'] for author in entry['authors'])
    db = BibDatabase()
    db.entries = [{
        'ENTRYTYPE': 'article',
        'ID': arxiv_id,
        'author': author_str,
        'title': entry['title'],
        'year': str(entry['published_parsed'].tm_year),
        'Eprint': arxiv_id,
    }]
    return bibtexparser.dumps(db)
|
||||
|
@ -26,8 +26,10 @@ def parser(subparsers, conf):
|
||||
parser = subparsers.add_parser('add', help='add a paper to the repository')
|
||||
parser.add_argument('bibfile', nargs='?', default=None,
|
||||
help='bibtex file')
|
||||
parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
|
||||
parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
id_arg = parser.add_mutually_exclusive_group()
|
||||
id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
|
||||
id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
|
||||
parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
|
||||
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
|
||||
default=None
|
||||
@ -85,24 +87,22 @@ def command(conf, args):
|
||||
|
||||
# get bibtex entry
|
||||
if bibfile is None:
|
||||
if args.doi is None and args.isbn is None:
|
||||
if args.doi is None and args.isbn is None and args.arxiv is None:
|
||||
bibentry = bibentry_from_editor(conf, ui, rp)
|
||||
else:
|
||||
if args.doi is not None:
|
||||
bibentry_raw = apis.doi2bibtex(args.doi)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
if bibentry is None:
|
||||
ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
|
||||
if args.isbn is None:
|
||||
ui.exit(1)
|
||||
if args.isbn is not None:
|
||||
bibentry_raw = apis.isbn2bibtex(args.isbn)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
if bibentry is None:
|
||||
ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
|
||||
ui.exit(1)
|
||||
# TODO distinguish between cases, offer to open the error page in a webbrowser.
|
||||
# TODO offer to confirm/change citekey
|
||||
bibentry = None
|
||||
try:
|
||||
if args.doi is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp)
|
||||
elif args.isbn is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp)
|
||||
# TODO distinguish between cases, offer to open the error page in a webbrowser.
|
||||
# TODO offer to confirm/change citekey
|
||||
elif args.arxiv is not None:
|
||||
bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp)
|
||||
except apis.ReferenceNotFoundException as e:
|
||||
ui.error(e.message)
|
||||
ui.exit(1)
|
||||
else:
|
||||
bibentry_raw = content.get_content(bibfile, ui=ui)
|
||||
bibentry = rp.databroker.verify(bibentry_raw)
|
||||
|
@ -4,3 +4,4 @@ python-dateutil
|
||||
requests
|
||||
configobj
|
||||
beautifulsoup4
|
||||
feedparser
|
||||
|
@ -7,7 +7,7 @@ import dotdot
|
||||
|
||||
from pubs.p3 import ustr
|
||||
from pubs.endecoder import EnDecoder
|
||||
from pubs.apis import doi2bibtex, isbn2bibtex
|
||||
from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex
|
||||
|
||||
|
||||
class TestDOI2Bibtex(unittest.TestCase):
|
||||
@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
|
||||
self.endecoder.decode_bibdata(bib)
|
||||
|
||||
|
||||
class TestArxiv2Bibtex(unittest.TestCase):
    """Check that `arxiv2bibtex` output decodes to a single expected entry."""

    def setUp(self):
        # Each test gets its own decoder instance.
        self.endecoder = EnDecoder()

    def test_parses_to_bibtex_with_doi(self):
        """Decode the bibtex for 'astro-ph/9812133' and check author/year."""
        decoded = self.endecoder.decode_bibdata(
            arxiv2bibtex('astro-ph/9812133'))
        self.assertEqual(len(decoded), 1)

        first_key = list(decoded)[0]
        fields = decoded[first_key]
        self.assertEqual(fields['author'][0], 'Perlmutter, S.')
        self.assertEqual(fields['year'], '1999')

    def test_parses_to_bibtex_without_doi(self):
        """Decode the bibtex for 'math/0211159' and check author/year/title."""
        decoded = self.endecoder.decode_bibdata(
            arxiv2bibtex('math/0211159'))
        self.assertEqual(len(decoded), 1)

        first_key = list(decoded)[0]
        fields = decoded[first_key]
        self.assertEqual(fields['author'][0], 'Perelman, Grisha')
        self.assertEqual(fields['year'], '2002')
        expected_title = (
            'The entropy formula for the Ricci flow and its geometric applications')
        self.assertEqual(fields['title'], expected_title)
|
||||
|
||||
|
||||
# Note: apparently ottobib.com uses character modifiers for accents instead
|
||||
# of the correct unicode characters. TODO: Should we convert them?
|
||||
|
Loading…
x
Reference in New Issue
Block a user