diff --git a/pubs/apis.py b/pubs/apis.py index 0812ba5..a627cf5 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -1,7 +1,9 @@ """Interface for Remote Bibliographic APIs""" import requests +import feedparser from bs4 import BeautifulSoup +from .uis import get_ui def doi2bibtex(doi): @@ -25,3 +27,32 @@ def isbn2bibtex(isbn): citation = soup.find("textarea").text return citation + + +def arxiv2bibtex(arxiv_id): + """Return a bibtex string of metadata from an arXiv ID""" + + url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id + r = requests.get(url) + feed = feedparser.parse(r.text) + entry = feed.entries[0] + + if 'title' not in entry: + ui = get_ui() + ui.error('malformed arXiv ID: {}'.format(arxiv_id)) + elif 'arxiv_doi' in entry: + return doi2bibtex(entry['arxiv_doi']) + else: + # Create a bibentry from the metadata. + bibtext = '@misc{{{},\n'.format(arxiv_id) + bibtext += 'Author = {' + for i, author in enumerate(entry['authors']): + bibtext += author['name'] + if i < len(entry['authors']) - 1: + bibtext += ' and ' + bibtext += '},\n' + bibtext += 'Title = {{{}}},\n'.format(entry['title'].strip('\n')) + bibtext += 'Year = {{{}}},\n'.format(entry['published_parsed'].tm_year) + bibtext += 'Eprint = {{arXiv:{}}},\n'.format(arxiv_id) + bibtext += '}' + return bibtext diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index a22c0a5..d4031f6 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -27,6 +27,7 @@ def parser(subparsers, conf): help='bibtex file') parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) + parser.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None) parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None) 
parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas', default=None @@ -67,6 +68,19 @@ def bibentry_from_editor(conf, ui, rp): return bibentry +def api_call(fn, arg): + """Calls the appropriate API command. + + :param fn: The API function to call. + :param arg: The argument to give the API call. + """ + bibentry_raw = fn(arg) + bibentry = rp.databroker.verify(bibentry_raw) + return bibentry + if bibentry is None: + ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) + + def command(conf, args): """ :param bibfile: bibtex file (in .bib, .bibml or .yaml format. @@ -86,21 +100,26 @@ def command(conf, args): - if args.doi is None and args.isbn is None: + if args.doi is None and args.isbn is None and args.arxiv is None: bibentry = bibentry_from_editor(conf, ui, rp) else: + bibentry = None if args.doi is not None: bibentry_raw = apis.doi2bibtex(args.doi) bibentry = rp.databroker.verify(bibentry_raw) if bibentry is None: ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) - if args.isbn is None: - ui.exit(1) if args.isbn is not None: bibentry_raw = apis.isbn2bibtex(args.isbn) bibentry = rp.databroker.verify(bibentry_raw) if bibentry is None: ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) - ui.exit(1) # TODO distinguish between cases, offer to open the error page in a webbrowser. 
# TODO offer to confirm/change citekey + if args.arxiv is not None: + bibentry_raw = apis.arxiv2bibtex(args.arxiv) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + ui.error('invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv)) + if bibentry is None: + ui.exit(1) else: bibentry_raw = content.get_content(bibfile, ui=ui) bibentry = rp.databroker.verify(bibentry_raw) diff --git a/requirements.txt b/requirements.txt index 4e10580..da1abec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ python-dateutil requests configobj beautifulsoup4 +feedparser diff --git a/tests/test_apis.py b/tests/test_apis.py index 087f32f..8c2017c 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -7,7 +7,7 @@ import dotdot from pubs.p3 import ustr from pubs.endecoder import EnDecoder -from pubs.apis import doi2bibtex, isbn2bibtex +from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex class TestDOI2Bibtex(unittest.TestCase): @@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase): self.endecoder.decode_bibdata(bib) +class TestArxiv2Bibtex(unittest.TestCase): + + def setUp(self): + self.endecoder = EnDecoder() + + def test_parses_to_bibtex_with_doi(self): + bib = arxiv2bibtex('astro-ph/9812133') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Perlmutter, S.') + self.assertEqual(entry['year'], '1999') + + def test_parses_to_bibtex_without_doi(self): + bib = arxiv2bibtex('math/0211159') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Perelman, Grisha') + self.assertEqual(entry['year'], '2002') + self.assertEqual( + entry['title'], + 'The entropy formula for the Ricci flow and its geometric applications') + + # Note: apparently ottobib.com uses caracter modifiers for accents instead # of the correct unicode characters. 
TODO: Should we convert them?