diff --git a/pubs/apis.py b/pubs/apis.py
index 0812ba5..014217f 100644
--- a/pubs/apis.py
+++ b/pubs/apis.py
@@ -1,9 +1,57 @@
 """Interface for Remote Bibliographic APIs"""
 
 import requests
+import bibtexparser
+from bibtexparser.bibdatabase import BibDatabase
+import feedparser
 from bs4 import BeautifulSoup
 
 
+class ReferenceNotFoundException(Exception):
+    """Raised when no bibtex entry can be retrieved for a given ID."""
+
+
+def get_bibentry_from_api(id_str, id_type, rp):
+    """Return a verified bibtex entry from various ID methods.
+
+    This is a wrapper around functions that will return a bibtex string given
+    one of:
+
+        * DOI
+        * ISBN
+        * arXiv ID
+
+    Args:
+        id_str: A string with the ID.
+        id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`.
+        rp: A `Repository` object.
+
+    Returns:
+        The result of `rp.databroker.verify` on the fetched bibtex string.
+
+    Raises:
+        ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`.
+        ReferenceNotFoundException: if `rp.databroker.verify` returns None
+            for the bibtex string fetched for `id_str`.
+    """
+
+    id_fns = {
+        'doi': doi2bibtex,
+        'isbn': isbn2bibtex,
+        'arxiv': arxiv2bibtex,
+    }
+
+    if id_type not in id_fns:
+        raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.')
+
+    bibentry_raw = id_fns[id_type](id_str)
+    bibentry = rp.databroker.verify(bibentry_raw)
+    if bibentry is None:
+        raise ReferenceNotFoundException(
+            'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str))
+    return bibentry
+
+
 def doi2bibtex(doi):
     """Return a bibtex string of metadata from a DOI"""
 
@@ -25,3 +73,39 @@ def isbn2bibtex(isbn):
     citation = soup.find("textarea").text
 
     return citation
+
+
+def arxiv2bibtex(arxiv_id):
+    """Return a bibtex string of metadata from an arXiv ID
+
+    Raises:
+        ReferenceNotFoundException: if the feed returned for `arxiv_id`
+            has no entry with a title.
+    """
+
+    url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id
+    r = requests.get(url)
+    feed = feedparser.parse(r.text)
+    # Fall back to an empty entry when the feed has none, so the check
+    # below raises ReferenceNotFoundException instead of an IndexError.
+    entry = feed.entries[0] if feed.entries else {}
+
+    if 'title' not in entry:
+        raise ReferenceNotFoundException('arXiv ID not found.')
+    elif 'arxiv_doi' in entry:
+        bibtex = doi2bibtex(entry['arxiv_doi'])
+    else:
+        # Create a bibentry from the metadata.
+        db = BibDatabase()
+        author_str = ' and '.join(
+            [author['name'] for author in entry['authors']])
+        db.entries = [{
+            'ENTRYTYPE': 'article',
+            'ID': arxiv_id,
+            'author': author_str,
+            'title': entry['title'],
+            'year': str(entry['published_parsed'].tm_year),
+            'Eprint': arxiv_id,
+        }]
+        bibtex = bibtexparser.dumps(db)
+    return bibtex
diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py
index d193b5e..3ae8f70 100644
--- a/pubs/commands/add_cmd.py
+++ b/pubs/commands/add_cmd.py
@@ -26,8 +26,10 @@ def parser(subparsers, conf):
     parser = subparsers.add_parser('add', help='add a paper to the repository')
     parser.add_argument('bibfile', nargs='?', default=None,
                         help='bibtex file')
-    parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
-    parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
+    id_arg = parser.add_mutually_exclusive_group()
+    id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI)
+    id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None)
+    id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None)
     parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None)
     parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas',
                         default=None
@@ -85,24 +87,22 @@ def command(conf, args):
 
     # get bibtex entry
     if bibfile is None:
-        if args.doi is None and args.isbn is None:
+        if args.doi is None and args.isbn is None and args.arxiv is None:
             bibentry = bibentry_from_editor(conf, ui, rp)
         else:
-            if args.doi is not None:
-                bibentry_raw = apis.doi2bibtex(args.doi)
-                bibentry = rp.databroker.verify(bibentry_raw)
-                if bibentry is None:
-                    ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi))
-                    if args.isbn is None:
-                        ui.exit(1)
-            if args.isbn is not None:
-                bibentry_raw = apis.isbn2bibtex(args.isbn)
-                bibentry = rp.databroker.verify(bibentry_raw)
-                if bibentry is None:
-                    ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn))
-                    ui.exit(1)
-            # TODO distinguish between cases, offer to open the error page in a webbrowser.
-            # TODO offer to confirm/change citekey
+            bibentry = None
+            try:
+                if args.doi is not None:
+                    bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp)
+                elif args.isbn is not None:
+                    bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp)
+                    # TODO distinguish between cases, offer to open the error page in a webbrowser.
+                    # TODO offer to confirm/change citekey
+                elif args.arxiv is not None:
+                    bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp)
+            except apis.ReferenceNotFoundException as e:
+                ui.error(str(e))
+                ui.exit(1)
     else:
         bibentry_raw = content.get_content(bibfile, ui=ui)
         bibentry = rp.databroker.verify(bibentry_raw)
diff --git a/requirements.txt b/requirements.txt
index 4e10580..da1abec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ python-dateutil
 requests
 configobj
 beautifulsoup4
+feedparser
diff --git a/tests/test_apis.py b/tests/test_apis.py
index a83481a..c2893cc 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -7,7 +7,7 @@ import dotdot
 
 from pubs.p3 import ustr
 from pubs.endecoder import EnDecoder
-from pubs.apis import doi2bibtex, isbn2bibtex
+from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex
 
 
 class TestDOI2Bibtex(unittest.TestCase):
@@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase):
             self.endecoder.decode_bibdata(bib)
 
 
+class TestArxiv2Bibtex(unittest.TestCase):
+
+    def setUp(self):
+        self.endecoder = EnDecoder()
+
+    def test_parses_to_bibtex_with_doi(self):
+        bib = arxiv2bibtex('astro-ph/9812133')
+        b = self.endecoder.decode_bibdata(bib)
+        self.assertEqual(len(b), 1)
+        entry = b[list(b)[0]]
+        self.assertEqual(entry['author'][0], 'Perlmutter, S.')
+        self.assertEqual(entry['year'], '1999')
+
+    def test_parses_to_bibtex_without_doi(self):
+        bib = arxiv2bibtex('math/0211159')
+        b = self.endecoder.decode_bibdata(bib)
+        self.assertEqual(len(b), 1)
+        entry = b[list(b)[0]]
+        self.assertEqual(entry['author'][0], 'Perelman, Grisha')
+        self.assertEqual(entry['year'], '2002')
+        self.assertEqual(
+            entry['title'],
+            'The entropy formula for the Ricci flow and its geometric applications')
+
+
 # Note: apparently ottobib.com uses caracter modifiers for accents instead
 # of the correct unicode characters. TODO: Should we convert them?