From f3c83668f9b1689a166c6e8b0e1ebb4bd75a8d79 Mon Sep 17 00:00:00 2001 From: Joe Antognini Date: Mon, 23 Jul 2018 18:23:36 -0600 Subject: [PATCH 1/5] Allow bibtex to be added from arxiv id. This allows the user to add a reference via an arXiv ID similarly to how a reference can be added from a DOI or ISBN. If the arXiv ID has a DOI associated with it (according to the arXiv server), the DOI will be used. If it does not (perhaps the paper is unpublished), then a bibtex entry will automatically be generated from the reference's metadata. Note that a potential issue with this addition is that if a paper is added before it is published (i.e., there is no DOI associated with it), and the paper is later published, the updated information will have to be manually added. --- pubs/apis.py | 31 +++++++++++++++++++++++++++++++ pubs/commands/add_cmd.py | 25 ++++++++++++++++++++++--- requirements.txt | 1 + tests/test_apis.py | 27 ++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 4 deletions(-) diff --git a/pubs/apis.py b/pubs/apis.py index 0812ba5..a627cf5 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -1,7 +1,9 @@ """Interface for Remote Bibliographic APIs""" import requests +import feedparser from bs4 import BeautifulSoup +from uis import get_ui def doi2bibtex(doi): @@ -25,3 +27,32 @@ def isbn2bibtex(isbn): citation = soup.find("textarea").text return citation + + +def arxiv2bibtex(arxiv_id): + """Return a bibtex string of metadata from an arXiv ID""" + + url = 'https://export.arxiv.org/api/query?id_list=' + arxiv_id + r = requests.get(url) + feed = feedparser.parse(r.text) + entry = feed.entries[0] + + if 'title' not in entry: + ui = get_ui() + ui.error('malformed arXiv ID: {}'.format(arxiv_id)) + if 'arxiv_doi' in entry: + return doi2bibtex(entry['arxiv_doi']) + else: + # Create a bibentry from the metadata. 
+ bibtext = '@misc{{{},\n'.format(arxiv_id) + bibtext += 'Author = {' + for i, author in enumerate(entry['authors']): + bibtext += author['name'] + if i < len(entry['authors']) - 1: + bibtext += ' and ' + bibtext += '},\n' + bibtext += 'Title = {{{}}},\n'.format(entry['title'].strip('\n')) + bibtext += 'Year = {{{}}},\n'.format(entry['published_parsed'].tm_year) + bibtext += 'Eprint = {{arXiv:{}}},\n'.format(arxiv_id) + bibtext += '}' + return bibtext diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index a22c0a5..d4031f6 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -27,6 +27,7 @@ def parser(subparsers, conf): help='bibtex file') parser.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) + parser.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None) parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None) parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas', default=None @@ -67,6 +68,19 @@ def bibentry_from_editor(conf, ui, rp): return bibentry +def api_call(fn, arg): + """Calls the appropriate API command. + + :param fn: The API function to call. + :param arg: The argument to give the API call. + """ + bibentry_raw = fn(arg) + bibentry = rp.databroker.verify(bibentry_raw) + return bibentry + if bibentry is None: + ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) + + def command(conf, args): """ :param bibfile: bibtex file (in .bib, .bibml or .yaml format. 
@@ -86,21 +100,26 @@ def command(conf, args): if args.doi is None and args.isbn is None: bibentry = bibentry_from_editor(conf, ui, rp) else: + bibentry = None if args.doi is not None: bibentry_raw = apis.doi2bibtex(args.doi) bibentry = rp.databroker.verify(bibentry_raw) if bibentry is None: ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) - if args.isbn is None: - ui.exit(1) if args.isbn is not None: bibentry_raw = apis.isbn2bibtex(args.isbn) bibentry = rp.databroker.verify(bibentry_raw) if bibentry is None: ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) - ui.exit(1) # TODO distinguish between cases, offer to open the error page in a webbrowser. # TODO offer to confirm/change citekey + if args.arxiv is not None: + bibentry_raw = apis.arxiv2bibtex(args.arxiv) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + ui.error('invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv_id)) + if bibentry is None: + ui.exit(1) else: bibentry_raw = content.get_content(bibfile, ui=ui) bibentry = rp.databroker.verify(bibentry_raw) diff --git a/requirements.txt b/requirements.txt index 4e10580..da1abec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ python-dateutil requests configobj beautifulsoup4 +feedparser diff --git a/tests/test_apis.py b/tests/test_apis.py index 087f32f..8c2017c 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -7,7 +7,7 @@ import dotdot from pubs.p3 import ustr from pubs.endecoder import EnDecoder -from pubs.apis import doi2bibtex, isbn2bibtex +from pubs.apis import arxiv2bibtex, doi2bibtex, isbn2bibtex class TestDOI2Bibtex(unittest.TestCase): @@ -60,5 +60,30 @@ class TestISBN2Bibtex(unittest.TestCase): self.endecoder.decode_bibdata(bib) +class TestArxiv2Bibtex(unittest.TestCase): + + def setUp(self): + self.endecoder = EnDecoder() + + def test_parses_to_bibtex_with_doi(self): + bib = 
arxiv2bibtex('astro-ph/9812133') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Perlmutter, S.') + self.assertEqual(entry['year'], '1999') + + def test_parses_to_bibtex_without_doi(self): + bib = arxiv2bibtex('math/0211159') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Perelman, Grisha') + self.assertEqual(entry['year'], '2002') + self.assertEqual( + entry['title'], + 'The entropy formula for the Ricci flow and its geometric applications') + + # Note: apparently ottobib.com uses caracter modifiers for accents instead # of the correct unicode characters. TODO: Should we convert them? From 35f209a98a09a4afacce1a5feff0421e5b4428e0 Mon Sep 17 00:00:00 2001 From: Joe Antognini Date: Mon, 23 Jul 2018 23:33:05 -0700 Subject: [PATCH 2/5] Remove unnecessary function from earlier commit. --- pubs/commands/add_cmd.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index d4031f6..74522b0 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -68,19 +68,6 @@ def bibentry_from_editor(conf, ui, rp): return bibentry -def api_call(fn, arg): - """Calls the appropriate API command. - - :param fn: The API function to call. - :param arg: The argument to give the API call. - """ - bibentry_raw = fn(arg) - bibentry = rp.databroker.verify(bibentry_raw) - return bibentry - if bibentry is None: - ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) - - def command(conf, args): """ :param bibfile: bibtex file (in .bib, .bibml or .yaml format. From 30f5f86c9d7f7ebd496442325ccee8ccdc09283c Mon Sep 17 00:00:00 2001 From: Joe Antognini Date: Mon, 23 Jul 2018 23:37:12 -0700 Subject: [PATCH 3/5] Don't immediately exit on malformed arxiv id. 
To be consistent with errors in other uses of `pubs add`, we only exit when the returned bibentry is None. --- pubs/apis.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pubs/apis.py b/pubs/apis.py index a627cf5..aa04fb9 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -40,19 +40,20 @@ def arxiv2bibtex(arxiv_id): if 'title' not in entry: ui = get_ui() ui.error('malformed arXiv ID: {}'.format(arxiv_id)) - if 'arxiv_doi' in entry: - return doi2bibtex(entry['arxiv_doi']) + bibtex = None + elif 'arxiv_doi' in entry: + bibtex = doi2bibtex(entry['arxiv_doi']) else: # Create a bibentry from the metadata. - bibtext = '@misc{{{},\n'.format(arxiv_id) - bibtext += 'Author = {' + bibtex = '@misc{{{},\n'.format(arxiv_id) + bibtex += 'Author = {' for i, author in enumerate(entry['authors']): - bibtext += author['name'] + bibtex += author['name'] if i < len(entry['authors']) - 1: - bibtext += ' and ' - bibtext += '},\n' - bibtext += 'Title = {{{}}},\n'.format(entry['title'].strip('\n')) - bibtext += 'Year = {{{}}},\n'.format(entry['published_parsed'].tm_year) - bibtext += 'Eprint = {{arXiv:{}}},\n'.format(arxiv_id) - bibtext += '}' - return bibtext + bibtex += ' and ' + bibtex += '},\n' + bibtex += 'Title = {{{}}},\n'.format(entry['title'].strip('\n')) + bibtex += 'Year = {{{}}},\n'.format(entry['published_parsed'].tm_year) + bibtex += 'Eprint = {{arXiv:{}}},\n'.format(arxiv_id) + bibtex += '}' + return bibtex From ccdbe72eb7bffa11c8cc8e9d5377ec519ce43486 Mon Sep 17 00:00:00 2001 From: "J. Antognini" Date: Sat, 28 Jul 2018 22:37:14 -0700 Subject: [PATCH 4/5] Address omangin's code review. * This fixes the logic in the `pubs add` command so that an arxiv ID doesn't overwrite a DOI. This also changes the logic so that if an invalid DOI, ISBN, or arXiv ID is provided the program will raise an error. * The code now uses the bibtexparser package to generate the bibtex file for arxiv papers. 
* A dedicated exception is added for references that can't be found. --- pubs/apis.py | 34 +++++++++++++++++-------------- pubs/commands/add_cmd.py | 43 ++++++++++++++++++++++------------------ 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/pubs/apis.py b/pubs/apis.py index aa04fb9..82cb8f1 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -1,9 +1,14 @@ """Interface for Remote Bibliographic APIs""" import requests +import bibtexparser +from bibtexparser.bibdatabase import BibDatabase import feedparser from bs4 import BeautifulSoup -from uis import get_ui + + +class ReferenceNotFoundException(Exception): + pass def doi2bibtex(doi): @@ -38,22 +43,21 @@ def arxiv2bibtex(arxiv_id): entry = feed.entries[0] if 'title' not in entry: - ui = get_ui() - ui.error('malformed arXiv ID: {}'.format(arxiv_id)) - bibtex = None + raise ReferenceNotFoundException('arXiv ID not found.') elif 'arxiv_doi' in entry: bibtex = doi2bibtex(entry['arxiv_doi']) else: # Create a bibentry from the metadata. 
- bibtex = '@misc{{{},\n'.format(arxiv_id) - bibtex += 'Author = {' - for i, author in enumerate(entry['authors']): - bibtex += author['name'] - if i < len(entry['authors']) - 1: - bibtex += ' and ' - bibtex += '},\n' - bibtex += 'Title = {{{}}},\n'.format(entry['title'].strip('\n')) - bibtex += 'Year = {{{}}},\n'.format(entry['published_parsed'].tm_year) - bibtex += 'Eprint = {{arXiv:{}}},\n'.format(arxiv_id) - bibtex += '}' + db = BibDatabase() + author_str = ' and '.join( + [author['name'] for author in entry['authors']]) + db.entries = [{ + 'ENTRYTYPE': 'misc', + 'ID': arxiv_id, + 'author': author_str, + 'title': entry['title'], + 'year': str(entry['published_parsed'].tm_year), + 'Eprint': arxiv_id, + }] + bibtex = bibtexparser.dumps(db) return bibtex diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index 74522b0..4930b6b 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -84,28 +84,33 @@ def command(conf, args): # get bibtex entry if bibfile is None: - if args.doi is None and args.isbn is None: + if args.doi is None and args.isbn is None and args.arxiv is None: bibentry = bibentry_from_editor(conf, ui, rp) else: bibentry = None - if args.doi is not None: - bibentry_raw = apis.doi2bibtex(args.doi) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - ui.error('invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) - if args.isbn is not None: - bibentry_raw = apis.isbn2bibtex(args.isbn) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - ui.error('invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) - # TODO distinguish between cases, offer to open the error page in a webbrowser. 
- # TODO offer to confirm/change citekey - if args.arxiv is not None: - bibentry_raw = apis.arxiv2bibtex(args.arxiv) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - ui.error('invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv_id)) - if bibentry is None: + try: + if args.doi is not None: + bibentry_raw = apis.doi2bibtex(args.doi) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + raise apis.ReferenceNotFoundException( + 'invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) + elif args.isbn is not None: + bibentry_raw = apis.isbn2bibtex(args.isbn) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + raise apis.ReferenceNotFoundException( + 'invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) + # TODO distinguish between cases, offer to open the error page in a webbrowser. + # TODO offer to confirm/change citekey + elif args.arxiv is not None: + bibentry_raw = apis.arxiv2bibtex(args.arxiv) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + raise apis.ReferenceNotFoundException( + 'invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv_id)) + except apis.ReferenceNotFoundException as e: + ui.error(e.message) ui.exit(1) else: bibentry_raw = content.get_content(bibfile, ui=ui) From d6ab091e5c7ead3f011b494d99ccd6ec1ce12817 Mon Sep 17 00:00:00 2001 From: "J. Antognini" Date: Sun, 5 Aug 2018 00:00:20 -0700 Subject: [PATCH 5/5] Only allow one of doi, arxiv, or isbn to pubs_add Also includes some minor refactoring. 
--- pubs/apis.py | 41 +++++++++++++++++++++++++++++++++++++++- pubs/commands/add_cmd.py | 25 +++++++----------------- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/pubs/apis.py b/pubs/apis.py index 82cb8f1..014217f 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -11,6 +11,45 @@ class ReferenceNotFoundException(Exception): pass +def get_bibentry_from_api(id_str, id_type, rp): + """Return a bibtex string from various ID methods. + + This is a wrapper around functions that will return a bibtex string given + one of: + + * DOI + * ISBN + * arXiv ID + + Args: + id_str: A string with the ID. + id_type: Name of the ID type. Must be one of `doi`, `isbn`, or `arxiv`. + rp: A `Repository` object. + + Returns: + A bibtex string. + + Raises: + ValueError: if `id_type` is not one of `doi`, `isbn`, or `arxiv`. + """ + + id_fns = { + 'doi': doi2bibtex, + 'isbn': isbn2bibtex, + 'arxiv': arxiv2bibtex, + } + + if id_type not in id_fns.keys(): + raise ValueError('id_type must be one of `doi`, `isbn`, or `arxiv`.') + + bibentry_raw = id_fns[id_type](id_str) + bibentry = rp.databroker.verify(bibentry_raw) + if bibentry is None: + raise ReferenceNotFoundException( + 'invalid {} {} or unable to retrieve bibfile from it.'.format(id_type, id_str)) + return bibentry + + def doi2bibtex(doi): """Return a bibtex string of metadata from a DOI""" @@ -52,7 +91,7 @@ def arxiv2bibtex(arxiv_id): author_str = ' and '.join( [author['name'] for author in entry['authors']]) db.entries = [{ - 'ENTRYTYPE': 'misc', + 'ENTRYTYPE': 'article', 'ID': arxiv_id, 'author': author_str, 'title': entry['title'], diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index e3632f4..3ae8f70 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -26,9 +26,10 @@ def parser(subparsers, conf): parser = subparsers.add_parser('add', help='add a paper to the repository') parser.add_argument('bibfile', nargs='?', default=None, help='bibtex file') - parser.add_argument('-D', 
'--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) - parser.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) - parser.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None) + id_arg = parser.add_mutually_exclusive_group() + id_arg.add_argument('-D', '--doi', help='doi number to retrieve the bibtex entry, if it is not provided', default=None, action=ValidateDOI) + id_arg.add_argument('-I', '--isbn', help='isbn number to retrieve the bibtex entry, if it is not provided', default=None) + id_arg.add_argument('-X', '--arxiv', help='arXiv ID to retrieve the bibtex entry, if it is not provided', default=None) parser.add_argument('-d', '--docfile', help='pdf or ps file', default=None) parser.add_argument('-t', '--tags', help='tags associated to the paper, separated by commas', default=None @@ -92,25 +93,13 @@ def command(conf, args): bibentry = None try: if args.doi is not None: - bibentry_raw = apis.doi2bibtex(args.doi) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - raise apis.ReferenceNotFoundException( - 'invalid doi {} or unable to retrieve bibfile from it.'.format(args.doi)) + bibentry = apis.get_bibentry_from_api(args.doi, 'doi', rp) elif args.isbn is not None: - bibentry_raw = apis.isbn2bibtex(args.isbn) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - raise apis.ReferenceNotFoundException( - 'invalid isbn {} or unable to retrieve bibfile from it.'.format(args.isbn)) + bibentry = apis.get_bibentry_from_api(args.isbn, 'isbn', rp) # TODO distinguish between cases, offer to open the error page in a webbrowser. 
# TODO offer to confirm/change citekey elif args.arxiv is not None: - bibentry_raw = apis.arxiv2bibtex(args.arxiv) - bibentry = rp.databroker.verify(bibentry_raw) - if bibentry is None: - raise apis.ReferenceNotFoundException( - 'invalid arxiv id {} or unable to retrieve bibfile from it.'.format(args.arxiv_id)) + bibentry = apis.get_bibentry_from_api(args.arxiv, 'arxiv', rp) except apis.ReferenceNotFoundException as e: ui.error(e.message) ui.exit(1)