From 2f48f37199de5d65883c07b0626be95fe290f11a Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 25 Jul 2018 18:30:26 +0200 Subject: [PATCH 1/8] Fixes duplicated test names in same TestCase (1st was not run). --- tests/test_endecoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_endecoder.py b/tests/test_endecoder.py index 80a9f53..ab8dc07 100644 --- a/tests/test_endecoder.py +++ b/tests/test_endecoder.py @@ -52,7 +52,7 @@ class TestEnDecode(unittest.TestCase): self.assertEqual(bibraw1, bibraw2) - def test_endecode_bibtex(self): + def test_endecode_bibtex_converts_month_string(self): """Test if `month=dec` is correctly recognized and transformed into `month={December}`""" decoder = endecoder.EnDecoder() From d8dc386a182285d64fdd21d1123bcdab4b94d81a Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 25 Jul 2018 19:53:55 +0200 Subject: [PATCH 2/8] Fix fake input behavior in tests on unexpected input. Because of the mechanism for catching exceptions in pubs, the UnexpectedInput exception raised by FakeInput never reached the catch statement in the CommandTestCase and raised a FakeSystemExit instead. This commit monkey-patches the exception handler in the ui at the same time as the patching of the input functions to ignore UnexptectedInput at the ui level. --- tests/fake_env.py | 10 ++++++++++ tests/test_usecase.py | 10 ++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/fake_env.py b/tests/fake_env.py index 2e2a5c7..2bc9a51 100644 --- a/tests/fake_env.py +++ b/tests/fake_env.py @@ -77,6 +77,16 @@ class FakeInput(): if md.__name__ == 'pubs.uis': md.InputUI.editor_input = self md.InputUI.edit_file = self.input_to_file + # Do not catch UnexpectedInput + original_handler = md.InputUI.handle_exception + + def handler(ui, exc): + if isinstance(exc, self.UnexpectedInput): + raise + else: + original_handler(ui, exc) + + md.InputUI.handle_exception = handler def input_to_file(self, path_to_file, temporary=True): content.write_file(path_to_file, self()) diff --git a/tests/test_usecase.py b/tests/test_usecase.py index 4f3d805..bdc1de5 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -131,7 +131,7 @@ class CommandTestCase(fake_env.TestFakeFs): outs.append(color.undye(actual_out)) else: pubs_cmd.execute(actual_cmd.split()) - except fake_env.FakeInput.UnexpectedInput as e: + except fake_env.FakeInput.UnexpectedInput: self.fail('Unexpected input asked by command: {}.'.format( actual_cmd)) return outs @@ -200,7 +200,7 @@ class TestAlone(CommandTestCase): def test_alone_misses_command(self): with self.assertRaises(FakeSystemExit) as cm: self.execute_cmds(['pubs']) - self.assertEqual(cm.exception.code, 2) + self.assertEqual(cm.exception.code, 2) def test_alone_prints_help(self): # capturing the output of `pubs --help` is difficult because argparse @@ -682,13 +682,11 @@ class TestUsecase(DataCommandTestCase): self.assertFileContentEqual(os.path.expanduser('~/.pubsrc'), str_fixtures.sample_conf) - def test_editor_abort(self): + def test_editor_aborts(self): with self.assertRaises(FakeSystemExit): cmds = ['pubs init', - ('pubs add', ['abc', 'n']), - ('pubs add', ['abc', 'y', 'abc', 'n']), 'pubs add data/pagerank.bib', - ('pubs edit Page99', ['', 'a']), + ('pubs edit Page99', ['', 'n']), ] self.execute_cmds(cmds) From 8a7d1432617c6477f46f1c38cb0342e13c037182 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 25 Jul 2018 22:20:14 +0200 Subject: [PATCH 3/8] Improves behaviors related to bibtex decoding error. - from editor input in add and edit commands, - from files in import command. --- pubs/commands/add_cmd.py | 8 +++++--- pubs/commands/edit_cmd.py | 7 ++++++- pubs/commands/import_cmd.py | 12 +++++++++--- pubs/endecoder.py | 18 ++++++++++++++++-- pubs/uis.py | 3 ++- tests/test_apis.py | 4 ++-- tests/test_endecoder.py | 5 +++++ tests/test_usecase.py | 37 ++++++++++++++++++++++++++++++++++++- 8 files changed, 81 insertions(+), 13 deletions(-) diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py index a22c0a5..d193b5e 100644 --- a/pubs/commands/add_cmd.py +++ b/pubs/commands/add_cmd.py @@ -11,6 +11,7 @@ from .. import templates from .. import apis from .. import pretty from .. import utils +from .. import endecoder from ..completion import CommaSeparatedTagsCompletion @@ -57,12 +58,13 @@ def bibentry_from_editor(conf, ui, rp): bibstruct.verify_bibdata(bibentry) # REFACTOR Generate citykey again = False - except ValueError: + + except endecoder.EnDecoder.BibDecodingError: again = ui.input_yn( - question='Invalid bibfile. Edit again ?', + question='Invalid bibfile. Edit again?', default='y') if not again: - ui.exit(0) + ui.exit() return bibentry diff --git a/pubs/commands/edit_cmd.py b/pubs/commands/edit_cmd.py index b09c885..8cc175b 100644 --- a/pubs/commands/edit_cmd.py +++ b/pubs/commands/edit_cmd.py @@ -64,9 +64,14 @@ def command(conf, args): 'as `{}`.'.format(citekey, new_paper.citekey))) else: ui.info(('Paper `{}` was successfully edited.'.format( - citekey))) + citekey))) break + except coder.BibDecodingError: + if not ui.input_yn(question="Error parsing bibdata. Edit again?"): + ui.error("Aborting, paper not updated.") + ui.exit() + except repo.CiteKeyCollision: options = ['overwrite', 'edit again', 'abort'] choice = options[ui.input_choice( diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 9ccb935..05279b2 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -27,7 +27,7 @@ def parser(subparsers, conf): return parser -def many_from_path(bibpath): +def many_from_path(ui, bibpath): """Extract list of papers found in bibliographic files in path. The behavior is to: @@ -49,11 +49,17 @@ def many_from_path(bibpath): biblist = [] for filepath in all_files: - biblist.append(coder.decode_bibdata(read_text_file(filepath))) + try: + biblist.append(coder.decode_bibdata(read_text_file(filepath))) + except coder.BibDecodingError: + ui.error("Could not parse bibtex at {}. Aborting import.".format(filepath)) + ui.exit() papers = {} for b in biblist: for k, b in b.items(): + if k in papers: + ui.warning('Duplicated citekey {}. Keeping last.'.format(k)) try: papers[k] = Paper(k, b) papers[k].added = datetime.datetime.now() @@ -75,7 +81,7 @@ def command(conf, args): rp = repo.Repository(conf) # Extract papers from bib - papers = many_from_path(bibpath) + papers = many_from_path(ui, bibpath) keys = args.keys or papers.keys() for k in keys: p = papers[k] diff --git a/pubs/endecoder.py b/pubs/endecoder.py index 8fd6941..259b8d9 100644 --- a/pubs/endecoder.py +++ b/pubs/endecoder.py @@ -66,6 +66,16 @@ class EnDecoder(object): * encode_bibdata will try to recognize exceptions """ + class BibDecodingError(Exception): + + message = "Could not parse provided bibdata:\n---\n{}\n---" + + def __init__(self, bibdata): + self.data = bibdata + + def __str__(self): + return self.message.format(self.data) + bwriter = bp.bwriter.BibTexWriter() bwriter.display_order = BIBFIELD_ORDER @@ -103,7 +113,10 @@ class EnDecoder(object): return entry def decode_bibdata(self, bibdata): - """""" + """Decodes bibdata from string. + + If the decoding fails, returns a BibParseError. + """ try: entries = bp.bparser.BibTexParser( bibdata, common_strings=True, @@ -121,4 +134,5 @@ class EnDecoder(object): except Exception: import traceback traceback.print_exc() - raise ValueError('could not parse provided bibdata:\n{}'.format(bibdata)) + raise self.BibDecodingError(bibdata) + # TODO: filter exceptions from pyparsing and pass reason upstream diff --git a/pubs/uis.py b/pubs/uis.py index d5ddd88..3c9aac6 100644 --- a/pubs/uis.py +++ b/pubs/uis.py @@ -104,6 +104,7 @@ class PrintUI(object): self.exit() return True # never happens + class InputUI(PrintUI): """UI class. Stores configuration parameters and system information. """ @@ -118,7 +119,7 @@ class InputUI(PrintUI): except EOFError: self.error('Standard input ended while waiting for answer.') self.exit(1) - return ustr(data) #.decode('utf-8') + return ustr(data) #.decode('utf-8') def input_choice_ng(self, options, option_chars=None, default=None, question=''): """Ask the user to chose between a set of options. The user is asked diff --git a/tests/test_apis.py b/tests/test_apis.py index 087f32f..a83481a 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -32,7 +32,7 @@ class TestDOI2Bibtex(unittest.TestCase): def test_parse_fails_on_incorrect_DOI(self): bib = doi2bibtex('999999') - with self.assertRaises(ValueError): + with self.assertRaises(EnDecoder.BibDecodingError): self.endecoder.decode_bibdata(bib) @@ -56,7 +56,7 @@ class TestISBN2Bibtex(unittest.TestCase): def test_parse_fails_on_incorrect_ISBN(self): bib = doi2bibtex('9' * 13) - with self.assertRaises(ValueError): + with self.assertRaises(EnDecoder.BibDecodingError): self.endecoder.decode_bibdata(bib) diff --git a/tests/test_endecoder.py b/tests/test_endecoder.py index ab8dc07..a6e3043 100644 --- a/tests/test_endecoder.py +++ b/tests/test_endecoder.py @@ -147,6 +147,11 @@ class TestEnDecode(unittest.TestCase): self.assertIn('author', entry1) self.assertIn('institution', entry1) + def test_endecodes_raises_exception(self): + decoder = endecoder.EnDecoder() + with self.assertRaises(decoder.BibDecodingError): + decoder.decode_bibdata("@misc{I am not a correct bibtex{{}") + if __name__ == '__main__': unittest.main() diff --git a/tests/test_usecase.py b/tests/test_usecase.py index bdc1de5..d60bdb2 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -337,11 +337,20 @@ class TestAdd(URLContentTestCase): def test_add_no_citekey_fails(self): # See #113 cmds = ['pubs init', - ('pubs add', [str_fixtures.bibtex_no_citekey]), + ('pubs add', [str_fixtures.bibtex_no_citekey, 'n']), ] with self.assertRaises(FakeSystemExit): self.execute_cmds(cmds) + def test_add_edit_fails(self): + cmds = ['pubs init', + ('pubs add', + ['@misc{I am not a correct bibtex{{}', 'n']), + ] + with self.assertRaises(FakeSystemExit) as cm: + self.execute_cmds(cmds) + self.assertEqual(cm.exception.code, 1) + class TestList(DataCommandTestCase): @@ -690,6 +699,32 @@ class TestUsecase(DataCommandTestCase): ] self.execute_cmds(cmds) + def test_editor_succeeds_on_second_edit(self): + cmds = ['pubs init', + 'pubs add data/pagerank.bib', + ('pubs edit Page99', [ + '', 'y', + '@misc{Page99, title="TTT", author="X. YY"}', '']), + ('pubs list', [], '[Page99] YY, X. "TTT" \n') + ] + self.execute_cmds(cmds) + + def test_add_aborts(self): + with self.assertRaises(FakeSystemExit): + cmds = ['pubs init', + ('pubs add New', ['']), + ] + self.execute_cmds(cmds) + + def test_add_succeeds_on_second_edit(self): + cmds = ['pubs init', + ('pubs add', [ + '', 'y', + '@misc{New, title="TTT", author="X. YY"}', '']), + ('pubs list', [], '[New] YY, X. "TTT" \n') + ] + self.execute_cmds(cmds) + def test_editor_success(self): cmds = ['pubs init', ('pubs add', [str_fixtures.bibtex_external0]), From ea711b6b426b0b46607f4d21dd8491163c07fcef Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 25 Jul 2018 22:51:22 +0200 Subject: [PATCH 4/8] [Fix #87] Include citekey in message on bibtex decoding error from repository. Catches decoding error at databroker level to include citekey in message. Could be improved by a better exception class for BibDecodingError. --- pubs/databroker.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pubs/databroker.py b/pubs/databroker.py index d529c3e..97c1398 100644 --- a/pubs/databroker.py +++ b/pubs/databroker.py @@ -45,7 +45,11 @@ class DataBroker(object): def pull_bibentry(self, citekey): bibdata_raw = self.filebroker.pull_bibfile(citekey) - return self.endecoder.decode_bibdata(bibdata_raw) + try: + return self.endecoder.decode_bibdata(bibdata_raw) + except self.endecoder.BibDecodingError as e: + e.message = "Unable to decode bibtex for paper {}.".format(citekey) + raise e def push_metadata(self, citekey, metadata): metadata_raw = self.endecoder.encode_metadata(metadata) From 60650b874a7db5b2839dedb761fd7d97e5bd2f7c Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 30 Jul 2018 17:46:04 +0200 Subject: [PATCH 5/8] Addresses minor comments (error message and better test). --- pubs/commands/import_cmd.py | 2 +- tests/test_usecase.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 05279b2..53d5b4c 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -59,7 +59,7 @@ def many_from_path(ui, bibpath): for b in biblist: for k, b in b.items(): if k in papers: - ui.warning('Duplicated citekey {}. Keeping last.'.format(k)) + ui.warning('Duplicated citekey {}. Keeping the last one.'.format(k)) try: papers[k] = Paper(k, b) papers[k].added = datetime.datetime.now() diff --git a/tests/test_usecase.py b/tests/test_usecase.py index d60bdb2..d7168b3 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -703,7 +703,7 @@ class TestUsecase(DataCommandTestCase): cmds = ['pubs init', 'pubs add data/pagerank.bib', ('pubs edit Page99', [ - '', 'y', + '@misc{Page99, title="TTT" author="X. YY"}', 'y', '@misc{Page99, title="TTT", author="X. YY"}', '']), ('pubs list', [], '[Page99] YY, X. "TTT" \n') ] From 5a47150aad9fb4ce1b4ef1879f90125a2c0556d5 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 30 Jul 2018 18:24:56 +0200 Subject: [PATCH 6/8] Adds option to ignore malformed bibtex files or entry during import. --- pubs/commands/import_cmd.py | 42 ++++++++++++++++++++++++------------- tests/str_fixtures.py | 8 ++++++- tests/test_usecase.py | 19 +++++++++++++++++ 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 53d5b4c..977b62c 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -13,6 +13,10 @@ from ..uis import get_ui from ..content import system_path, read_text_file +_ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore." +_IGNORING_MSG = " Ignoring." + + def parser(subparsers, conf): parser = subparsers.add_parser('import', help='import paper(s) to the repository') @@ -24,10 +28,12 @@ def parser(subparsers, conf): help="one or several keys to import from the file") parser.add_argument('-O', '--overwrite', action='store_true', default=False, help="Overwrite keys already in the database") + parser.add_argument('-i', '--ignore-malformed', action='store_true', default=False, + help="Ignore malformed and unreadable files and entries") return parser -def many_from_path(ui, bibpath): +def many_from_path(ui, bibpath, ignore=False): """Extract list of papers found in bibliographic files in path. The behavior is to: @@ -52,8 +58,12 @@ def many_from_path(ui, bibpath): try: biblist.append(coder.decode_bibdata(read_text_file(filepath))) except coder.BibDecodingError: - ui.error("Could not parse bibtex at {}. Aborting import.".format(filepath)) - ui.exit() + error = "Could not parse bibtex at {}.".format(filepath) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() papers = {} for b in biblist: @@ -64,7 +74,12 @@ def many_from_path(ui, bibpath): papers[k] = Paper(k, b) papers[k].added = datetime.datetime.now() except ValueError as e: - papers[k] = e + error = 'Could not load entry for citekey {} ({}).'.format(k, e) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() return papers @@ -81,20 +96,17 @@ def command(conf, args): rp = repo.Repository(conf) # Extract papers from bib - papers = many_from_path(ui, bibpath) + papers = many_from_path(ui, bibpath, ignore=args.ignore_malformed) keys = args.keys or papers.keys() for k in keys: p = papers[k] - if isinstance(p, Exception): - ui.error('Could not load entry for citekey {}.'.format(k)) + rp.push_paper(p, overwrite=args.overwrite) + ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) + docfile = bibstruct.extract_docfile(p.bibdata) + if docfile is None: + ui.warning("No file for {}.".format(p.citekey)) else: - rp.push_paper(p, overwrite=args.overwrite) - ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) - docfile = bibstruct.extract_docfile(p.bibdata) - if docfile is None: - ui.warning("No file for {}.".format(p.citekey)) - else: - rp.push_doc(p.citekey, docfile, copy=copy) - #FIXME should move the file if configured to do so. + rp.push_doc(p.citekey, docfile, copy=copy) + # FIXME should move the file if configured to do so. rp.close() diff --git a/tests/str_fixtures.py b/tests/str_fixtures.py index 03360b8..3b6ded3 100644 --- a/tests/str_fixtures.py +++ b/tests/str_fixtures.py @@ -69,7 +69,7 @@ bibtex_no_citekey = """@Manual{, } """ -bibtex_month= """@inproceedings{Goyal2017, +bibtex_month = """@inproceedings{Goyal2017, author = {Goyal, Anirudh and Sordoni, Alessandro and C{\^{o}}t{\'{e}}, Marc-Alexandre and Ke, Nan Rosemary and Bengio, Yoshua}, title = {Z-Forcing: Training Stochastic Recurrent Networks}, year = {2017}, @@ -78,6 +78,12 @@ bibtex_month= """@inproceedings{Goyal2017, } """ +not_bibtex = """@misc{this looks, + like = a = bibtex file but + , is not a real one! + +""" + sample_conf = """ [main] diff --git a/tests/test_usecase.py b/tests/test_usecase.py index d7168b3..f9ae4e4 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -826,6 +826,25 @@ class TestUsecase(DataCommandTestCase): outs = self.execute_cmds(cmds) self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_import_fails_without_ignore(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import data/ Page99', + ] + with self.assertRaises(FakeSystemExit): + self.execute_cmds(cmds) + + def test_import_ignores(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import --ignore-malformed data/ Page99', + 'pubs list' + ] + outs = self.execute_cmds(cmds) + self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_update(self): cmds = ['pubs init', 'pubs add data/pagerank.bib', From 13f21a3d28645269912b9e4b961f1da8f7a63cde Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 1 Aug 2018 14:43:32 +0200 Subject: [PATCH 7/8] Minor grammar in warning. --- pubs/commands/import_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 977b62c..fa6236e 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -14,7 +14,7 @@ from ..content import system_path, read_text_file _ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore." -_IGNORING_MSG = " Ignoring." +_IGNORING_MSG = " Ignoring it." def parser(subparsers, conf): From 03900c324a5281f52d8389fe3e2213e0b8d1f967 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Wed, 1 Aug 2018 15:12:25 +0200 Subject: [PATCH 8/8] Adds changelog entry. --- changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog.md b/changelog.md index 1a3267b..314ccbd 100644 --- a/changelog.md +++ b/changelog.md @@ -29,6 +29,8 @@ ### Fixed bugs +- [[#149]](https://github.com/pubs/pubs/issues/149) More robust handling of parsing and citekey errors [(#87)](https://github.com/pubs/pubs/pull/87) + - [[#148]](https://github.com/pubs/pubs/issues/148) Fix compatibility with Pyfakefs 3.7 [(#151)](https://github.com/pubs/pubs/pull/151) - [[#95]](https://github.com/pubs/pubs/issues/95) Error message when editor is missing [(#141)](https://github.com/pubs/pubs/issues/141)