From 5a47150aad9fb4ce1b4ef1879f90125a2c0556d5 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 30 Jul 2018 18:24:56 +0200 Subject: [PATCH] Adds option to ignore malformed bibtex files or entry during import. --- pubs/commands/import_cmd.py | 42 ++++++++++++++++++++++++------------- tests/str_fixtures.py | 8 ++++++- tests/test_usecase.py | 19 +++++++++++++++++ 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 53d5b4c..977b62c 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -13,6 +13,10 @@ from ..uis import get_ui from ..content import system_path, read_text_file +_ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore." +_IGNORING_MSG = " Ignoring." + + def parser(subparsers, conf): parser = subparsers.add_parser('import', help='import paper(s) to the repository') @@ -24,10 +28,12 @@ def parser(subparsers, conf): help="one or several keys to import from the file") parser.add_argument('-O', '--overwrite', action='store_true', default=False, help="Overwrite keys already in the database") + parser.add_argument('-i', '--ignore-malformed', action='store_true', default=False, + help="Ignore malformed and unreadable files and entries") return parser -def many_from_path(ui, bibpath): +def many_from_path(ui, bibpath, ignore=False): """Extract list of papers found in bibliographic files in path. The behavior is to: @@ -52,8 +58,12 @@ def many_from_path(ui, bibpath): try: biblist.append(coder.decode_bibdata(read_text_file(filepath))) except coder.BibDecodingError: - ui.error("Could not parse bibtex at {}. Aborting import.".format(filepath)) - ui.exit() + error = "Could not parse bibtex at {}.".format(filepath) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() papers = {} for b in biblist: @@ -64,7 +74,12 @@ def many_from_path(ui, bibpath): papers[k] = Paper(k, b) papers[k].added = datetime.datetime.now() except ValueError as e: - papers[k] = e + error = 'Could not load entry for citekey {} ({}).'.format(k, e) + if ignore: + ui.warning(error + _IGNORING_MSG) + else: + ui.error(error + _ABORT_USE_IGNORE_MSG) + ui.exit() return papers @@ -81,20 +96,17 @@ def command(conf, args): rp = repo.Repository(conf) # Extract papers from bib - papers = many_from_path(ui, bibpath) + papers = many_from_path(ui, bibpath, ignore=args.ignore_malformed) keys = args.keys or papers.keys() for k in keys: p = papers[k] - if isinstance(p, Exception): - ui.error('Could not load entry for citekey {}.'.format(k)) + rp.push_paper(p, overwrite=args.overwrite) + ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) + docfile = bibstruct.extract_docfile(p.bibdata) + if docfile is None: + ui.warning("No file for {}.".format(p.citekey)) else: - rp.push_paper(p, overwrite=args.overwrite) - ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) - docfile = bibstruct.extract_docfile(p.bibdata) - if docfile is None: - ui.warning("No file for {}.".format(p.citekey)) - else: - rp.push_doc(p.citekey, docfile, copy=copy) - #FIXME should move the file if configured to do so. + rp.push_doc(p.citekey, docfile, copy=copy) + # FIXME should move the file if configured to do so. rp.close() diff --git a/tests/str_fixtures.py b/tests/str_fixtures.py index 03360b8..3b6ded3 100644 --- a/tests/str_fixtures.py +++ b/tests/str_fixtures.py @@ -69,7 +69,7 @@ bibtex_no_citekey = """@Manual{, } """ -bibtex_month= """@inproceedings{Goyal2017, +bibtex_month = """@inproceedings{Goyal2017, author = {Goyal, Anirudh and Sordoni, Alessandro and C{\^{o}}t{\'{e}}, Marc-Alexandre and Ke, Nan Rosemary and Bengio, Yoshua}, title = {Z-Forcing: Training Stochastic Recurrent Networks}, year = {2017}, @@ -78,6 +78,12 @@ bibtex_month= """@inproceedings{Goyal2017, } """ +not_bibtex = """@misc{this looks, + like = a = bibtex file but + , is not a real one! + +""" + sample_conf = """ [main] diff --git a/tests/test_usecase.py b/tests/test_usecase.py index d7168b3..f9ae4e4 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -826,6 +826,25 @@ class TestUsecase(DataCommandTestCase): outs = self.execute_cmds(cmds) self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_import_fails_without_ignore(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import data/ Page99', + ] + with self.assertRaises(FakeSystemExit): + self.execute_cmds(cmds) + + def test_import_ignores(self): + with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f: + f.write(str_fixtures.not_bibtex) + cmds = ['pubs init', + 'pubs import --ignore-malformed data/ Page99', + 'pubs list' + ] + outs = self.execute_cmds(cmds) + self.assertEqual(1 + 1, len(outs[-1].split('\n'))) + def test_update(self): cmds = ['pubs init', 'pubs add data/pagerank.bib',