Adds an option to ignore malformed BibTeX files or entries during import.

main
Olivier Mangin 7 years ago
parent 60650b874a
commit 5a47150aad
No known key found for this signature in database
GPG Key ID: D72FEC1C3120A884

@@ -13,6 +13,10 @@ from ..uis import get_ui
from ..content import system_path, read_text_file from ..content import system_path, read_text_file
_ABORT_USE_IGNORE_MSG = "Aborting import. Use --ignore-malformed to ignore."
_IGNORING_MSG = " Ignoring."
def parser(subparsers, conf): def parser(subparsers, conf):
parser = subparsers.add_parser('import', parser = subparsers.add_parser('import',
help='import paper(s) to the repository') help='import paper(s) to the repository')
@@ -24,10 +28,12 @@ def parser(subparsers, conf):
help="one or several keys to import from the file") help="one or several keys to import from the file")
parser.add_argument('-O', '--overwrite', action='store_true', default=False, parser.add_argument('-O', '--overwrite', action='store_true', default=False,
help="Overwrite keys already in the database") help="Overwrite keys already in the database")
parser.add_argument('-i', '--ignore-malformed', action='store_true', default=False,
help="Ignore malformed and unreadable files and entries")
return parser return parser
def many_from_path(ui, bibpath): def many_from_path(ui, bibpath, ignore=False):
"""Extract list of papers found in bibliographic files in path. """Extract list of papers found in bibliographic files in path.
The behavior is to: The behavior is to:
@@ -52,8 +58,12 @@ def many_from_path(ui, bibpath):
try: try:
biblist.append(coder.decode_bibdata(read_text_file(filepath))) biblist.append(coder.decode_bibdata(read_text_file(filepath)))
except coder.BibDecodingError: except coder.BibDecodingError:
ui.error("Could not parse bibtex at {}. Aborting import.".format(filepath)) error = "Could not parse bibtex at {}.".format(filepath)
ui.exit() if ignore:
ui.warning(error + _IGNORING_MSG)
else:
ui.error(error + _ABORT_USE_IGNORE_MSG)
ui.exit()
papers = {} papers = {}
for b in biblist: for b in biblist:
@@ -64,7 +74,12 @@ def many_from_path(ui, bibpath):
papers[k] = Paper(k, b) papers[k] = Paper(k, b)
papers[k].added = datetime.datetime.now() papers[k].added = datetime.datetime.now()
except ValueError as e: except ValueError as e:
papers[k] = e error = 'Could not load entry for citekey {} ({}).'.format(k, e)
if ignore:
ui.warning(error + _IGNORING_MSG)
else:
ui.error(error + _ABORT_USE_IGNORE_MSG)
ui.exit()
return papers return papers
@@ -81,20 +96,17 @@ def command(conf, args):
rp = repo.Repository(conf) rp = repo.Repository(conf)
# Extract papers from bib # Extract papers from bib
papers = many_from_path(ui, bibpath) papers = many_from_path(ui, bibpath, ignore=args.ignore_malformed)
keys = args.keys or papers.keys() keys = args.keys or papers.keys()
for k in keys: for k in keys:
p = papers[k] p = papers[k]
if isinstance(p, Exception): rp.push_paper(p, overwrite=args.overwrite)
ui.error('Could not load entry for citekey {}.'.format(k)) ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey')))
docfile = bibstruct.extract_docfile(p.bibdata)
if docfile is None:
ui.warning("No file for {}.".format(p.citekey))
else: else:
rp.push_paper(p, overwrite=args.overwrite) rp.push_doc(p.citekey, docfile, copy=copy)
ui.info('{} imported.'.format(color.dye_out(p.citekey, 'citekey'))) # FIXME should move the file if configured to do so.
docfile = bibstruct.extract_docfile(p.bibdata)
if docfile is None:
ui.warning("No file for {}.".format(p.citekey))
else:
rp.push_doc(p.citekey, docfile, copy=copy)
#FIXME should move the file if configured to do so.
rp.close() rp.close()

@@ -69,7 +69,7 @@ bibtex_no_citekey = """@Manual{,
} }
""" """
bibtex_month= """@inproceedings{Goyal2017, bibtex_month = """@inproceedings{Goyal2017,
author = {Goyal, Anirudh and Sordoni, Alessandro and C{\^{o}}t{\'{e}}, Marc-Alexandre and Ke, Nan Rosemary and Bengio, Yoshua}, author = {Goyal, Anirudh and Sordoni, Alessandro and C{\^{o}}t{\'{e}}, Marc-Alexandre and Ke, Nan Rosemary and Bengio, Yoshua},
title = {Z-Forcing: Training Stochastic Recurrent Networks}, title = {Z-Forcing: Training Stochastic Recurrent Networks},
year = {2017}, year = {2017},
@@ -78,6 +78,12 @@ bibtex_month= """@inproceedings{Goyal2017,
} }
""" """
not_bibtex = """@misc{this looks,
like = a = bibtex file but
, is not a real one!
"""
sample_conf = """ sample_conf = """
[main] [main]

@@ -826,6 +826,25 @@ class TestUsecase(DataCommandTestCase):
outs = self.execute_cmds(cmds) outs = self.execute_cmds(cmds)
self.assertEqual(1 + 1, len(outs[-1].split('\n'))) self.assertEqual(1 + 1, len(outs[-1].split('\n')))
def test_import_fails_without_ignore(self):
with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f:
f.write(str_fixtures.not_bibtex)
cmds = ['pubs init',
'pubs import data/ Page99',
]
with self.assertRaises(FakeSystemExit):
self.execute_cmds(cmds)
def test_import_ignores(self):
with FakeFileOpen(self.fs)('data/fake.bib', 'w') as f:
f.write(str_fixtures.not_bibtex)
cmds = ['pubs init',
'pubs import --ignore-malformed data/ Page99',
'pubs list'
]
outs = self.execute_cmds(cmds)
self.assertEqual(1 + 1, len(outs[-1].split('\n')))
def test_update(self): def test_update(self):
cmds = ['pubs init', cmds = ['pubs init',
'pubs add data/pagerank.bib', 'pubs add data/pagerank.bib',

Loading…
Cancel
Save