Adds a `--strict` option to the `list` command to force strict unicode comparison of the query.

main
Olivier Mangin 7 years ago
parent 248bf62317
commit 6e39cea473

@ -15,6 +15,8 @@ def parser(subparsers, conf):
default=None, dest='case_sensitive')
parser.add_argument('-I', '--force-case', action='store_true',
dest='case_sensitive')
parser.add_argument('--strict', action='store_true', default=False,
help='force strict unicode comparison of query')
parser.add_argument('-a', '--alphabetical', action='store_true',
dest='alphabetical', default=False,
help='lexicographic order on the citekeys.')
@ -34,7 +36,8 @@ def command(conf, args):
ui = get_ui()
rp = repo.Repository(conf)
papers = filter(get_paper_filter(args.query,
case_sensitive=args.case_sensitive),
case_sensitive=args.case_sensitive,
strict=args.strict),
rp.all_papers())
if args.nodocs:
papers = [p for p in papers if p.docpath is None]

@ -23,11 +23,20 @@ class InvalidQuery(ValueError):
class QueryFilter(object):
"""Filter function for papers built from a given query.
def __init__(self, query, case_sensitive=None):
:param case_sensitive: forces case (in)sensitivity; default is to
only be sensitive if query contains uppercase
:param strict: if set to True, compares the raw unicode without
interpreting latex commands, normalizing unicode, or ignoring case.
(Overrides the case_sensitive parameter.)
"""
def __init__(self, query, case_sensitive=None, strict=False):
    """Store the matching options and the normalized query.

    :param query: query string to look for
    :param case_sensitive: forces case (in)sensitivity; when None, case
        only matters if the query is not all lowercase
    :param strict: if True, the query is kept as the raw string
    """
    # Both flags must be set before normalizing: _normalize reads them.
    self.strict = strict
    self.case = (not query.islower()) if case_sensitive is None else case_sensitive
    self.query = self._normalize(query)
def __call__(self, paper):
@ -37,16 +46,20 @@ class QueryFilter(object):
return self.query in self._normalize(field_value)
def _normalize(self, s):
    """Return the form of `s` that is used for matching.

    In strict mode the string is returned untouched. Otherwise it goes
    through latex_to_unicode, is NFC-normalized, and is lowercased
    unless the filter is case sensitive.

    :param s: query or field value to normalize
    :returns: the normalized string
    """
    # The strict check must come first, otherwise it is never reached.
    if self.strict:
        return s
    # Note: in theory latex_to_unicode also normalizes
    s = unicodedata.normalize('NFC', latex_to_unicode(s))
    return s if self.case else s.lower()
class FieldFilter(QueryFilter):
"""Generic filter of form `query in paper['field']`"""
def __init__(self, field, query, case_sensitive=None, strict=False):
    """Create a filter that looks for `query` in the paper field `field`.

    :param field: name of the paper field to inspect
    :param query: query string, handed to QueryFilter
    :param case_sensitive: passed on unchanged to QueryFilter
    :param strict: passed on unchanged to QueryFilter
    """
    super(FieldFilter, self).__init__(query, case_sensitive=case_sensitive,
                                      strict=strict)
    self.field = field
def __call__(self, paper):
@ -76,7 +89,7 @@ class YearFilter(QueryFilter):
whose year field is set and can be converted to an int.
"""
def __init__(self, query, case_sensitive=None):
def __init__(self, query):
split = query.split('-')
self.start = self._str_to_year(split[0])
if len(split) == 1:
@ -120,25 +133,29 @@ def _get_field_value(query_block):
return (field, value)
def _query_block_to_filter(query_block, case_sensitive=None, strict=False):
    """Build the filter object matching a single query block.

    :param query_block: one query block; split into (field, value) by
        _get_field_value
    :param case_sensitive: forwarded to the tag, author and generic
        field filters
    :param strict: forwarded to the same filters
    :returns: a TagFilter, AuthorFilter, YearFilter or FieldFilter
    """
    field, value = _get_field_value(query_block)
    if field == 'tag':
        return TagFilter(value, case_sensitive=case_sensitive, strict=strict)
    elif field == 'author':
        return AuthorFilter(value, case_sensitive=case_sensitive,
                            strict=strict)
    elif field == 'year':
        # YearFilter is built from the value alone.
        return YearFilter(value)
    else:
        return FieldFilter(field, value, case_sensitive=case_sensitive,
                           strict=strict)
# TODO implement search by type of document
def get_paper_filter(query, case_sensitive=None, strict=False):
    """Return a predicate accepting the papers that match every query block.

    If case_sensitive is not given, only check case if query
    is not lowercase.

    :args query: list of query blocks (strings)
    :param case_sensitive: forwarded to each block filter
    :param strict: forwarded to each block filter
    :returns: a function taking a paper and returning a bool; an empty
        query accepts every paper
    """
    filters = [_query_block_to_filter(query_block,
                                      case_sensitive=case_sensitive,
                                      strict=strict)
               for query_block in query]
    # A generator lets all() stop at the first filter that rejects the paper.
    return lambda paper: all(f(paper) for f in filters)

@ -101,6 +101,22 @@ class TestCheckField(unittest.TestCase):
self.assertTrue(
FieldFilter('title', "Jalapen\u0303o")(latexenc_paper))
def test_strict(self):
    """Strict field filters must not match across unicode or brace differences."""
    paper = doe_paper.deepcopy()

    # Title holds the single code point U+00F1; the query spells it as
    # 'n' followed by the combining tilde U+0303.
    paper.bibentry['Doe2013']['title'] = "Jalape\u00f1o"
    decomposed = FieldFilter('title', "Jalapen\u0303o", strict=True)
    self.assertFalse(decomposed(paper))

    # Title carries braces around the first letter; the query does not.
    paper.bibentry['Doe2013']['title'] = "{G}ros"
    unbraced = FieldFilter('title', "Gros", strict=True)
    self.assertFalse(unbraced(paper))
def test_strict_implies_case(self):
    """A strict filter rejects a case mismatch even with case_sensitive=False."""
    paper = doe_paper.deepcopy()
    paper.bibentry['Doe2013']['title'] = "Gros"
    lowercase_query = FieldFilter('title', "gros",
                                  case_sensitive=False, strict=True)
    self.assertFalse(lowercase_query(paper))
class TestCheckQueryBlock(unittest.TestCase):
@ -146,12 +162,19 @@ class TestFilterPaper(unittest.TestCase):
self.assertTrue(get_paper_filter(['title:El'])(latexenc_paper))
self.assertTrue(get_paper_filter(['title:Niño'])(latexenc_paper))
self.assertTrue(get_paper_filter(['author:erdős'])(latexenc_paper))
self.assertTrue(get_paper_filter(['title:{E}l'])(latexenc_paper))
def test_normalize_unicode(self):
    """A combining-tilde query matches a title written with a latex tilde command."""
    latexenc_paper = doe_paper.deepcopy()
    # The backslash is doubled: a lone backslash before '~' is not a valid
    # escape sequence in a non-raw string. The string value is unchanged.
    latexenc_paper.bibentry['Doe2013']['title'] = "{E}l Ni{\\~n}o"
    self.assertTrue(get_paper_filter(['title:Nin\u0303o'])(latexenc_paper))
def test_strict(self):
    """With strict=True a combining-tilde query does not match a latex tilde title."""
    latexenc_paper = doe_paper.deepcopy()
    # The backslash is doubled: a lone backslash before '~' is not a valid
    # escape sequence in a non-raw string. The string value is unchanged.
    latexenc_paper.bibentry['Doe2013']['title'] = "El Ni{\\~n}o"
    strict_filter = get_paper_filter(['title:Nin\u0303o'], strict=True)
    self.assertFalse(strict_filter(latexenc_paper))
if __name__ == '__main__':
unittest.main()

@ -398,6 +398,33 @@ class TestList(DataCommandTestCase):
outs = self.execute_cmds(cmds)
self.assertEqual(0 + 1, len(outs[-1].split('\n')))
def test_list_strict_forces_case(self):
    """`list --ignore-case --strict` with a mixed-case query yields one output line."""
    outs = self.execute_cmds([
        'pubs init',
        'pubs list',
        'pubs import data/',
        'pubs list --ignore-case --strict title:lAnguage',
    ])
    listing_lines = outs[-1].split('\n')
    # NOTE(review): 0 + 1 presumably means no entries plus the empty string
    # after the trailing newline -- confirm.
    self.assertEqual(0 + 1, len(listing_lines))
def test_list_strict(self):
    """`list --strict` with a brace-protected query yields one output line."""
    outs = self.execute_cmds([
        'pubs init',
        'pubs list',
        'pubs import data/',
        'pubs list --strict title:{L}anguage',
    ])
    listing_lines = outs[-1].split('\n')
    # NOTE(review): 0 + 1 presumably means no entries plus the empty string
    # after the trailing newline -- confirm.
    self.assertEqual(0 + 1, len(listing_lines))
def test_list_latex_protection(self):
    """Without `--strict`, a brace-protected query yields two output lines."""
    outs = self.execute_cmds([
        'pubs init',
        'pubs list',
        'pubs import data/',
        'pubs list title:{L}anguage',
    ])
    listing_lines = outs[-1].split('\n')
    # NOTE(review): 1 + 1 presumably means one entry plus the empty string
    # after the trailing newline -- confirm.
    self.assertEqual(1 + 1, len(listing_lines))
class TestTag(DataCommandTestCase):

Loading…
Cancel
Save