From f3ec9621ee8bc0d8ec2267fff6e97e5028f80892 Mon Sep 17 00:00:00 2001
From: Olivier Mangin
Date: Wed, 31 Jan 2018 10:30:27 -0500
Subject: [PATCH] Adds and checks unicode normalization. Fixes #103.

---
 pubs/query.py         |  6 +++++-
 tests/test_queries.py | 11 +++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/pubs/query.py b/pubs/query.py
index 40f3460..68b3420 100644
--- a/pubs/query.py
+++ b/pubs/query.py
@@ -1,4 +1,7 @@
+import unicodedata
+
 from bibtexparser.latexenc import latex_to_unicode
+
 from . import bibstruct
 
 
@@ -34,7 +37,8 @@ class QueryFilter(object):
         return self.query in self._normalize(field_value)
 
     def _normalize(self, s):
-        s = latex_to_unicode(s)
+        s = unicodedata.normalize('NFC', latex_to_unicode(s))
+        # Note: in theory latex_to_unicode also normalizes
         return s if self.case else s.lower()
 
 
diff --git a/tests/test_queries.py b/tests/test_queries.py
index 0508794..e60c95f 100644
--- a/tests/test_queries.py
+++ b/tests/test_queries.py
@@ -92,6 +92,12 @@ class TestCheckField(unittest.TestCase):
         self.assertTrue(
             FieldFilter('title', 'Gr{\\\"u}n')(latexenc_paper))
 
+    def test_normalize_unicode(self):
+        latexenc_paper = doe_paper.deepcopy()
+        latexenc_paper.bibentry['Doe2013']['title'] = "Jalape\u00f1o"
+        self.assertTrue(
+            FieldFilter('title', "Jalapen\u0303o")(latexenc_paper))
+
 
 class TestCheckQueryBlock(unittest.TestCase):
 
@@ -138,6 +144,11 @@ class TestFilterPaper(unittest.TestCase):
         self.assertTrue(get_paper_filter(['title:Niño'])(latexenc_paper))
         self.assertTrue(get_paper_filter(['author:erdős'])(latexenc_paper))
 
+    def test_normalize_unicode(self):
+        latexenc_paper = doe_paper.deepcopy()
+        latexenc_paper.bibentry['Doe2013']['title'] = "{E}l Ni{\~n}o"
+        self.assertTrue(get_paper_filter(['title:Nin\u0303o'])(latexenc_paper))
+
 
 if __name__ == '__main__':
     unittest.main()