Adds and checks Unicode normalization. Fixes #103.

main
Olivier Mangin 7 years ago
parent 1bcbf65dd8
commit f3ec9621ee

@ -1,4 +1,7 @@
import unicodedata
from bibtexparser.latexenc import latex_to_unicode
from . import bibstruct
@ -34,7 +37,8 @@ class QueryFilter(object):
return self.query in self._normalize(field_value)
def _normalize(self, s):
    """Return ``s`` in the canonical form used for query matching.

    The text is passed through ``latex_to_unicode``, brought to Unicode
    NFC so that composed and decomposed spellings of the same character
    compare equal, and lower-cased unless ``self.case`` is truthy.

    :param s: raw field value (may contain LaTeX escapes).
    :returns: the normalized string.
    """
    # Decode exactly once: a second latex_to_unicode pass is redundant
    # and would re-interpret text that has already been decoded.
    # Note: in theory latex_to_unicode also normalizes; NFC is applied
    # explicitly so that matching does not rely on that.
    s = unicodedata.normalize('NFC', latex_to_unicode(s))
    return s if self.case else s.lower()

@ -92,6 +92,12 @@ class TestCheckField(unittest.TestCase):
self.assertTrue(
FieldFilter('title', 'Gr{\\\"u}n')(latexenc_paper))
def test_normalize_unicode(self):
    """A decomposed query ('n' + U+0303) matches a precomposed title."""
    paper = doe_paper.deepcopy()
    # The stored title uses the single precomposed code point U+00F1.
    paper.bibentry['Doe2013']['title'] = "Jalape\u00f1o"
    title_filter = FieldFilter('title', "Jalapen\u0303o")
    self.assertTrue(title_filter(paper))
class TestCheckQueryBlock(unittest.TestCase):
@ -138,6 +144,11 @@ class TestFilterPaper(unittest.TestCase):
self.assertTrue(get_paper_filter(['title:Niño'])(latexenc_paper))
self.assertTrue(get_paper_filter(['author:erdős'])(latexenc_paper))
def test_normalize_unicode(self):
    """A decomposed query matches a LaTeX-encoded title via the paper filter."""
    latexenc_paper = doe_paper.deepcopy()
    # The backslash is escaped explicitly: '\~' is not a recognised
    # string escape, and relying on it being passed through is deprecated.
    latexenc_paper.bibentry['Doe2013']['title'] = "{E}l Ni{\\~n}o"
    # The query spells the n-tilde as 'n' followed by combining U+0303.
    paper_filter = get_paper_filter(['title:Nin\u0303o'])
    self.assertTrue(paper_filter(latexenc_paper))
# Script entry point: run this module's test cases when executed directly.
if __name__ == '__main__':
    unittest.main()

Loading…
Cancel
Save