Adds and checks unicode normalization. Fixes #103.
This commit is contained in:
parent
1bcbf65dd8
commit
f3ec9621ee
@ -1,4 +1,7 @@
|
||||
import unicodedata
|
||||
|
||||
from bibtexparser.latexenc import latex_to_unicode
|
||||
|
||||
from . import bibstruct
|
||||
|
||||
|
||||
@ -34,7 +37,8 @@ class QueryFilter(object):
|
||||
return self.query in self._normalize(field_value)
|
||||
|
||||
def _normalize(self, s):
    """Return ``s`` in the canonical form used for query matching.

    LaTeX markup is converted with ``latex_to_unicode`` and the result is
    put into Unicode NFC form, so that a precomposed character (U+00F1)
    and its combining-sequence spelling (``n`` + U+0303) compare equal.

    :param s: raw field value or query text.
    :returns: the normalized text, lower-cased unless ``self.case`` is
        truthy.
    """
    # Note: in theory latex_to_unicode also normalizes; the explicit NFC
    # pass keeps matching correct without relying on that.
    s = unicodedata.normalize('NFC', latex_to_unicode(s))
    return s if self.case else s.lower()
|
||||
|
||||
|
||||
|
@ -92,6 +92,12 @@ class TestCheckField(unittest.TestCase):
|
||||
self.assertTrue(
|
||||
FieldFilter('title', 'Gr{\\\"u}n')(latexenc_paper))
|
||||
|
||||
def test_normalize_unicode(self):
    """A combining-tilde query matches a title stored with U+00F1."""
    paper = doe_paper.deepcopy()
    paper.bibentry['Doe2013']['title'] = "Jalape\u00f1o"
    # The query spells the same letter as 'n' followed by U+0303.
    title_filter = FieldFilter('title', "Jalapen\u0303o")
    self.assertTrue(title_filter(paper))
|
||||
|
||||
|
||||
class TestCheckQueryBlock(unittest.TestCase):
|
||||
|
||||
@ -138,6 +144,11 @@ class TestFilterPaper(unittest.TestCase):
|
||||
self.assertTrue(get_paper_filter(['title:Niño'])(latexenc_paper))
|
||||
self.assertTrue(get_paper_filter(['author:erdős'])(latexenc_paper))
|
||||
|
||||
def test_normalize_unicode(self):
    """A LaTeX-encoded title matches a query using a combining tilde."""
    latexenc_paper = doe_paper.deepcopy()
    # The backslash is escaped explicitly: "\~" is not a recognised escape
    # sequence and triggers a warning on recent Python versions. The
    # resulting string value is unchanged.
    latexenc_paper.bibentry['Doe2013']['title'] = "{E}l Ni{\\~n}o"
    self.assertTrue(get_paper_filter(['title:Nin\u0303o'])(latexenc_paper))
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user