Fix encoding of keywords.
Also introduce ustr, an alias for unicode on Python 2 and str on Python 3, to replace direct uses of unicode.
parent 9b6f6db297
commit 7713e5d80e
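
The heart of the change is the small compatibility alias added in the p3.py hunk below. As a standalone sketch of that pattern (outside the project's module layout):

    import sys

    if sys.version_info[0] == 2:
        ustr = unicode   # Python 2: the text type is unicode
    else:
        ustr = str       # Python 3: str is already the text type

    # Call sites can now write ustr(s) or isinstance(s, ustr) on either
    # interpreter, instead of referencing unicode directly, which would
    # raise NameError on Python 3.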
@@ -1,6 +1,8 @@
 import unicodedata
 import re
 
+from .p3 import ustr
+
 # citekey stuff
 
 CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
@@ -10,14 +12,14 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
                                 % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))
 
 def str2citekey(s):
-    key = unicodedata.normalize('NFKD', unicode(s)).encode('ascii', 'ignore')
+    key = unicodedata.normalize('NFKD', ustr(s)).encode('ascii', 'ignore')
     key = CITEKEY_EXCLUDE_RE.sub('', key)
     # Normalize chars and remove non-ascii
     return key
 
 def check_citekey(citekey):
     # TODO This is not the right way to test that (17/12/2012)
-    if unicode(citekey) != str2citekey(citekey):
+    if ustr(citekey) != str2citekey(citekey):
         raise ValueError("Invalid citekey: %s" % citekey)
 
 def verify_bibdata(bibdata):
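
For context on the hunk above: str2citekey decomposes accented characters (NFKD), drops whatever is not ASCII, and strips forbidden characters, so any text object yields a plain citekey. A Python 3 flavoured standalone sketch of the same idea (demo_str2citekey and the shortened EXCLUDE pattern are illustrative, not project code):

    import re
    import unicodedata

    # Shortened stand-in for CITEKEY_EXCLUDE_RE (the real pattern also
    # covers control characters).
    EXCLUDE = re.compile('[%s]' % re.escape(' \t\n(){}@,='))

    def demo_str2citekey(s):
        # NFKD splits 'ü' into 'u' plus a combining diaeresis; encoding to
        # ASCII with 'ignore' then drops the combining mark.
        key = unicodedata.normalize('NFKD', str(s)).encode('ascii', 'ignore')
        return EXCLUDE.sub('', key.decode('ascii'))

    print(demo_str2citekey('Müller 2019'))   # -> Muller2019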
@@ -20,6 +20,7 @@ def sanitize_citekey(record):
     record['id'] = record['id'].strip('\n')
     return record
 
+
 def customizations(record):
     """ Use some functions delivered by the library
 
@@ -75,6 +76,8 @@ class EnDecoder(object):
         return ' and '.join(editor['name'] for editor in value)
     elif key == 'journal':
         return value['name']
+    elif key == 'keyword':
+        return ', '.join(keyword for keyword in value)
     else:
         return value
 
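
The new 'keyword' branch above simply serialises the keyword list into a comma-separated BibTeX value, e.g.:

    value = ['artificial intelligence', 'Turing test']
    print(', '.join(keyword for keyword in value))
    # -> artificial intelligence, Turing test

The tests added at the end of this diff check the opposite direction too: decoding is expected to hand the keywords back as a list.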
@@ -4,10 +4,12 @@ if sys.version_info[0] == 2:
     import ConfigParser as configparser
     import StringIO as io
     input = raw_input
+    ustr = unicode
 else:
     import configparser
     import io
+    ustr = str
 
 configparser = configparser
 io = io
 input = input
@@ -4,6 +4,7 @@ import sys
 
 from .beets_ui import _encoding, input_
 from .content import editor_input
+from .p3 import ustr
 from . import color
 
 # package-shared ui that can be accessed using :
@@ -39,7 +40,7 @@ class UI:
         replaces it.
         """
         txt = [s.encode(self.encoding, 'replace')
-               if isinstance(s, unicode) else s
+               if isinstance(s, ustr) else s
                for s in strings]
         print(' '.join(txt))
 
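
In the hunk above only text strings are re-encoded; any other object in strings is passed through untouched. The 'replace' error handler keeps characters outside the target encoding from raising, e.g. (illustrative values only):

    # Unencodable characters degrade to '?' instead of raising
    # UnicodeEncodeError:
    print(u'naïve'.encode('ascii', 'replace'))   # na?ve (b'na?ve' on Python 3)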
@@ -51,15 +51,39 @@ class TestEnDecode(unittest.TestCase):
             bibentry2 = entry2[citekey]
             for key, value in bibentry1.items():
                 self.assertEqual(bibentry1[key], bibentry2[key])
 
         self.assertEqual(bibraw1, bibraw2)
 
-    def test_endecode_metadata(self):
+    def test_endecode_keyword(self):
+        decoder = endecoder.EnDecoder()
+        entry = decoder.decode_bibdata(turing_bib)
+        keywords = ['artificial intelligence', 'Turing test']
+        entry['turing1950computing']['keyword'] = keywords
+        bibraw = decoder.encode_bibdata(entry)
+        entry1 = decoder.decode_bibdata(bibraw)
+        self.assertIn('keyword', entry1['turing1950computing'])
+        self.assertEqual(set(keywords),
+                         set(entry1['turing1950computing']['keyword']))
+
+    def test_endecode_keyword_as_keywords(self):
+        decoder = endecoder.EnDecoder()
+        keywords = [u'artificial intelligence', u'Turing test']
+        # Add keywords to bibraw
+        keyword_str = 'keywords = {artificial intelligence, Turing test},\n'
+        biblines = turing_bib.splitlines()
+        biblines.insert(-3, keyword_str)
+        bibsrc = '\n'.join(biblines)
+        print bibsrc
+        entry = decoder.decode_bibdata(bibsrc)['turing1950computing']
+        print entry
+        self.assertNotIn(u'keywords', entry)
+        self.assertIn(u'keyword', entry)
+        self.assertEqual(set(keywords), set(entry[u'keyword']))
+
+
+    def test_endecode_metadata(self):
         decoder = endecoder.EnDecoder()
         entry = decoder.decode_metadata(metadata_raw0)
         metadata_output0 = decoder.encode_metadata(entry)
 
         self.assertEqual(set(metadata_raw0.split('\n')), set(metadata_output0.split('\n')))
 
 
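
The second new test pins down the behaviour the fix targets: an entry written with the plural field name keywords should come back from the decoder under the singular keyword key, split into individual values. A rough sketch of that expectation in plain string handling (not the project's parser):

    bib_field = 'keywords = {artificial intelligence, Turing test},'
    raw_value = bib_field.split('{', 1)[1].rstrip('},')
    keywords = [kw.strip() for kw in raw_value.split(',')]
    print(keywords)   # -> ['artificial intelligence', 'Turing test']
    # decode_bibdata is expected to expose exactly this list under
    # entry['keyword'], with no 'keywords' key left in the entry.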