Fix encoding of keywords.
Also introduces `ustr`, a text-type alias bound to `unicode` on Python 2 and to `str` on Python 3, and uses it in place of direct `unicode` references.
This commit is contained in:
parent
9b6f6db297
commit
7713e5d80e
@ -1,6 +1,8 @@
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
from .p3 import ustr
|
||||
|
||||
# citekey stuff
|
||||
|
||||
CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
|
||||
@ -10,14 +12,14 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
|
||||
% re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))
|
||||
|
||||
def str2citekey(s):
|
||||
key = unicodedata.normalize('NFKD', unicode(s)).encode('ascii', 'ignore')
|
||||
key = unicodedata.normalize('NFKD', ustr(s)).encode('ascii', 'ignore')
|
||||
key = CITEKEY_EXCLUDE_RE.sub('', key)
|
||||
# Normalize chars and remove non-ascii
|
||||
return key
|
||||
|
||||
def check_citekey(citekey):
|
||||
# TODO This is not the right way to test that (17/12/2012)
|
||||
if unicode(citekey) != str2citekey(citekey):
|
||||
if ustr(citekey) != str2citekey(citekey):
|
||||
raise ValueError("Invalid citekey: %s" % citekey)
|
||||
|
||||
def verify_bibdata(bibdata):
|
||||
|
@ -20,6 +20,7 @@ def sanitize_citekey(record):
|
||||
record['id'] = record['id'].strip('\n')
|
||||
return record
|
||||
|
||||
|
||||
def customizations(record):
|
||||
""" Use some functions delivered by the library
|
||||
|
||||
@ -75,6 +76,8 @@ class EnDecoder(object):
|
||||
return ' and '.join(editor['name'] for editor in value)
|
||||
elif key == 'journal':
|
||||
return value['name']
|
||||
elif key == 'keyword':
|
||||
return ', '.join(keyword for keyword in value)
|
||||
else:
|
||||
return value
|
||||
|
||||
|
@ -4,9 +4,11 @@ if sys.version_info[0] == 2:
|
||||
import ConfigParser as configparser
|
||||
import StringIO as io
|
||||
input = raw_input
|
||||
ustr = unicode
|
||||
else:
|
||||
import configparser
|
||||
import io
|
||||
ustr = str
|
||||
|
||||
configparser = configparser
|
||||
io = io
|
||||
|
@ -4,6 +4,7 @@ import sys
|
||||
|
||||
from .beets_ui import _encoding, input_
|
||||
from .content import editor_input
|
||||
from .p3 import ustr
|
||||
from . import color
|
||||
|
||||
# package-shared ui that can be accessed using :
|
||||
@ -39,7 +40,7 @@ class UI:
|
||||
replaces it.
|
||||
"""
|
||||
txt = [s.encode(self.encoding, 'replace')
|
||||
if isinstance(s, unicode) else s
|
||||
if isinstance(s, ustr) else s
|
||||
for s in strings]
|
||||
print(' '.join(txt))
|
||||
|
||||
|
@ -51,15 +51,39 @@ class TestEnDecode(unittest.TestCase):
|
||||
bibentry2 = entry2[citekey]
|
||||
for key, value in bibentry1.items():
|
||||
self.assertEqual(bibentry1[key], bibentry2[key])
|
||||
|
||||
self.assertEqual(bibraw1, bibraw2)
|
||||
|
||||
def test_endecode_metadata(self):
|
||||
def test_endecode_keyword(self):
|
||||
decoder = endecoder.EnDecoder()
|
||||
entry = decoder.decode_bibdata(turing_bib)
|
||||
keywords = ['artificial intelligence', 'Turing test']
|
||||
entry['turing1950computing']['keyword'] = keywords
|
||||
bibraw = decoder.encode_bibdata(entry)
|
||||
entry1 = decoder.decode_bibdata(bibraw)
|
||||
self.assertIn('keyword', entry1['turing1950computing'])
|
||||
self.assertEqual(set(keywords),
|
||||
set(entry1['turing1950computing']['keyword']))
|
||||
|
||||
def test_endecode_keyword_as_keywords(self):
|
||||
decoder = endecoder.EnDecoder()
|
||||
keywords = [u'artificial intelligence', u'Turing test']
|
||||
# Add keywords to bibraw
|
||||
keyword_str = 'keywords = {artificial intelligence, Turing test},\n'
|
||||
biblines = turing_bib.splitlines()
|
||||
biblines.insert(-3, keyword_str)
|
||||
bibsrc = '\n'.join(biblines)
|
||||
print bibsrc
|
||||
entry = decoder.decode_bibdata(bibsrc)['turing1950computing']
|
||||
print entry
|
||||
self.assertNotIn(u'keywords', entry)
|
||||
self.assertIn(u'keyword', entry)
|
||||
self.assertEqual(set(keywords), set(entry[u'keyword']))
|
||||
|
||||
|
||||
def test_endecode_metadata(self):
|
||||
decoder = endecoder.EnDecoder()
|
||||
entry = decoder.decode_metadata(metadata_raw0)
|
||||
metadata_output0 = decoder.encode_metadata(entry)
|
||||
|
||||
self.assertEqual(set(metadata_raw0.split('\n')), set(metadata_output0.split('\n')))
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user