Fix encoding of keywords.
Also introduce ustr, an alias for unicode on Python 2 and str on Python 3, to replace direct uses of unicode.
parent 9b6f6db297
commit 7713e5d80e
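
The heart of the change is the small compatibility alias added in the p3.py hunk below. As a standalone sketch of that pattern (outside the project's module layout):

    import sys

    if sys.version_info[0] == 2:
        ustr = unicode   # Python 2: the text type is unicode
    else:
        ustr = str       # Python 3: str is already the text type

    # Call sites can now write ustr(s) or isinstance(s, ustr) on either
    # interpreter, instead of referencing unicode directly, which would
    # raise NameError on Python 3.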
@@ -1,6 +1,8 @@
 import unicodedata
 import re
 
+from .p3 import ustr
+
 # citekey stuff
 
 CONTROL_CHARS = ''.join(map(unichr, range(0, 32) + range(127, 160)))
@@ -10,14 +12,14 @@ CITEKEY_EXCLUDE_RE = re.compile('[%s]'
                                 % re.escape(CONTROL_CHARS + CITEKEY_FORBIDDEN_CHARS))
 
 def str2citekey(s):
-    key = unicodedata.normalize('NFKD', unicode(s)).encode('ascii', 'ignore')
+    key = unicodedata.normalize('NFKD', ustr(s)).encode('ascii', 'ignore')
     key = CITEKEY_EXCLUDE_RE.sub('', key)
     # Normalize chars and remove non-ascii
     return key
 
 def check_citekey(citekey):
     # TODO This is not the right way to test that (17/12/2012)
-    if unicode(citekey) != str2citekey(citekey):
+    if ustr(citekey) != str2citekey(citekey):
         raise ValueError("Invalid citekey: %s" % citekey)
 
 def verify_bibdata(bibdata):
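
For context on the hunk above: str2citekey decomposes accented characters (NFKD), drops whatever is not ASCII, and strips forbidden characters, so any text object yields a plain citekey. A Python 3 flavoured standalone sketch of the same idea (demo_str2citekey and the shortened EXCLUDE pattern are illustrative, not project code):

    import re
    import unicodedata

    # Shortened stand-in for CITEKEY_EXCLUDE_RE (the real pattern also
    # covers control characters).
    EXCLUDE = re.compile('[%s]' % re.escape(' \t\n(){}@,='))

    def demo_str2citekey(s):
        # NFKD splits 'ü' into 'u' plus a combining diaeresis; encoding to
        # ASCII with 'ignore' then drops the combining mark.
        key = unicodedata.normalize('NFKD', str(s)).encode('ascii', 'ignore')
        return EXCLUDE.sub('', key.decode('ascii'))

    print(demo_str2citekey('Müller 2019'))   # -> Muller2019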
@@ -20,6 +20,7 @@ def sanitize_citekey(record):
     record['id'] = record['id'].strip('\n')
     return record
 
+
 def customizations(record):
     """ Use some functions delivered by the library
 
@@ -75,6 +76,8 @@ class EnDecoder(object):
         return ' and '.join(editor['name'] for editor in value)
     elif key == 'journal':
         return value['name']
+    elif key == 'keyword':
+        return ', '.join(keyword for keyword in value)
     else:
         return value
 
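
The new 'keyword' branch above simply serialises the keyword list into a comma-separated BibTeX value, e.g.:

    value = ['artificial intelligence', 'Turing test']
    print(', '.join(keyword for keyword in value))
    # -> artificial intelligence, Turing test

The tests added at the end of this diff check the opposite direction too: decoding is expected to hand the keywords back as a list.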
@@ -4,10 +4,12 @@ if sys.version_info[0] == 2:
     import ConfigParser as configparser
     import StringIO as io
     input = raw_input
+    ustr = unicode
 else:
     import configparser
     import io
+    ustr = str
 
 configparser = configparser
 io = io
 input = input
@@ -4,6 +4,7 @@ import sys
 
 from .beets_ui import _encoding, input_
 from .content import editor_input
+from .p3 import ustr
 from . import color
 
 # package-shared ui that can be accessed using :
@@ -39,7 +40,7 @@ class UI:
         replaces it.
         """
         txt = [s.encode(self.encoding, 'replace')
-               if isinstance(s, unicode) else s
+               if isinstance(s, ustr) else s
                for s in strings]
         print(' '.join(txt))
 
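
In the hunk above only text strings are re-encoded; any other object in strings is passed through untouched. The 'replace' error handler keeps characters outside the target encoding from raising, e.g. (illustrative values only):

    # Unencodable characters degrade to '?' instead of raising
    # UnicodeEncodeError:
    print(u'naïve'.encode('ascii', 'replace'))   # na?ve (b'na?ve' on Python 3)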
@@ -51,15 +51,39 @@ class TestEnDecode(unittest.TestCase):
             bibentry2 = entry2[citekey]
             for key, value in bibentry1.items():
                 self.assertEqual(bibentry1[key], bibentry2[key])
 
         self.assertEqual(bibraw1, bibraw2)
 
-    def test_endecode_metadata(self):
+    def test_endecode_keyword(self):
+        decoder = endecoder.EnDecoder()
+        entry = decoder.decode_bibdata(turing_bib)
+        keywords = ['artificial intelligence', 'Turing test']
+        entry['turing1950computing']['keyword'] = keywords
+        bibraw = decoder.encode_bibdata(entry)
+        entry1 = decoder.decode_bibdata(bibraw)
+        self.assertIn('keyword', entry1['turing1950computing'])
+        self.assertEqual(set(keywords),
+                         set(entry1['turing1950computing']['keyword']))
+
+    def test_endecode_keyword_as_keywords(self):
+        decoder = endecoder.EnDecoder()
+        keywords = [u'artificial intelligence', u'Turing test']
+        # Add keywords to bibraw
+        keyword_str = 'keywords = {artificial intelligence, Turing test},\n'
+        biblines = turing_bib.splitlines()
+        biblines.insert(-3, keyword_str)
+        bibsrc = '\n'.join(biblines)
+        print bibsrc
+        entry = decoder.decode_bibdata(bibsrc)['turing1950computing']
+        print entry
+        self.assertNotIn(u'keywords', entry)
+        self.assertIn(u'keyword', entry)
+        self.assertEqual(set(keywords), set(entry[u'keyword']))
+
+
+    def test_endecode_metadata(self):
         decoder = endecoder.EnDecoder()
         entry = decoder.decode_metadata(metadata_raw0)
         metadata_output0 = decoder.encode_metadata(entry)
 
         self.assertEqual(set(metadata_raw0.split('\n')), set(metadata_output0.split('\n')))
 
 
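
The second new test pins down the behaviour the fix targets: an entry written with the plural field name keywords should come back from the decoder under the singular keyword key, split into individual values. A rough sketch of that expectation in plain string handling (not the project's parser):

    bib_field = 'keywords = {artificial intelligence, Turing test},'
    raw_value = bib_field.split('{', 1)[1].rstrip('},')
    keywords = [kw.strip() for kw in raw_value.split(',')]
    print(keywords)   # -> ['artificial intelligence', 'Turing test']
    # decode_bibdata is expected to expose exactly this list under
    # entry['keyword'], with no 'keywords' key left in the entry.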