diff --git a/pubs/bibstruct.py b/pubs/bibstruct.py
index d3e6614..1d19f4f 100644
--- a/pubs/bibstruct.py
+++ b/pubs/bibstruct.py
@@ -1,10 +1,12 @@
 from __future__ import unicode_literals
 
 import unicodedata
 import re
+from string import Formatter
 
 from .p3 import ustr, uchr
 
+
 # Citekey stuff
 
 TYPE_KEY = 'ENTRYTYPE'
@@ -54,28 +56,84 @@ def author_last(author_str):
 def valid_citekey(citekey):
     """Return if a citekey is a valid filename or not"""
     # FIXME: a bit crude, but efficient for now (and allows unicode citekeys)
-    return not '/' in citekey
+    return '/' not in citekey
+
+
+class CitekeyFormatter(Formatter):
+    """Formatter that builds a citekey from the fields of a bib entry.
+
+    On top of the fields of the entry, the substitutions
+    'author_last_name' and 'short_title' are available, and the format
+    specification accepts a leading 'u' (uppercase) or 'l' (lowercase)
+    modifier. Every substituted value is sanitized with `str2citekey`.
+    """
+
+    def format_field(self, val, fmt):
+        """Format `val`, applying the optional leading case modifier."""
+        modifier = fmt[:1]
+        if modifier == 'u':
+            val, fmt = ustr(val).upper(), fmt[1:]
+        elif modifier == 'l':
+            val, fmt = ustr(val).lower(), fmt[1:]
+        return str2citekey(format(val, fmt))
+
+    def get_value(self, key, args, entry):
+        """Return the value substituted for `key`.
+
+        :raise ValueError:  if `key` is not a string, or if the entry
+                            lacks the field(s) needed to compute it.
+        """
+        if not isinstance(key, (str, ustr)):
+            raise ValueError('Key must be a str instance')
+
+        if key == 'author_last_name':
+            # fall back on the editors when there is no author, as
+            # generate_citekey has always done.
+            people = entry.get('author') or entry.get('editor')
+            if not people:
+                raise ValueError(
+                    "No author or editor defined: cannot generate a citekey.")
+            return author_last(people[0])
+        if key == 'short_title' and 'title' in entry:
+            return get_first_word(entry['title'])
+        if key == 'year' and 'year' not in entry:
+            # a missing year is tolerated and simply left out of the citekey.
+            return ''
+
+        # 'author' and 'editor' stand in for each other when one is missing.
+        substitutes = {'author': 'editor', 'editor': 'author'}
+        field = key
+        if key not in entry and substitutes.get(key) in entry:
+            field = substitutes[key]
+        if field not in entry:
+            raise ValueError(
+                "No {} defined: cannot generate a citekey.".format(key))
+        return entry[field]
+
+
+def get_first_word(title):
+    """
+    Returns the first word of the title as used in Google Scholar or Arxiv
+    citekeys, or an empty string if the title has no suitable word.
+    """
+    word_blacklist = {'and', 'on', 'in', 'of', 'the', 'a', 'an', 'at'}
+    # [^\W_] matches letters and digits of any script, so that words with
+    # non-ASCII letters are not cut in pieces.
+    words = re.findall(r'[^\W_]+', title, flags=re.UNICODE)
+    return next((w for w in words if w.lower() not in word_blacklist), '')
 
 
-def generate_citekey(bibdata):
+def generate_citekey(bibdata, format_string='{author_last_name}{year}'):
     """ Generate a citekey from bib_data.
 
+        :param format_string:  template of the citekey, with the
+                               substitutions of CitekeyFormatter.
         :raise ValueError:  if no author nor editor is defined.
     """
     citekey, entry = get_entry(bibdata)
-    author_key = 'author' if 'author' in entry else 'editor'
-    try:
-        first_author = entry[author_key][0]
-    except KeyError:
-        raise ValueError(
-            "No author or editor defined: cannot generate a citekey.")
-    try:
-        year = entry['year']
-    except KeyError:
-        year = ''
-    citekey = '{}{}'.format(''.join(author_last(first_author)), year)
-
-    return str2citekey(citekey)
+    # vformat is given the entry itself, so that no field name can clash
+    # with the arguments of Formatter.format.
+    return CitekeyFormatter().vformat(format_string, (), entry)
 
 
 def extract_docfile(bibdata, remove=False):
diff --git a/pubs/commands/add_cmd.py b/pubs/commands/add_cmd.py
index b56927e..d97f7ae 100644
--- a/pubs/commands/add_cmd.py
+++ b/pubs/commands/add_cmd.py
@@ -123,7 +123,10 @@ def command(conf, args):
 
     citekey = args.citekey
     if citekey is None:
-        base_key = bibstruct.extract_citekey(bibentry)
+        if conf['main']['normalize_citekey']:
+            base_key = bibstruct.generate_citekey(bibentry, conf['main']['citekey_format'])
+        else:
+            base_key = bibstruct.extract_citekey(bibentry)
         citekey = rp.unique_citekey(base_key, bibentry)
     elif citekey in rp:
         ui.error('citekey already exist {}.'.format(citekey))
diff --git a/pubs/config/spec.py b/pubs/config/spec.py
index a323ada..8e379d7 100644
--- a/pubs/config/spec.py
+++ b/pubs/config/spec.py
@@ -35,6 +35,27 @@ max_authors = integer(default=3)
 # the full python stack is printed.
 debug = boolean(default=False)
 
+# If true, the citekey is normalized using 'citekey_format' when adding new publications.
+normalize_citekey = boolean(default=False)
+
+# String specifying how to format the citekey. All strings of
+# the form '{{substitution:modifier}}' and '{{substitution}}' will
+# be substituted with their appropriate values. The following
+# substitutions are used:
+#     author_last_name: last name of the first author (or of the first editor if there is no author)
+#     year: year of publication (left empty if the publication has no year)
+#     short_title: first word of the title (excluding words such as "the", "an", ...)
+# modifiers:
+#     l: converts the text to lowercase
+#     u: converts the text to uppercase
+# examples:
+#     {{author_last_name:l}}{{year}} generates 'yang2020'
+#     {{author_last_name}}{{year}}{{short_title}} generates 'Yang2020Towards'
+#     {{author_last_name:l}}{{year}}{{short_title:l}} generates 'yang2020towards'
+#     {{author_last_name:u}}{{year}} generates 'YANG2020'
+#
+citekey_format = string(default='{{author_last_name:l}}{{year}}{{short_title:l}}')
+
 # which bibliographic fields to exclude from bibtex files. By default, none.
 # Please note that excluding critical fields such as `title` or `author`
 # will break many commands of pubs.
diff --git a/pubs/version.py b/pubs/version.py
index b4e3540..21320a8 100644
--- a/pubs/version.py
+++ b/pubs/version.py
@@ -1 +1 @@
-__version__ = '0.8.3'
+__version__ = '0.8.4'
diff --git a/tests/test_bibstruct.py b/tests/test_bibstruct.py
index 05af028..e3b4299 100644
--- a/tests/test_bibstruct.py
+++ b/tests/test_bibstruct.py
@@ -32,6 +32,58 @@ class TestGenerateCitekey(unittest.TestCase):
         key = bibstruct.generate_citekey(bibentry)
         self.assertEqual(key, 'Salinger1961')
 
+    def test_no_modifier(self):
+        template = '{author_last_name}{year}'
+        bibentry = copy.deepcopy(fixtures.doe_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'Doe2013')
+
+        bibentry = copy.deepcopy(fixtures.franny_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'Salinger1961')
+
+    def test_all_keys(self):
+        template = '{author_last_name}-{year}-{short_title}'
+        bibentry = copy.deepcopy(fixtures.doe_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'Doe-2013-Nice')
+
+        bibentry = copy.deepcopy(fixtures.franny_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'Salinger-1961-Franny')
+
+    def test_l_modifier(self):
+        template = '{author_last_name:l}{year:l}'
+        bibentry = copy.deepcopy(fixtures.doe_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'doe2013')
+
+        bibentry = copy.deepcopy(fixtures.franny_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'salinger1961')
+
+    def test_u_modifier(self):
+        template = '{author_last_name:u}{year:u}'
+        bibentry = copy.deepcopy(fixtures.doe_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'DOE2013')
+
+        bibentry = copy.deepcopy(fixtures.franny_bibentry)
+        key = bibstruct.generate_citekey(bibentry, template)
+        self.assertEqual(key, 'SALINGER1961')
+
+    def test_editor_fallback(self):
+        formatter = bibstruct.CitekeyFormatter()
+        key = formatter.format('{author_last_name}{year}',
+                               editor=['Doe, John'], year='2013')
+        self.assertEqual(key, 'Doe2013')
+
+    def test_missing_year(self):
+        formatter = bibstruct.CitekeyFormatter()
+        key = formatter.format('{author_last_name}{year}',
+                               author=['Doe, John'])
+        self.assertEqual(key, 'Doe')
+
 
 if __name__ == '__main__':
     unittest.main()