From c54de5c3b6506aab3f3b8a603128a843235323f8 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 18 Dec 2017 18:02:47 -0500 Subject: [PATCH 1/6] Refactors paper filtering from queries. --- pubs/commands/list_cmd.py | 117 +++++++++++++++++++++----------------- tests/test_queries.py | 79 +++++++++++++------------ 2 files changed, 105 insertions(+), 91 deletions(-) diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index 5884cdc..f3fa7b9 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -13,21 +13,21 @@ class InvalidQuery(ValueError): def parser(subparsers, conf): parser = subparsers.add_parser('list', help="list papers") parser.add_argument('-k', '--citekeys-only', action='store_true', - default=False, dest='citekeys', - help='Only returns citekeys of matching papers.') + default=False, dest='citekeys', + help='Only returns citekeys of matching papers.') parser.add_argument('-i', '--ignore-case', action='store_false', - default=None, dest='case_sensitive') + default=None, dest='case_sensitive') parser.add_argument('-I', '--force-case', action='store_true', - dest='case_sensitive') + dest='case_sensitive') parser.add_argument('-a', '--alphabetical', action='store_true', - dest='alphabetical', default=False, - help='lexicographic order on the citekeys.') + dest='alphabetical', default=False, + help='lexicographic order on the citekeys.') parser.add_argument('--no-docs', action='store_true', - dest='nodocs', default=False, - help='list only pubs without attached documents.') - + dest='nodocs', default=False, + help='list only pubs without attached documents.') parser.add_argument('query', nargs='*', - help='Paper query ("author:Einstein", "title:learning", "year:2000" or "tags:math")') + help='Paper query ("author:Einstein", "title:learning",' + '"year:2000" or "tags:math")') return parser @@ -38,8 +38,8 @@ def date_added(p): def command(conf, args): ui = get_ui() rp = repo.Repository(conf) - papers = filter(lambda p: 
filter_paper(p, args.query, - case_sensitive=args.case_sensitive), + papers = filter(get_paper_filter(args.query, + case_sensitive=args.case_sensitive), rp.all_papers()) if args.nodocs: papers = [p for p in papers if p.docpath is None] @@ -60,67 +60,82 @@ FIELD_ALIASES = { 'authors': 'author', 't': 'title', 'tags': 'tag', - } + 'y': 'year', +} -def _get_field_value(query_block): - split_block = query_block.split(':') - if len(split_block) != 2: - raise InvalidQuery("Invalid query (%s)" % query_block) - field = split_block[0] - if field in FIELD_ALIASES: - field = FIELD_ALIASES[field] - value = split_block[1] - return (field, value) +class QueryFilter(object): + + def __init__(self, query, case_sensitive=None): + if case_sensitive is None: + case_sensitive = not query.islower() + self.case = case_sensitive + self.query = self._lower(query) + + def __call__(self, paper): + raise NotImplementedError + + def _lower(self, s): + return s if self.case else s.lower() -def _lower(s, lower=True): - return s.lower() if lower else s +class FieldFilter(QueryFilter): + """Generic filter of form `query in paper['field']`""" + def __init__(self, field, query, case_sensitive=None): + super(FieldFilter, self).__init__(query, case_sensitive=case_sensitive) + self.field = field -def _check_author_match(paper, query, case_sensitive=False): - """Only checks within last names.""" - if not 'author' in paper.bibdata: - return False - return any([query in _lower(bibstruct.author_last(p), lower=(not case_sensitive)) - for p in paper.bibdata['author']]) + def __call__(self, paper): + return (self.field in paper.bibdata and + self.query in self._lower(paper.bibdata[self.field])) +class AuthorFilter(QueryFilter): -def _check_tag_match(paper, query, case_sensitive=False): - return any([query in _lower(t, lower=(not case_sensitive)) - for t in paper.tags]) + def __call__(self, paper): + """Only checks within last names.""" + if 'author' not in paper.bibdata: + return False + else: + return 
any([self.query in self._lower(bibstruct.author_last(author)) + for author in paper.bibdata['author']]) -def _check_field_match(paper, field, query, case_sensitive=False): - return query in _lower(paper.bibdata[field], - lower=(not case_sensitive)) +class TagFilter(QueryFilter): + + def __call__(self, paper): + return any([self.query in self._lower(t) for t in paper.tags]) + + +def _get_field_value(query_block): + split_block = query_block.split(':') + if len(split_block) != 2: + raise InvalidQuery("Invalid query (%s)" % query_block) + field = split_block[0] + if field in FIELD_ALIASES: + field = FIELD_ALIASES[field] + value = split_block[1] + return (field, value) -def _check_query_block(paper, query_block, case_sensitive=None): +def _query_block_to_filter(query_block, case_sensitive=None): field, value = _get_field_value(query_block) - if case_sensitive is None: - case_sensitive = not value.islower() - elif not case_sensitive: - value = value.lower() if field == 'tag': - return _check_tag_match(paper, value, case_sensitive=case_sensitive) + return TagFilter(value, case_sensitive=case_sensitive) elif field == 'author': - return _check_author_match(paper, value, case_sensitive=case_sensitive) - elif field in paper.bibdata: - return _check_field_match(paper, field, value, - case_sensitive=case_sensitive) + return AuthorFilter(value, case_sensitive=case_sensitive) else: - return False + return FieldFilter(field, value, case_sensitive=case_sensitive) # TODO implement search by type of document -def filter_paper(paper, query, case_sensitive=None): +def get_paper_filter(query, case_sensitive=None): """If case_sensitive is not given, only check case if query is not lowercase. 
:args query: list of query blocks (strings) """ - return all([_check_query_block(paper, query_block, - case_sensitive=case_sensitive) - for query_block in query]) + filters = [_query_block_to_filter(query_block, case_sensitive=case_sensitive) + for query_block in query] + return lambda paper: all([f(paper) for f in filters]) diff --git a/tests/test_queries.py b/tests/test_queries.py index 6ea7a10..1e52af0 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1,10 +1,10 @@ import unittest import dotdot -from pubs.commands.list_cmd import (_check_author_match, - _check_field_match, - _check_query_block, - filter_paper, +from pubs.commands.list_cmd import (AuthorFilter, + FieldFilter, + _query_block_to_filter, + get_paper_filter, InvalidQuery) from pubs.paper import Paper @@ -16,28 +16,30 @@ page_paper = Paper.from_bibentry(fixtures.page_bibentry) turing_paper = Paper.from_bibentry(fixtures.turing_bibentry, metadata=fixtures.turing_metadata) + class TestAuthorFilter(unittest.TestCase): def test_fails_if_no_author(self): no_doe = doe_paper.deepcopy() no_doe.bibentry['author'] = [] - self.assertTrue(not _check_author_match(no_doe, 'whatever')) + self.assertTrue(not AuthorFilter('whatever')(no_doe)) def test_match_case(self): - self.assertTrue(_check_author_match(doe_paper, 'doe')) - self.assertTrue(_check_author_match(doe_paper, 'doe', - case_sensitive=False)) + self.assertTrue(AuthorFilter('doe')(doe_paper)) + self.assertTrue(AuthorFilter('doe', case_sensitive=False)(doe_paper)) + self.assertTrue(AuthorFilter('Doe')(doe_paper)) def test_do_not_match_case(self): - self.assertFalse(_check_author_match(doe_paper, 'dOe')) - self.assertFalse(_check_author_match(doe_paper, 'doe', - case_sensitive=True)) + self.assertFalse(AuthorFilter('dOe')(doe_paper)) + self.assertFalse(AuthorFilter('dOe', case_sensitive=True)(doe_paper)) + self.assertFalse(AuthorFilter('doe', case_sensitive=True)(doe_paper)) + self.assertTrue(AuthorFilter('dOe', 
case_sensitive=False)(doe_paper)) def test_match_not_first_author(self): - self.assertTrue(_check_author_match(page_paper, 'motwani')) + self.assertTrue(AuthorFilter('motwani')(page_paper)) def test_do_not_match_first_name(self): - self.assertTrue(not _check_author_match(page_paper, 'larry')) + self.assertTrue(not AuthorFilter('larry')(page_paper)) class TestCheckTag(unittest.TestCase): @@ -47,55 +49,52 @@ class TestCheckTag(unittest.TestCase): class TestCheckField(unittest.TestCase): def test_match_case(self): - self.assertTrue(_check_field_match(doe_paper, 'title', 'nice')) - self.assertTrue(_check_field_match(doe_paper, 'title', 'nice', - case_sensitive=False)) - self.assertTrue(_check_field_match(doe_paper, 'year', '2013')) + self.assertTrue(FieldFilter('title', 'nice')(doe_paper)) + self.assertTrue( + FieldFilter('title', 'nice', case_sensitive=False)(doe_paper)) + self.assertTrue(FieldFilter('year', '2013')(doe_paper)) def test_do_not_match_case(self): - self.assertTrue(_check_field_match(doe_paper, 'title', - 'Title', case_sensitive=True)) - self.assertFalse(_check_field_match(doe_paper, 'title', 'nice', - case_sensitive=True)) + self.assertTrue( + FieldFilter('title', 'Title', case_sensitive=True)(doe_paper)) + self.assertFalse( + FieldFilter('title', 'nice', case_sensitive=True)(doe_paper)) class TestCheckQueryBlock(unittest.TestCase): def test_raise_invalid_if_no_value(self): with self.assertRaises(InvalidQuery): - _check_query_block(doe_paper, 'title') + _query_block_to_filter('title') def test_raise_invalid_if_too_much(self): with self.assertRaises(InvalidQuery): - _check_query_block(doe_paper, 'whatever:value:too_much') + _query_block_to_filter('whatever:value:too_much') class TestFilterPaper(unittest.TestCase): def test_case(self): - self.assertTrue (filter_paper(doe_paper, ['title:nice'])) - self.assertTrue (filter_paper(doe_paper, ['title:Nice'])) - self.assertFalse(filter_paper(doe_paper, ['title:nIce'])) + 
self.assertTrue(get_paper_filter(['title:nice'])(doe_paper)) + self.assertTrue(get_paper_filter(['title:Nice'])(doe_paper)) + self.assertFalse(get_paper_filter(['title:nIce'])(doe_paper)) def test_fields(self): - self.assertTrue (filter_paper(doe_paper, ['year:2013'])) - self.assertFalse(filter_paper(doe_paper, ['year:2014'])) - self.assertTrue (filter_paper(doe_paper, ['author:doe'])) - self.assertTrue (filter_paper(doe_paper, ['author:Doe'])) + self.assertTrue(get_paper_filter(['year:2013'])(doe_paper)) + self.assertFalse(get_paper_filter(['year:2014'])(doe_paper)) + self.assertTrue(get_paper_filter(['author:doe'])(doe_paper)) + self.assertTrue(get_paper_filter(['author:Doe'])(doe_paper)) def test_tags(self): - self.assertTrue (filter_paper(turing_paper, ['tag:computer'])) - self.assertFalse(filter_paper(turing_paper, ['tag:Ai'])) - self.assertTrue (filter_paper(turing_paper, ['tag:AI'])) - self.assertTrue (filter_paper(turing_paper, ['tag:ai'])) + self.assertTrue(get_paper_filter(['tag:computer'])(turing_paper)) + self.assertFalse(get_paper_filter(['tag:Ai'])(turing_paper)) + self.assertTrue(get_paper_filter(['tag:AI'])(turing_paper)) + self.assertTrue(get_paper_filter(['tag:ai'])(turing_paper)) def test_multiple(self): - self.assertTrue (filter_paper(doe_paper, - ['author:doe', 'year:2013'])) - self.assertFalse(filter_paper(doe_paper, - ['author:doe', 'year:2014'])) - self.assertFalse(filter_paper(doe_paper, - ['author:doee', 'year:2014'])) + self.assertTrue(get_paper_filter(['author:doe', 'year:2013'])(doe_paper)) + self.assertFalse(get_paper_filter(['author:doe', 'year:2014'])(doe_paper)) + self.assertFalse(get_paper_filter(['author:doee', 'year:2014'])(doe_paper)) if __name__ == '__main__': From 84553d8eb37fc0c7d6c753212f09022049a5efb8 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 18 Dec 2017 18:39:17 -0500 Subject: [PATCH 2/6] [Fix #90] Adds year ranges to queries. 
--- pubs/commands/list_cmd.py | 40 ++++++++++++++++++++++++++++++++++++++- tests/test_queries.py | 27 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index f3fa7b9..de44549 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -27,7 +27,7 @@ def parser(subparsers, conf): help='list only pubs without attached documents.') parser.add_argument('query', nargs='*', help='Paper query ("author:Einstein", "title:learning",' - '"year:2000" or "tags:math")') + '"year:2000", "year:2000-2010, or "tags:math")') return parser @@ -108,6 +108,44 @@ class TagFilter(QueryFilter): return any([self.query in self._lower(t) for t in paper.tags]) +class YearFilter(QueryFilter): + """Note: a query like `year:` or `year:-` would match any paper + whose year field is set and can be converted to an int. + """ + + def __init__(self, query, case_sensitive=None): + split = query.split('-') + self.start = self._str_to_year(split[0]) + if len(split) == 1: + self.end = self.start + elif len(split) == 2: + self.end = self._str_to_year(split[1]) + if (len(split) > 2 or ( + self.start is not None and + self.end is not None and + self.start > self.end)): + raise ValueError('Invalid year range "{}"'.format(query)) + + def __call__(self, paper): + """Only checks within last names.""" + if 'year' not in paper.bibdata: + return False + else: + try: + year = int(paper.bibdata['year']) + return ((self.start is None or year >= self.start) and + (self.end is None or year <= self.end)) + except ValueError: + return False + + @staticmethod + def _str_to_year(s): + try: + return int(s) if s else None + except ValueError: + raise ValueError('Invalid year "{}"'.format(s)) + + def _get_field_value(query_block): split_block = query_block.split(':') if len(split_block) != 2: diff --git a/tests/test_queries.py b/tests/test_queries.py index 1e52af0..0edf6a3 100644 --- a/tests/test_queries.py +++ 
b/tests/test_queries.py @@ -3,6 +3,7 @@ import unittest import dotdot from pubs.commands.list_cmd import (AuthorFilter, FieldFilter, + YearFilter, _query_block_to_filter, get_paper_filter, InvalidQuery) @@ -46,6 +47,32 @@ class TestCheckTag(unittest.TestCase): pass +class TestCheckYear(unittest.TestCase): + + def test_single_year(self): + self.assertTrue(YearFilter('2013')(doe_paper)) + self.assertFalse(YearFilter('2014')(doe_paper)) + + def test_before_year(self): + self.assertTrue(YearFilter('-2013')(doe_paper)) + self.assertTrue(YearFilter('-2014')(doe_paper)) + self.assertFalse(YearFilter('-2012')(doe_paper)) + + def test_after_year(self): + self.assertTrue(YearFilter('2013-')(doe_paper)) + self.assertTrue(YearFilter('2012-')(doe_paper)) + self.assertFalse(YearFilter('2014-')(doe_paper)) + + def test_year_range(self): + self.assertTrue(YearFilter('')(doe_paper)) + self.assertTrue(YearFilter('-')(doe_paper)) + self.assertTrue(YearFilter('2013-2013')(doe_paper)) + self.assertTrue(YearFilter('2012-2014')(doe_paper)) + self.assertFalse(YearFilter('2014-2015')(doe_paper)) + with self.assertRaises(ValueError): + YearFilter('2015-2014')(doe_paper) + + class TestCheckField(unittest.TestCase): def test_match_case(self): From 832a20d74326356a218d513b81c9fc69f6ae932f Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 18 Dec 2017 18:44:49 -0500 Subject: [PATCH 3/6] Typo. 
--- pubs/commands/list_cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index de44549..02b962d 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -27,7 +27,7 @@ def parser(subparsers, conf): help='list only pubs without attached documents.') parser.add_argument('query', nargs='*', help='Paper query ("author:Einstein", "title:learning",' - '"year:2000", "year:2000-2010, or "tags:math")') + '"year:2000", "year:2000-2010", or "tags:math")') return parser From 0bd8d5b8dc2ea66b4e3cb4adac539eb86579c9e8 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 18 Dec 2017 18:47:41 -0500 Subject: [PATCH 4/6] Actually adds the functionality as well as a few more tests. --- pubs/commands/list_cmd.py | 2 ++ tests/test_queries.py | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index 02b962d..4461ddc 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -163,6 +163,8 @@ def _query_block_to_filter(query_block, case_sensitive=None): return TagFilter(value, case_sensitive=case_sensitive) elif field == 'author': return AuthorFilter(value, case_sensitive=case_sensitive) + elif field == 'year': + return YearFilter(value) else: return FieldFilter(field, value, case_sensitive=case_sensitive) diff --git a/tests/test_queries.py b/tests/test_queries.py index 0edf6a3..c9d1982 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -108,6 +108,7 @@ class TestFilterPaper(unittest.TestCase): def test_fields(self): self.assertTrue(get_paper_filter(['year:2013'])(doe_paper)) + self.assertTrue(get_paper_filter(['year:2010-'])(doe_paper)) self.assertFalse(get_paper_filter(['year:2014'])(doe_paper)) self.assertTrue(get_paper_filter(['author:doe'])(doe_paper)) self.assertTrue(get_paper_filter(['author:Doe'])(doe_paper)) @@ -120,7 +121,8 @@ class TestFilterPaper(unittest.TestCase): def 
test_multiple(self): self.assertTrue(get_paper_filter(['author:doe', 'year:2013'])(doe_paper)) - self.assertFalse(get_paper_filter(['author:doe', 'year:2014'])(doe_paper)) + self.assertTrue(get_paper_filter(['author:doe', 'year:2010-2014'])(doe_paper)) + self.assertFalse(get_paper_filter(['author:doe', 'year:2014-'])(doe_paper)) self.assertFalse(get_paper_filter(['author:doee', 'year:2014'])(doe_paper)) From 3ad4a73da14fb34f88653278200ec6a727db0e9a Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Fri, 5 Jan 2018 14:34:13 -0500 Subject: [PATCH 5/6] Moves query code from list_cmd to dedicated module. --- pubs/commands/list_cmd.py | 135 +------------------------------------- pubs/query.py | 135 ++++++++++++++++++++++++++++++++++++++ tests/test_queries.py | 9 +-- 3 files changed, 140 insertions(+), 139 deletions(-) create mode 100644 pubs/query.py diff --git a/pubs/commands/list_cmd.py b/pubs/commands/list_cmd.py index 4461ddc..2c1ec90 100644 --- a/pubs/commands/list_cmd.py +++ b/pubs/commands/list_cmd.py @@ -2,12 +2,8 @@ from datetime import datetime from .. import repo from .. import pretty -from .. 
import bibstruct from ..uis import get_ui - - -class InvalidQuery(ValueError): - pass +from ..query import get_paper_filter, QUERY_HELP def parser(subparsers, conf): @@ -26,8 +22,7 @@ def parser(subparsers, conf): dest='nodocs', default=False, help='list only pubs without attached documents.') parser.add_argument('query', nargs='*', - help='Paper query ("author:Einstein", "title:learning",' - '"year:2000", "year:2000-2010", or "tags:math")') + help=QUERY_HELP) return parser @@ -53,129 +48,3 @@ def command(conf, args): for p in papers)) rp.close() - - -FIELD_ALIASES = { - 'a': 'author', - 'authors': 'author', - 't': 'title', - 'tags': 'tag', - 'y': 'year', -} - - -class QueryFilter(object): - - def __init__(self, query, case_sensitive=None): - if case_sensitive is None: - case_sensitive = not query.islower() - self.case = case_sensitive - self.query = self._lower(query) - - def __call__(self, paper): - raise NotImplementedError - - def _lower(self, s): - return s if self.case else s.lower() - - -class FieldFilter(QueryFilter): - """Generic filter of form `query in paper['field']`""" - - def __init__(self, field, query, case_sensitive=None): - super(FieldFilter, self).__init__(query, case_sensitive=case_sensitive) - self.field = field - - def __call__(self, paper): - return (self.field in paper.bibdata and - self.query in self._lower(paper.bibdata[self.field])) - - -class AuthorFilter(QueryFilter): - - def __call__(self, paper): - """Only checks within last names.""" - if 'author' not in paper.bibdata: - return False - else: - return any([self.query in self._lower(bibstruct.author_last(author)) - for author in paper.bibdata['author']]) - - -class TagFilter(QueryFilter): - - def __call__(self, paper): - return any([self.query in self._lower(t) for t in paper.tags]) - - -class YearFilter(QueryFilter): - """Note: a query like `year:` or `year:-` would match any paper - whose year field is set and can be converted to an int. 
- """ - - def __init__(self, query, case_sensitive=None): - split = query.split('-') - self.start = self._str_to_year(split[0]) - if len(split) == 1: - self.end = self.start - elif len(split) == 2: - self.end = self._str_to_year(split[1]) - if (len(split) > 2 or ( - self.start is not None and - self.end is not None and - self.start > self.end)): - raise ValueError('Invalid year range "{}"'.format(query)) - - def __call__(self, paper): - """Only checks within last names.""" - if 'year' not in paper.bibdata: - return False - else: - try: - year = int(paper.bibdata['year']) - return ((self.start is None or year >= self.start) and - (self.end is None or year <= self.end)) - except ValueError: - return False - - @staticmethod - def _str_to_year(s): - try: - return int(s) if s else None - except ValueError: - raise ValueError('Invalid year "{}"'.format(s)) - - -def _get_field_value(query_block): - split_block = query_block.split(':') - if len(split_block) != 2: - raise InvalidQuery("Invalid query (%s)" % query_block) - field = split_block[0] - if field in FIELD_ALIASES: - field = FIELD_ALIASES[field] - value = split_block[1] - return (field, value) - - -def _query_block_to_filter(query_block, case_sensitive=None): - field, value = _get_field_value(query_block) - if field == 'tag': - return TagFilter(value, case_sensitive=case_sensitive) - elif field == 'author': - return AuthorFilter(value, case_sensitive=case_sensitive) - elif field == 'year': - return YearFilter(value) - else: - return FieldFilter(field, value, case_sensitive=case_sensitive) - - -# TODO implement search by type of document -def get_paper_filter(query, case_sensitive=None): - """If case_sensitive is not given, only check case if query - is not lowercase. 
- - :args query: list of query blocks (strings) - """ - filters = [_query_block_to_filter(query_block, case_sensitive=case_sensitive) - for query_block in query] - return lambda paper: all([f(paper) for f in filters]) diff --git a/pubs/query.py b/pubs/query.py new file mode 100644 index 0000000..baa17ca --- /dev/null +++ b/pubs/query.py @@ -0,0 +1,135 @@ +from . import bibstruct + + +QUERY_HELP = ('Paper query ("author:Einstein", "title:learning",' + '"year:2000", "year:2000-2010", or "tags:math")') + + +FIELD_ALIASES = { + 'a': 'author', + 'authors': 'author', + 't': 'title', + 'tags': 'tag', + 'y': 'year', +} + + +class InvalidQuery(ValueError): + pass + + +class QueryFilter(object): + + def __init__(self, query, case_sensitive=None): + if case_sensitive is None: + case_sensitive = not query.islower() + self.case = case_sensitive + self.query = self._lower(query) + + def __call__(self, paper): + raise NotImplementedError + + def _lower(self, s): + return s if self.case else s.lower() + + +class FieldFilter(QueryFilter): + """Generic filter of form `query in paper['field']`""" + + def __init__(self, field, query, case_sensitive=None): + super(FieldFilter, self).__init__(query, case_sensitive=case_sensitive) + self.field = field + + def __call__(self, paper): + return (self.field in paper.bibdata and + self.query in self._lower(paper.bibdata[self.field])) + + +class AuthorFilter(QueryFilter): + + def __call__(self, paper): + """Only checks within last names.""" + if 'author' not in paper.bibdata: + return False + else: + return any([self.query in self._lower(bibstruct.author_last(author)) + for author in paper.bibdata['author']]) + + +class TagFilter(QueryFilter): + + def __call__(self, paper): + return any([self.query in self._lower(t) for t in paper.tags]) + + +class YearFilter(QueryFilter): + """Note: a query like `year:` or `year:-` would match any paper + whose year field is set and can be converted to an int. 
+ """ + + def __init__(self, query, case_sensitive=None): + split = query.split('-') + self.start = self._str_to_year(split[0]) + if len(split) == 1: + self.end = self.start + elif len(split) == 2: + self.end = self._str_to_year(split[1]) + if (len(split) > 2 or ( + self.start is not None and + self.end is not None and + self.start > self.end)): + raise ValueError('Invalid year range "{}"'.format(query)) + + def __call__(self, paper): + """Only checks within last names.""" + if 'year' not in paper.bibdata: + return False + else: + try: + year = int(paper.bibdata['year']) + return ((self.start is None or year >= self.start) and + (self.end is None or year <= self.end)) + except ValueError: + return False + + @staticmethod + def _str_to_year(s): + try: + return int(s) if s else None + except ValueError: + raise ValueError('Invalid year "{}"'.format(s)) + + +def _get_field_value(query_block): + split_block = query_block.split(':') + if len(split_block) != 2: + raise InvalidQuery("Invalid query (%s)" % query_block) + field = split_block[0] + if field in FIELD_ALIASES: + field = FIELD_ALIASES[field] + value = split_block[1] + return (field, value) + + +def _query_block_to_filter(query_block, case_sensitive=None): + field, value = _get_field_value(query_block) + if field == 'tag': + return TagFilter(value, case_sensitive=case_sensitive) + elif field == 'author': + return AuthorFilter(value, case_sensitive=case_sensitive) + elif field == 'year': + return YearFilter(value) + else: + return FieldFilter(field, value, case_sensitive=case_sensitive) + + +# TODO implement search by type of document +def get_paper_filter(query, case_sensitive=None): + """If case_sensitive is not given, only check case if query + is not lowercase. 
+ + :args query: list of query blocks (strings) + """ + filters = [_query_block_to_filter(query_block, case_sensitive=case_sensitive) + for query_block in query] + return lambda paper: all([f(paper) for f in filters]) diff --git a/tests/test_queries.py b/tests/test_queries.py index c9d1982..b97af6b 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1,12 +1,9 @@ import unittest import dotdot -from pubs.commands.list_cmd import (AuthorFilter, - FieldFilter, - YearFilter, - _query_block_to_filter, - get_paper_filter, - InvalidQuery) +from pubs.query import (AuthorFilter, FieldFilter, YearFilter, + _query_block_to_filter, get_paper_filter, + InvalidQuery) from pubs.paper import Paper From e069da518fea830515ca04b797170090c25de3b1 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Fri, 5 Jan 2018 18:04:59 -0500 Subject: [PATCH 6/6] Fixes query tests (`assertFalse(not ...)` and wrong name). --- tests/test_queries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_queries.py b/tests/test_queries.py index b97af6b..245833b 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -20,7 +20,7 @@ class TestAuthorFilter(unittest.TestCase): def test_fails_if_no_author(self): no_doe = doe_paper.deepcopy() no_doe.bibentry['author'] = [] - self.assertTrue(not AuthorFilter('whatever')(no_doe)) + self.assertFalse(AuthorFilter('whatever')(no_doe)) def test_match_case(self): self.assertTrue(AuthorFilter('doe')(doe_paper)) @@ -37,7 +37,7 @@ class TestAuthorFilter(unittest.TestCase): self.assertTrue(AuthorFilter('motwani')(page_paper)) def test_do_not_match_first_name(self): - self.assertTrue(not AuthorFilter('larry')(page_paper)) + self.assertFalse(AuthorFilter('lawrence')(page_paper)) class TestCheckTag(unittest.TestCase):