From fbc9b94f8d1aae767a29fc9119ea3656038073c9 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 14 Jan 2019 22:03:53 -0800 Subject: [PATCH 1/5] Fix 177: convert latex to unicode before printing one-liner. This commit actually introduces a new method on the paper object to return a copy of the bibdata which entries' latex have been converted to unicode. --- pubs/paper.py | 6 ++++++ pubs/pretty.py | 2 +- tests/str_fixtures.py | 12 ++++++++++++ tests/test_paper.py | 17 +++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/pubs/paper.py b/pubs/paper.py index e0c10d0..365fd2a 100644 --- a/pubs/paper.py +++ b/pubs/paper.py @@ -1,6 +1,8 @@ import copy from dateutil.parser import parse as datetime_parse +from bibtexparser.customization import convert_to_unicode + from . import bibstruct from .p3 import ustr @@ -102,6 +104,10 @@ class Paper(object): def added(self, value): self.metadata['added'] = value + def get_unicode_bibdata(self): + """Converts latex in bibdata fields to unicode.""" + return convert_to_unicode(self.bibdata) + @staticmethod def from_bibentry(bibentry, citekey=None, metadata=None): bibentry_key, bibdata = bibstruct.get_entry(bibentry) diff --git a/pubs/pretty.py b/pubs/pretty.py index 645ef02..0c951e6 100644 --- a/pubs/pretty.py +++ b/pubs/pretty.py @@ -64,7 +64,7 @@ def paper_oneliner(p, citekey_only=False): if citekey_only: return p.citekey else: - bibdesc = bib_oneliner(p.bibdata) + bibdesc = bib_oneliner(p.get_unicode_bibdata()) doc_str = '' if p.docpath is not None: doc_extension = os.path.splitext(p.docpath)[1] diff --git a/tests/str_fixtures.py b/tests/str_fixtures.py index 3b6ded3..f00c01a 100644 --- a/tests/str_fixtures.py +++ b/tests/str_fixtures.py @@ -84,6 +84,18 @@ not_bibtex = """@misc{this looks, """ +bibtex_with_latex = """@article{kjaer2018large, + title={A large impact crater beneath Hiawatha Glacier in northwest Greenland}, + author={Kj{\\ae}r, Kurt H and Larsen, Nicolaj K and Binder, Tobias and Bj{\\o}rk, Anders A and Eisen, Olaf and Fahnestock, Mark A and Funder, Svend and Garde, Adam A and Haack, Henning and Helm, Veit and others}, + journal={Science advances}, + volume={4}, + number={11}, + pages={eaar8173}, + year={2018}, + publisher={American Association for the Advancement of Science} +} +""" + sample_conf = """ [main] diff --git a/tests/test_paper.py b/tests/test_paper.py index 9c78554..21a1236 100644 --- a/tests/test_paper.py +++ b/tests/test_paper.py @@ -4,7 +4,9 @@ import unittest import dotdot import fixtures +import str_fixtures from pubs.paper import Paper +from pubs.endecoder import EnDecoder class TestAttributes(unittest.TestCase): @@ -47,5 +49,20 @@ class TestAttributes(unittest.TestCase): Paper(" ", fixtures.doe_bibdata) +class TestPaperUnicodeBibdata(unittest.TestCase): + + def test_no_latex(self): + p = Paper.from_bibentry(fixtures.page_bibentry, + metadata=fixtures.page_metadata).deepcopy() + self.assertEqual(p.bibdata, p.get_unicode_bibdata()) + + def test_latex_converted(self): + bib = EnDecoder().decode_bibdata(str_fixtures.bibtex_with_latex) + p = Paper.from_bibentry(bib) + ubib = p.get_unicode_bibdata() + self.assertEqual(ubib['author'][0], "Kjær, Kurt H") + self.assertEqual(ubib['author'][3], "Bjørk, Anders A") + + if __name__ == '__main__': unittest.main() From 52378d09039726e787eb1450f9f80dcb5ccd5e11 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 14 Jan 2019 22:47:38 -0800 Subject: [PATCH 2/5] Adds missing test dependency to certifi. --- dev_requirements.txt | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 0d91a9e..1891f81 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -19,6 +19,7 @@ six # those are the additional packages required to run the tests pyfakefs +certifi ddt mock pytest # optional (python setup.py test works without it), but possible nonetheless diff --git a/setup.py b/setup.py index ee088a2..90757f5 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ setup( ], test_suite='tests', - tests_require=['pyfakefs>=3.4', 'mock', 'ddt'], + tests_require=['pyfakefs>=3.4', 'mock', 'ddt', 'certifi'], # in order to avoid 'zipimport.ZipImportError: bad local file header' zip_safe=False, From c8352fb7dfa88af1dbc0d09b198b258c6f956760 Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Mon, 14 Jan 2019 23:00:41 -0800 Subject: [PATCH 3/5] Fixes test not passing because bitexparser's latex_to_unicode also normalizes --- tests/test_usecase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_usecase.py b/tests/test_usecase.py index 80edea7..952cb1b 100644 --- a/tests/test_usecase.py +++ b/tests/test_usecase.py @@ -288,7 +288,7 @@ class TestAdd(URLContentTestCase): def test_add_utf8_citekey(self): correct = ["", ("added to pubs:\n" - "[hausdorff1949grundzüge] Hausdorff, Felix \"Grundzüge der Mengenlehre\" (1949) \n"), + "[hausdorff1949grundzüge] Hausdorff, Felix \"Grundzüge der Mengenlehre\" (1949) \n"), "The 'hausdorff1949grundzüge' citekey has been renamed into 'アスキー'\n", "The 'アスキー' citekey has been renamed into 'Ḽơᶉëᶆ_ȋṕšᶙṁ'\n" ] From e58ae98b93b8364a07fd5f5f452ba88ad332c948 Mon Sep 17 00:00:00 2001 From: "Fabien C. Y. Benureau" Date: Fri, 18 Jan 2019 13:51:39 +0900 Subject: [PATCH 4/5] fix for 187 --- pubs/commands/import_cmd.py | 1 - pubs/endecoder.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pubs/commands/import_cmd.py b/pubs/commands/import_cmd.py index 6d51488..948d10d 100644 --- a/pubs/commands/import_cmd.py +++ b/pubs/commands/import_cmd.py @@ -54,7 +54,6 @@ def many_from_path(ui, bibpath, ignore=False): bibpath = system_path(bibpath) if os.path.isdir(bibpath): - print([os.path.splitext(f)[-1][1:] for f in os.listdir(bibpath)]) all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath) if os.path.splitext(f)[-1][1:] == 'bib'] else: diff --git a/pubs/endecoder.py b/pubs/endecoder.py index abac0f1..0322b02 100644 --- a/pubs/endecoder.py +++ b/pubs/endecoder.py @@ -122,7 +122,7 @@ class EnDecoder(object): def decode_bibdata(self, bibdata): """Decodes bibdata from string. - If the decoding fails, returns a BibParseError. + If the decoding fails, returns a BibDecodingError. """ if len(bibdata) == 0: error_msg = 'parsing error: the provided string has length zero.' @@ -131,7 +131,6 @@ class EnDecoder(object): entries = bp.bparser.BibTexParser( bibdata, common_strings=True, customization=customizations, homogenize_fields=True).get_entry_dict() - # Remove id from bibtexparser attribute which is stored as citekey for e in entries: entries[e].pop(BP_ID_KEY) @@ -140,6 +139,9 @@ class EnDecoder(object): entries[e][TYPE_KEY] = t if len(entries) > 0: return entries + else: + raise self.BibDecodingError(('no valid entry found in the provided data: ' + ' {}').format(bibdata), bibdata) except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e: error_msg = self._format_parsing_error(e) raise self.BibDecodingError(error_msg, bibdata) From 03dc4f192baef594416c539de31f6e9f5a45a37f Mon Sep 17 00:00:00 2001 From: Olivier Mangin Date: Thu, 17 Jan 2019 21:15:53 -0800 Subject: [PATCH 5/5] Fix test not passing because of bytestring on python 2.7 --- tests/test_paper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_paper.py b/tests/test_paper.py index 21a1236..1a3bae6 100644 --- a/tests/test_paper.py +++ b/tests/test_paper.py @@ -60,8 +60,8 @@ class TestPaperUnicodeBibdata(unittest.TestCase): bib = EnDecoder().decode_bibdata(str_fixtures.bibtex_with_latex) p = Paper.from_bibentry(bib) ubib = p.get_unicode_bibdata() - self.assertEqual(ubib['author'][0], "Kjær, Kurt H") - self.assertEqual(ubib['author'][3], "Bjørk, Anders A") + self.assertEqual(ubib['author'][0], u"Kjær, Kurt H") + self.assertEqual(ubib['author'][3], u"Bjørk, Anders A") if __name__ == '__main__':