Merge pull request #185 from pubs/fix/177
Fix/177 convert latex to unicode before printing one-liner
This commit is contained in commit 1c892bd64e
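The core of the change: bib entries are passed through bibtexparser's convert_to_unicode customization before the one-liner is rendered, so LaTeX escapes such as Kj{\ae}r print as plain unicode. A minimal sketch of that conversion (the record below is a hypothetical, trimmed copy of the kjaer2018large fixture added in this PR):

    from bibtexparser.customization import convert_to_unicode

    # hypothetical trimmed record; the full fixture is kjaer2018large below
    entry = {'author': 'Kj{\\ae}r, Kurt H and Bj{\\o}rk, Anders A'}
    print(convert_to_unicode(entry)['author'])  # expected: Kjær, Kurt H and Bjørk, Anders A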
@@ -19,6 +19,7 @@ six

 # those are the additional packages required to run the tests
 pyfakefs
+certifi
 ddt
 mock
 pytest # optional (python setup.py test works without it), but possible nonetheless
@@ -54,7 +54,6 @@ def many_from_path(ui, bibpath, ignore=False):

     bibpath = system_path(bibpath)
     if os.path.isdir(bibpath):
-        print([os.path.splitext(f)[-1][1:] for f in os.listdir(bibpath)])
         all_files = [os.path.join(bibpath, f) for f in os.listdir(bibpath)
                      if os.path.splitext(f)[-1][1:] == 'bib']
     else:
@@ -122,7 +122,7 @@ class EnDecoder(object):
     def decode_bibdata(self, bibdata):
         """Decodes bibdata from string.

-        If the decoding fails, returns a BibParseError.
+        If the decoding fails, returns a BibDecodingError.
         """
         if len(bibdata) == 0:
             error_msg = 'parsing error: the provided string has length zero.'
@@ -131,7 +131,6 @@ class EnDecoder(object):
             entries = bp.bparser.BibTexParser(
                 bibdata, common_strings=True, customization=customizations,
                 homogenize_fields=True).get_entry_dict()
-
             # Remove id from bibtexparser attribute which is stored as citekey
             for e in entries:
                 entries[e].pop(BP_ID_KEY)
@@ -140,6 +139,9 @@ class EnDecoder(object):
                 entries[e][TYPE_KEY] = t
             if len(entries) > 0:
                 return entries
+            else:
+                raise self.BibDecodingError(('no valid entry found in the provided data: '
+                                             ' {}').format(bibdata), bibdata)
         except (pyparsing.ParseException, pyparsing.ParseSyntaxException) as e:
             error_msg = self._format_parsing_error(e)
             raise self.BibDecodingError(error_msg, bibdata)
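With this hunk, decode_bibdata fails loudly when nothing can be parsed. A minimal sketch of the resulting behaviour, assuming bibtexparser yields no entries for the malformed input (names as in the hunk above; the input string is illustrative, in the spirit of the not_bibtex fixture):

    from pubs.endecoder import EnDecoder

    decoder = EnDecoder()
    try:
        decoder.decode_bibdata("@misc{this looks, like = {bibtex but is not}")
    except decoder.BibDecodingError:
        # parse errors are re-raised as BibDecodingError, and an input that
        # yields no entries now raises it too (the new else branch above)
        pass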
@@ -1,6 +1,8 @@
 import copy
 from dateutil.parser import parse as datetime_parse

+from bibtexparser.customization import convert_to_unicode
+
 from . import bibstruct
 from .p3 import ustr

@@ -102,6 +104,10 @@ class Paper(object):
     def added(self, value):
         self.metadata['added'] = value

+    def get_unicode_bibdata(self):
+        """Converts latex in bibdata fields to unicode."""
+        return convert_to_unicode(self.bibdata)
+
     @staticmethod
     def from_bibentry(bibentry, citekey=None, metadata=None):
         bibentry_key, bibdata = bibstruct.get_entry(bibentry)
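A usage sketch of the new Paper.get_unicode_bibdata, mirroring the test added further down (assumes it is run from the tests directory so that str_fixtures, extended in this PR, is importable):

    import str_fixtures

    from pubs.endecoder import EnDecoder
    from pubs.paper import Paper

    bib = EnDecoder().decode_bibdata(str_fixtures.bibtex_with_latex)
    p = Paper.from_bibentry(bib)
    print(p.get_unicode_bibdata()['author'][0])  # Kjær, Kurt H rather than Kj{\ae}r, Kurt H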
@@ -64,7 +64,7 @@ def paper_oneliner(p, citekey_only=False):
     if citekey_only:
         return p.citekey
     else:
-        bibdesc = bib_oneliner(p.bibdata)
+        bibdesc = bib_oneliner(p.get_unicode_bibdata())
         doc_str = ''
         if p.docpath is not None:
             doc_extension = os.path.splitext(p.docpath)[1]
setup.py
@@ -60,7 +60,7 @@ setup(
     ],

     test_suite='tests',
-    tests_require=['pyfakefs>=3.4', 'mock', 'ddt'],
+    tests_require=['pyfakefs>=3.4', 'mock', 'ddt', 'certifi'],

     # in order to avoid 'zipimport.ZipImportError: bad local file header'
     zip_safe=False,
@@ -84,6 +84,18 @@ not_bibtex = """@misc{this looks,

"""

+bibtex_with_latex = """@article{kjaer2018large,
+  title={A large impact crater beneath Hiawatha Glacier in northwest Greenland},
+  author={Kj{\\ae}r, Kurt H and Larsen, Nicolaj K and Binder, Tobias and Bj{\\o}rk, Anders A and Eisen, Olaf and Fahnestock, Mark A and Funder, Svend and Garde, Adam A and Haack, Henning and Helm, Veit and others},
+  journal={Science advances},
+  volume={4},
+  number={11},
+  pages={eaar8173},
+  year={2018},
+  publisher={American Association for the Advancement of Science}
+}
+"""
+
 sample_conf = """
 [main]

@@ -4,7 +4,9 @@ import unittest

 import dotdot
 import fixtures
+import str_fixtures
 from pubs.paper import Paper
+from pubs.endecoder import EnDecoder


 class TestAttributes(unittest.TestCase):
@@ -47,5 +49,20 @@ class TestAttributes(unittest.TestCase):
             Paper(" ", fixtures.doe_bibdata)


+class TestPaperUnicodeBibdata(unittest.TestCase):
+
+    def test_no_latex(self):
+        p = Paper.from_bibentry(fixtures.page_bibentry,
+                                metadata=fixtures.page_metadata).deepcopy()
+        self.assertEqual(p.bibdata, p.get_unicode_bibdata())
+
+    def test_latex_converted(self):
+        bib = EnDecoder().decode_bibdata(str_fixtures.bibtex_with_latex)
+        p = Paper.from_bibentry(bib)
+        ubib = p.get_unicode_bibdata()
+        self.assertEqual(ubib['author'][0], u"Kjær, Kurt H")
+        self.assertEqual(ubib['author'][3], u"Bjørk, Anders A")
+
+
 if __name__ == '__main__':
     unittest.main()
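As the requirements comment above notes, pytest is optional: the new tests run with python setup.py test (test_suite='tests' in setup.py), or, if pytest is installed, something like pytest -k TestPaperUnicodeBibdata should select just the new class.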
@@ -288,7 +288,7 @@ class TestAdd(URLContentTestCase):
     def test_add_utf8_citekey(self):
         correct = ["",
                    ("added to pubs:\n"
                     "[hausdorff1949grundzüge] Hausdorff, Felix \"Grundzüge der Mengenlehre\" (1949) \n"),
                    "The 'hausdorff1949grundzüge' citekey has been renamed into 'アスキー'\n",
                    "The 'アスキー' citekey has been renamed into 'Ḽơᶉëᶆ_ȋṕšᶙṁ'\n"
                    ]