Merge branch 'fix/76'

2017-07-22 15:45:52 -04:00 · 2017-07-22 15:45:52 -04:00 · fd084bb827
commit fd084bb827
parent 0dd51aff58 da6f07c6c9
6 changed files with 117 additions and 29 deletions
--- a/pubs/apis.py
+++ b/pubs/apis.py
@ -3,15 +3,19 @@
 import requests
 from bs4 import BeautifulSoup

+
 def doi2bibtex(doi):
    """Return a bibtex string of metadata from a DOI"""

    url = 'http://dx.doi.org/{}'.format(doi)
    headers = {'accept': 'application/x-bibtex'}
    r = requests.get(url, headers=headers)
+    if r.encoding is None:
+        r.encoding = 'utf8'  # Do not rely on requests guessing the encoding

    return r.text

+
 def isbn2bibtex(isbn):
    """Return a bibtex string of metadata from a DOI"""

--- a/pubs/content.py
+++ b/pubs/content.py
@ -52,16 +52,17 @@ def _open(path, mode):
    else:
        return open(system_path(path), mode, encoding='utf-8')

+
 def check_file(path, fail=True):
    syspath = system_path(path)
-    return (_check_system_path_exists(syspath, fail=fail)
-            and _check_system_path_is(u'isfile', syspath, fail=fail))
+    return (_check_system_path_exists(syspath, fail=fail) and
+            _check_system_path_is(u'isfile', syspath, fail=fail))


 def check_directory(path, fail=True):
    syspath = system_path(path)
-    return (_check_system_path_exists(syspath, fail=fail)
-            and _check_system_path_is(u'isdir', syspath, fail=fail))
+    return (_check_system_path_exists(syspath, fail=fail) and
+            _check_system_path_is(u'isdir', syspath, fail=fail))


 def read_text_file(filepath, fail=True):
@ -79,6 +80,7 @@ def read_text_file(filepath, fail=True):

    return content

+
 def read_binary_file(filepath, fail=True):
    check_file(filepath, fail=fail)
    with _open(filepath, 'rb') as f:
@ -92,7 +94,16 @@ def remove_file(filepath):


 def write_file(filepath, data, mode='w'):
+    """Write data to file.
+
+    Data should be unicode except when binary mode is selected,
+    in which case data is expected to be binary.
+    """
    check_directory(os.path.dirname(filepath))
+    if 'b' not in mode and sys.version_info < (3,):
+        # On Python 2, _open returns a file in binary mode,
+        # so the data must be encoded first.
+        data = data.encode('utf-8')
    with _open(filepath, mode) as f:
        f.write(data)

--- a/pubs/filebroker.py
+++ b/pubs/filebroker.py
@ -3,17 +3,20 @@ import re
 from .p3 import urlparse

 from .content import (check_file, check_directory, read_text_file, write_file,
-                      system_path, check_content, content_type, get_content,
-                      copy_content)
+                      system_path, check_content, copy_content)

 from . import content


+META_EXT = '.yaml'
+BIB_EXT  = '.bib'
+
+
 def filter_filename(filename, ext):
    """ Return the filename without the extension if the extension matches ext.
        Otherwise return None
    """
-    pattern ='.*\{}$'.format(ext)
+    pattern = '.*\{}$'.format(ext)
    if re.match(pattern, filename) is not None:
        return filename[:-len(ext)]

@ -48,6 +51,12 @@ class FileBroker(object):
        if not check_directory(self.bibdir, fail=False):
            os.mkdir(system_path(self.bibdir))

+    def bib_path(self, citekey):
+        return os.path.join(self.bibdir, citekey + BIB_EXT)
+
+    def meta_path(self, citekey):
+        return os.path.join(self.metadir, citekey + META_EXT)
+
    def pull_cachefile(self, filename):
        filepath = os.path.join(self.cachedir, filename)
        return content.read_binary_file(filepath)
@ -58,35 +67,31 @@ class FileBroker(object):

    def mtime_metafile(self, citekey):
        try:
-            filepath = os.path.join(self.metadir, citekey + '.yaml')
+            filepath = self.meta_path(citekey)
            return os.path.getmtime(filepath)
        except OSError:
            raise IOError("'{}' not found.".format(filepath))

    def mtime_bibfile(self, citekey):
        try:
-            filepath = os.path.join(self.bibdir, citekey + '.bib')
+            filepath = self.bib_path(citekey)
            return os.path.getmtime(filepath)
        except OSError:
            raise IOError("'{}' not found.".format(filepath))

    def pull_metafile(self, citekey):
-        filepath = os.path.join(self.metadir, citekey + '.yaml')
-        return read_text_file(filepath)
+        return read_text_file(self.meta_path(citekey))

    def pull_bibfile(self, citekey):
-        filepath = os.path.join(self.bibdir, citekey + '.bib')
-        return read_text_file(filepath)
+        return read_text_file(self.bib_path(citekey))

    def push_metafile(self, citekey, metadata):
        """Put content to disk. Will gladly override anything standing in its way."""
-        filepath = os.path.join(self.metadir, citekey + '.yaml')
-        write_file(filepath, metadata)
+        write_file(self.meta_path(citekey), metadata)

    def push_bibfile(self, citekey, bibdata):
        """Put content to disk. Will gladly override anything standing in its way."""
-        filepath = os.path.join(self.bibdir, citekey + '.bib')
-        write_file(filepath, bibdata)
+        write_file(self.bib_path(citekey), bibdata)

    def push(self, citekey, metadata, bibdata):
        """Put content to disk. Will gladly override anything standing in its way."""
@ -94,10 +99,10 @@ class FileBroker(object):
        self.push_bibfile(citekey, bibdata)

    def remove(self, citekey):
-        metafilepath = os.path.join(self.metadir, citekey + '.yaml')
+        metafilepath = self.meta_path(citekey)
        if check_file(metafilepath):
            os.remove(system_path(metafilepath))
-        bibfilepath = os.path.join(self.bibdir, citekey + '.bib')
+        bibfilepath = self.bib_path(citekey)
        if check_file(bibfilepath):
            os.remove(system_path(bibfilepath))

@ -106,16 +111,16 @@ class FileBroker(object):

            :param meta_check:  if True, will return if both the bibtex and the meta file exists.
        """
-        does_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False)
+        does_exists = check_file(self.bib_path(citekey), fail=False)
        if meta_check:
-            meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False)
+            meta_exists = check_file(self.meta_path(citekey), fail=False)
            does_exists = does_exists and meta_exists
        return does_exists

    def listing(self, filestats=True):
        metafiles = []
        for filename in os.listdir(system_path(self.metadir)):
-            citekey = filter_filename(filename, '.yaml')
+            citekey = filter_filename(filename, META_EXT)
            if citekey is not None:
                if filestats:
                    stats = os.stat(system_path(os.path.join(self.metadir, filename)))
@ -125,7 +130,7 @@ class FileBroker(object):

        bibfiles = []
        for filename in os.listdir(system_path(self.bibdir)):
-            citekey = filter_filename(filename, '.bib')
+            citekey = filter_filename(filename, BIB_EXT)
            if citekey is not None:
                if filestats:
                    stats = os.stat(system_path(os.path.join(self.bibdir, filename)))
--- a/pubs/p3.py
+++ b/pubs/p3.py
@ -39,6 +39,7 @@ else:
    # for test_usecase.
    def _get_raw_stdout():
        return sys.stdout.buffer
+
    def _get_raw_stderr():
        return sys.stderr.buffer

--- a/pubs/repo.py
+++ b/pubs/repo.py
@ -110,8 +110,9 @@ class Repository(object):
            self.databroker.remove_note(citekey, self.conf['main']['note_extension'],
                                        silent=True)
        except IOError:
-            pass # FIXME: if IOError is about being unable to
-                 # remove the file, we need to issue an error.
+            # FIXME: if IOError is about being unable to
+            # remove the file, we need to issue an error.
+            pass
        self.citekeys.remove(citekey)
        self.databroker.remove(citekey)

@ -126,16 +127,18 @@ class Repository(object):
                p.docpath = None
                self.push_paper(p, overwrite=True, event=False)
        except IOError:
-            pass # FIXME: if IOError is about being unable to
-                 # remove the file, we need to issue an error.I
+            # FIXME: if IOError is about being unable to
+            # remove the file, we need to issue an error.
+            pass

    def pull_docpath(self, citekey):
        try:
            p = self.pull_paper(citekey)
            return self.databroker.real_docpath(p.docpath)
        except IOError:
-            pass # FIXME: if IOError is about being unable to
-                 # remove the file, we need to issue an error.I
+            # FIXME: if IOError is about being unable to
+            # remove the file, we need to issue an error.
+            pass

    def rename_paper(self, paper, new_citekey=None, old_citekey=None):
        if old_citekey is None:
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@ -0,0 +1,64 @@
+# coding: utf8
+
+from __future__ import unicode_literals
+import unittest
+
+import dotdot
+
+from pubs.p3 import ustr
+from pubs.endecoder import EnDecoder
+from pubs.apis import doi2bibtex, isbn2bibtex
+
+
+class TestDOI2Bibtex(unittest.TestCase):
+
+    def setUp(self):
+        self.endecoder = EnDecoder()
+
+    def test_unicode(self):
+        bib = doi2bibtex('10.1007/BF01700692')
+        self.assertIsInstance(bib, ustr)
+        self.assertIn('Kurt Gödel', bib)
+
+    def test_parses_to_bibtex(self):
+        bib = doi2bibtex('10.1007/BF01700692')
+        b = self.endecoder.decode_bibdata(bib)
+        self.assertEqual(len(b), 1)
+        entry = b[list(b)[0]]
+        self.assertEqual(entry['author'][0], 'Gödel, Kurt')
+        self.assertEqual(entry['title'],
+                         'Über formal unentscheidbare Sätze der Principia '
+                         'Mathematica und verwandter Systeme I')
+
+    def test_parse_fails_on_incorrect_DOI(self):
+        bib = doi2bibtex('999999')
+        with self.assertRaises(ValueError):
+            self.endecoder.decode_bibdata(bib)
+
+
+class TestISBN2Bibtex(unittest.TestCase):
+
+    def setUp(self):
+        self.endecoder = EnDecoder()
+
+    def test_unicode(self):
+        bib = isbn2bibtex('9782081336742')
+        self.assertIsInstance(bib, ustr)
+        self.assertIn('Poincaré, Henri', bib)
+
+    def test_parses_to_bibtex(self):
+        bib = isbn2bibtex('9782081336742')
+        b = self.endecoder.decode_bibdata(bib)
+        self.assertEqual(len(b), 1)
+        entry = b[list(b)[0]]
+        self.assertEqual(entry['author'][0], 'Poincaré, Henri')
+        self.assertEqual(entry['title'], 'La science et l\'hypothèse')
+
+    def test_parse_fails_on_incorrect_ISBN(self):
+        bib = doi2bibtex('9' * 13)
+        with self.assertRaises(ValueError):
+            self.endecoder.decode_bibdata(bib)
+
+
+# Note: apparently ottobib.com uses character modifiers for accents instead
+# of the correct unicode characters. TODO: Should we convert them?