diff --git a/pubs/apis.py b/pubs/apis.py index 8c95126..0a0e449 100644 --- a/pubs/apis.py +++ b/pubs/apis.py @@ -3,15 +3,19 @@ import requests from bs4 import BeautifulSoup + def doi2bibtex(doi): """Return a bibtex string of metadata from a DOI""" url = 'http://dx.doi.org/{}'.format(doi) headers = {'accept': 'application/x-bibtex'} r = requests.get(url, headers=headers) + if r.encoding is None: + r.encoding = 'utf8' # Do not rely on guessing from request return r.text + def isbn2bibtex(isbn): """Return a bibtex string of metadata from a DOI""" diff --git a/pubs/content.py b/pubs/content.py index 796302b..c529b77 100644 --- a/pubs/content.py +++ b/pubs/content.py @@ -52,16 +52,17 @@ def _open(path, mode): else: return open(system_path(path), mode, encoding='utf-8') + def check_file(path, fail=True): syspath = system_path(path) - return (_check_system_path_exists(syspath, fail=fail) - and _check_system_path_is(u'isfile', syspath, fail=fail)) + return (_check_system_path_exists(syspath, fail=fail) and + _check_system_path_is(u'isfile', syspath, fail=fail)) def check_directory(path, fail=True): syspath = system_path(path) - return (_check_system_path_exists(syspath, fail=fail) - and _check_system_path_is(u'isdir', syspath, fail=fail)) + return (_check_system_path_exists(syspath, fail=fail) and + _check_system_path_is(u'isdir', syspath, fail=fail)) def read_text_file(filepath, fail=True): @@ -79,6 +80,7 @@ def read_text_file(filepath, fail=True): return content + def read_binary_file(filepath, fail=True): check_file(filepath, fail=fail) with _open(filepath, 'rb') as f: @@ -92,7 +94,16 @@ def remove_file(filepath): def write_file(filepath, data, mode='w'): + """Write data to file. + + Data should be unicode except when binary mode is selected, + in which case data is expected to be binary. 
+ """ check_directory(os.path.dirname(filepath)) + if 'b' not in mode and sys.version_info < (3,): + # _open returns in binary mode for python2 + # Data must be encoded + data = data.encode('utf-8') with _open(filepath, mode) as f: f.write(data) diff --git a/pubs/filebroker.py b/pubs/filebroker.py index 47a8bd8..b040313 100644 --- a/pubs/filebroker.py +++ b/pubs/filebroker.py @@ -3,17 +3,20 @@ import re from .p3 import urlparse from .content import (check_file, check_directory, read_text_file, write_file, - system_path, check_content, content_type, get_content, - copy_content) + system_path, check_content, copy_content) from . import content +META_EXT = '.yaml' +BIB_EXT = '.bib' + + def filter_filename(filename, ext): """ Return the filename without the extension if the extension matches ext. Otherwise return None """ - pattern ='.*\{}$'.format(ext) + pattern = '.*\{}$'.format(ext) if re.match(pattern, filename) is not None: return filename[:-len(ext)] @@ -48,6 +51,12 @@ class FileBroker(object): if not check_directory(self.bibdir, fail=False): os.mkdir(system_path(self.bibdir)) + def bib_path(self, citekey): + return os.path.join(self.bibdir, citekey + BIB_EXT) + + def meta_path(self, citekey): + return os.path.join(self.metadir, citekey + META_EXT) + def pull_cachefile(self, filename): filepath = os.path.join(self.cachedir, filename) return content.read_binary_file(filepath) @@ -58,35 +67,31 @@ class FileBroker(object): def mtime_metafile(self, citekey): try: - filepath = os.path.join(self.metadir, citekey + '.yaml') + filepath = self.meta_path(citekey) return os.path.getmtime(filepath) except OSError: raise IOError("'{}' not found.".format(filepath)) def mtime_bibfile(self, citekey): try: - filepath = os.path.join(self.bibdir, citekey + '.bib') + filepath = self.bib_path(citekey) return os.path.getmtime(filepath) except OSError: raise IOError("'{}' not found.".format(filepath)) def pull_metafile(self, citekey): - filepath = os.path.join(self.metadir, citekey + 
'.yaml') - return read_text_file(filepath) + return read_text_file(self.meta_path(citekey)) def pull_bibfile(self, citekey): - filepath = os.path.join(self.bibdir, citekey + '.bib') - return read_text_file(filepath) + return read_text_file(self.bib_path(citekey)) def push_metafile(self, citekey, metadata): """Put content to disk. Will gladly override anything standing in its way.""" - filepath = os.path.join(self.metadir, citekey + '.yaml') - write_file(filepath, metadata) + write_file(self.meta_path(citekey), metadata) def push_bibfile(self, citekey, bibdata): """Put content to disk. Will gladly override anything standing in its way.""" - filepath = os.path.join(self.bibdir, citekey + '.bib') - write_file(filepath, bibdata) + write_file(self.bib_path(citekey), bibdata) def push(self, citekey, metadata, bibdata): """Put content to disk. Will gladly override anything standing in its way.""" @@ -94,10 +99,10 @@ class FileBroker(object): self.push_bibfile(citekey, bibdata) def remove(self, citekey): - metafilepath = os.path.join(self.metadir, citekey + '.yaml') + metafilepath = self.meta_path(citekey) if check_file(metafilepath): os.remove(system_path(metafilepath)) - bibfilepath = os.path.join(self.bibdir, citekey + '.bib') + bibfilepath = self.bib_path(citekey) if check_file(bibfilepath): os.remove(system_path(bibfilepath)) @@ -106,16 +111,16 @@ class FileBroker(object): :param meta_check: if True, will return if both the bibtex and the meta file exists. 
""" - does_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False) + does_exists = check_file(self.bib_path(citekey), fail=False) if meta_check: - meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False) + meta_exists = check_file(self.meta_path(citekey), fail=False) does_exists = does_exists and meta_exists return does_exists def listing(self, filestats=True): metafiles = [] for filename in os.listdir(system_path(self.metadir)): - citekey = filter_filename(filename, '.yaml') + citekey = filter_filename(filename, META_EXT) if citekey is not None: if filestats: stats = os.stat(system_path(os.path.join(self.metadir, filename))) @@ -125,7 +130,7 @@ class FileBroker(object): bibfiles = [] for filename in os.listdir(system_path(self.bibdir)): - citekey = filter_filename(filename, '.bib') + citekey = filter_filename(filename, BIB_EXT) if citekey is not None: if filestats: stats = os.stat(system_path(os.path.join(self.bibdir, filename))) diff --git a/pubs/p3.py b/pubs/p3.py index 93229d6..9c9082c 100644 --- a/pubs/p3.py +++ b/pubs/p3.py @@ -39,6 +39,7 @@ else: # for test_usecase. def _get_raw_stdout(): return sys.stdout.buffer + def _get_raw_stderr(): return sys.stderr.buffer diff --git a/pubs/repo.py b/pubs/repo.py index 32026d3..fd942b0 100644 --- a/pubs/repo.py +++ b/pubs/repo.py @@ -110,8 +110,9 @@ class Repository(object): self.databroker.remove_note(citekey, self.conf['main']['note_extension'], silent=True) except IOError: - pass # FIXME: if IOError is about being unable to - # remove the file, we need to issue an error. + # FIXME: if IOError is about being unable to + # remove the file, we need to issue an error. 
+ pass self.citekeys.remove(citekey) self.databroker.remove(citekey) @@ -126,16 +127,18 @@ class Repository(object): p.docpath = None self.push_paper(p, overwrite=True, event=False) except IOError: - pass # FIXME: if IOError is about being unable to - # remove the file, we need to issue an error.I + # FIXME: if IOError is about being unable to + # remove the file, we need to issue an error. + pass def pull_docpath(self, citekey): try: p = self.pull_paper(citekey) return self.databroker.real_docpath(p.docpath) except IOError: - pass # FIXME: if IOError is about being unable to - # remove the file, we need to issue an error.I + # FIXME: if IOError is about being unable to + # remove the file, we need to issue an error. + pass def rename_paper(self, paper, new_citekey=None, old_citekey=None): if old_citekey is None: diff --git a/tests/test_apis.py b/tests/test_apis.py new file mode 100644 index 0000000..087f32f --- /dev/null +++ b/tests/test_apis.py @@ -0,0 +1,64 @@ +# coding: utf8 + +from __future__ import unicode_literals +import unittest + +import dotdot + +from pubs.p3 import ustr +from pubs.endecoder import EnDecoder +from pubs.apis import doi2bibtex, isbn2bibtex + + +class TestDOI2Bibtex(unittest.TestCase): + + def setUp(self): + self.endecoder = EnDecoder() + + def test_unicode(self): + bib = doi2bibtex('10.1007/BF01700692') + self.assertIsInstance(bib, ustr) + self.assertIn('Kurt Gödel', bib) + + def test_parses_to_bibtex(self): + bib = doi2bibtex('10.1007/BF01700692') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Gödel, Kurt') + self.assertEqual(entry['title'], + 'Über formal unentscheidbare Sätze der Principia ' + 'Mathematica und verwandter Systeme I') + + def test_parse_fails_on_incorrect_DOI(self): + bib = doi2bibtex('999999') + with self.assertRaises(ValueError): + self.endecoder.decode_bibdata(bib) + + +class TestISBN2Bibtex(unittest.TestCase): + + def
setUp(self): + self.endecoder = EnDecoder() + + def test_unicode(self): + bib = isbn2bibtex('9782081336742') + self.assertIsInstance(bib, ustr) + self.assertIn('Poincaré, Henri', bib) + + def test_parses_to_bibtex(self): + bib = isbn2bibtex('9782081336742') + b = self.endecoder.decode_bibdata(bib) + self.assertEqual(len(b), 1) + entry = b[list(b)[0]] + self.assertEqual(entry['author'][0], 'Poincaré, Henri') + self.assertEqual(entry['title'], 'La science et l\'hypothèse') + + def test_parse_fails_on_incorrect_ISBN(self): + bib = doi2bibtex('9' * 13) + with self.assertRaises(ValueError): + self.endecoder.decode_bibdata(bib) + + +# Note: apparently ottobib.com uses character modifiers for accents instead +# of the correct unicode characters. TODO: Should we convert them?