Merge branch 'fix/76'

main
Olivier Mangin 8 years ago
commit fd084bb827

@ -3,15 +3,19 @@
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def doi2bibtex(doi): def doi2bibtex(doi):
"""Return a bibtex string of metadata from a DOI""" """Return a bibtex string of metadata from a DOI"""
url = 'http://dx.doi.org/{}'.format(doi) url = 'http://dx.doi.org/{}'.format(doi)
headers = {'accept': 'application/x-bibtex'} headers = {'accept': 'application/x-bibtex'}
r = requests.get(url, headers=headers) r = requests.get(url, headers=headers)
if r.encoding is None:
r.encoding = 'utf8' # Do not rely on guessing from request
return r.text return r.text
def isbn2bibtex(isbn): def isbn2bibtex(isbn):
"""Return a bibtex string of metadata from an ISBN""" """Return a bibtex string of metadata from an ISBN"""

@ -52,16 +52,17 @@ def _open(path, mode):
else: else:
return open(system_path(path), mode, encoding='utf-8') return open(system_path(path), mode, encoding='utf-8')
def check_file(path, fail=True): def check_file(path, fail=True):
syspath = system_path(path) syspath = system_path(path)
return (_check_system_path_exists(syspath, fail=fail) return (_check_system_path_exists(syspath, fail=fail) and
and _check_system_path_is(u'isfile', syspath, fail=fail)) _check_system_path_is(u'isfile', syspath, fail=fail))
def check_directory(path, fail=True): def check_directory(path, fail=True):
syspath = system_path(path) syspath = system_path(path)
return (_check_system_path_exists(syspath, fail=fail) return (_check_system_path_exists(syspath, fail=fail) and
and _check_system_path_is(u'isdir', syspath, fail=fail)) _check_system_path_is(u'isdir', syspath, fail=fail))
def read_text_file(filepath, fail=True): def read_text_file(filepath, fail=True):
@ -79,6 +80,7 @@ def read_text_file(filepath, fail=True):
return content return content
def read_binary_file(filepath, fail=True): def read_binary_file(filepath, fail=True):
check_file(filepath, fail=fail) check_file(filepath, fail=fail)
with _open(filepath, 'rb') as f: with _open(filepath, 'rb') as f:
@ -92,7 +94,16 @@ def remove_file(filepath):
def write_file(filepath, data, mode='w'): def write_file(filepath, data, mode='w'):
"""Write data to file.
Data should be unicode except when binary mode is selected,
in which case data is expected to be binary.
"""
check_directory(os.path.dirname(filepath)) check_directory(os.path.dirname(filepath))
if 'b' not in mode and sys.version_info < (3,):
# _open returns in binary mode for python2
# Data must be encoded
data = data.encode('utf-8')
with _open(filepath, mode) as f: with _open(filepath, mode) as f:
f.write(data) f.write(data)

@ -3,12 +3,15 @@ import re
from .p3 import urlparse from .p3 import urlparse
from .content import (check_file, check_directory, read_text_file, write_file, from .content import (check_file, check_directory, read_text_file, write_file,
system_path, check_content, content_type, get_content, system_path, check_content, copy_content)
copy_content)
from . import content from . import content
META_EXT = '.yaml'
BIB_EXT = '.bib'
def filter_filename(filename, ext): def filter_filename(filename, ext):
""" Return the filename without the extension if the extension matches ext. """ Return the filename without the extension if the extension matches ext.
Otherwise return None Otherwise return None
@ -48,6 +51,12 @@ class FileBroker(object):
if not check_directory(self.bibdir, fail=False): if not check_directory(self.bibdir, fail=False):
os.mkdir(system_path(self.bibdir)) os.mkdir(system_path(self.bibdir))
def bib_path(self, citekey):
return os.path.join(self.bibdir, citekey + BIB_EXT)
def meta_path(self, citekey):
return os.path.join(self.metadir, citekey + META_EXT)
def pull_cachefile(self, filename): def pull_cachefile(self, filename):
filepath = os.path.join(self.cachedir, filename) filepath = os.path.join(self.cachedir, filename)
return content.read_binary_file(filepath) return content.read_binary_file(filepath)
@ -58,35 +67,31 @@ class FileBroker(object):
def mtime_metafile(self, citekey): def mtime_metafile(self, citekey):
try: try:
filepath = os.path.join(self.metadir, citekey + '.yaml') filepath = self.meta_path(citekey)
return os.path.getmtime(filepath) return os.path.getmtime(filepath)
except OSError: except OSError:
raise IOError("'{}' not found.".format(filepath)) raise IOError("'{}' not found.".format(filepath))
def mtime_bibfile(self, citekey): def mtime_bibfile(self, citekey):
try: try:
filepath = os.path.join(self.bibdir, citekey + '.bib') filepath = self.bib_path(citekey)
return os.path.getmtime(filepath) return os.path.getmtime(filepath)
except OSError: except OSError:
raise IOError("'{}' not found.".format(filepath)) raise IOError("'{}' not found.".format(filepath))
def pull_metafile(self, citekey): def pull_metafile(self, citekey):
filepath = os.path.join(self.metadir, citekey + '.yaml') return read_text_file(self.meta_path(citekey))
return read_text_file(filepath)
def pull_bibfile(self, citekey): def pull_bibfile(self, citekey):
filepath = os.path.join(self.bibdir, citekey + '.bib') return read_text_file(self.bib_path(citekey))
return read_text_file(filepath)
def push_metafile(self, citekey, metadata): def push_metafile(self, citekey, metadata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
filepath = os.path.join(self.metadir, citekey + '.yaml') write_file(self.meta_path(citekey), metadata)
write_file(filepath, metadata)
def push_bibfile(self, citekey, bibdata): def push_bibfile(self, citekey, bibdata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
filepath = os.path.join(self.bibdir, citekey + '.bib') write_file(self.bib_path(citekey), bibdata)
write_file(filepath, bibdata)
def push(self, citekey, metadata, bibdata): def push(self, citekey, metadata, bibdata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
@ -94,10 +99,10 @@ class FileBroker(object):
self.push_bibfile(citekey, bibdata) self.push_bibfile(citekey, bibdata)
def remove(self, citekey): def remove(self, citekey):
metafilepath = os.path.join(self.metadir, citekey + '.yaml') metafilepath = self.meta_path(citekey)
if check_file(metafilepath): if check_file(metafilepath):
os.remove(system_path(metafilepath)) os.remove(system_path(metafilepath))
bibfilepath = os.path.join(self.bibdir, citekey + '.bib') bibfilepath = self.bib_path(citekey)
if check_file(bibfilepath): if check_file(bibfilepath):
os.remove(system_path(bibfilepath)) os.remove(system_path(bibfilepath))
@ -106,16 +111,16 @@ class FileBroker(object):
:param meta_check: if True, return True only if both the bibtex and the meta file exist. :param meta_check: if True, return True only if both the bibtex and the meta file exist.
""" """
does_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False) does_exists = check_file(self.bib_path(citekey), fail=False)
if meta_check: if meta_check:
meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False) meta_exists = check_file(self.meta_path(citekey), fail=False)
does_exists = does_exists and meta_exists does_exists = does_exists and meta_exists
return does_exists return does_exists
def listing(self, filestats=True): def listing(self, filestats=True):
metafiles = [] metafiles = []
for filename in os.listdir(system_path(self.metadir)): for filename in os.listdir(system_path(self.metadir)):
citekey = filter_filename(filename, '.yaml') citekey = filter_filename(filename, META_EXT)
if citekey is not None: if citekey is not None:
if filestats: if filestats:
stats = os.stat(system_path(os.path.join(self.metadir, filename))) stats = os.stat(system_path(os.path.join(self.metadir, filename)))
@ -125,7 +130,7 @@ class FileBroker(object):
bibfiles = [] bibfiles = []
for filename in os.listdir(system_path(self.bibdir)): for filename in os.listdir(system_path(self.bibdir)):
citekey = filter_filename(filename, '.bib') citekey = filter_filename(filename, BIB_EXT)
if citekey is not None: if citekey is not None:
if filestats: if filestats:
stats = os.stat(system_path(os.path.join(self.bibdir, filename))) stats = os.stat(system_path(os.path.join(self.bibdir, filename)))

@ -39,6 +39,7 @@ else:
# for test_usecase. # for test_usecase.
def _get_raw_stdout(): def _get_raw_stdout():
return sys.stdout.buffer return sys.stdout.buffer
def _get_raw_stderr(): def _get_raw_stderr():
return sys.stderr.buffer return sys.stderr.buffer

@ -110,8 +110,9 @@ class Repository(object):
self.databroker.remove_note(citekey, self.conf['main']['note_extension'], self.databroker.remove_note(citekey, self.conf['main']['note_extension'],
silent=True) silent=True)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
self.citekeys.remove(citekey) self.citekeys.remove(citekey)
self.databroker.remove(citekey) self.databroker.remove(citekey)
@ -126,16 +127,18 @@ class Repository(object):
p.docpath = None p.docpath = None
self.push_paper(p, overwrite=True, event=False) self.push_paper(p, overwrite=True, event=False)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
def pull_docpath(self, citekey): def pull_docpath(self, citekey):
try: try:
p = self.pull_paper(citekey) p = self.pull_paper(citekey)
return self.databroker.real_docpath(p.docpath) return self.databroker.real_docpath(p.docpath)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
def rename_paper(self, paper, new_citekey=None, old_citekey=None): def rename_paper(self, paper, new_citekey=None, old_citekey=None):
if old_citekey is None: if old_citekey is None:

@ -0,0 +1,64 @@
# coding: utf8
from __future__ import unicode_literals
import unittest
import dotdot
from pubs.p3 import ustr
from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex
class TestDOI2Bibtex(unittest.TestCase):
    """Checks on the BibTeX text returned by doi2bibtex for a given DOI.

    Every test calls doi2bibtex with a literal DOI and inspects either the
    raw text or the result of decoding it with an EnDecoder.
    """

    def setUp(self):
        # A fresh decoder per test, used to parse the returned BibTeX text.
        self.endecoder = EnDecoder()

    def test_unicode(self):
        # The raw result must be a unicode string that keeps the accented
        # author name intact.
        bibtex = doi2bibtex('10.1007/BF01700692')
        self.assertIsInstance(bibtex, ustr)
        self.assertIn('Kurt Gödel', bibtex)

    def test_parses_to_bibtex(self):
        # The raw result must decode to exactly one entry whose author and
        # title carry the expected non-ASCII characters.
        bibtex = doi2bibtex('10.1007/BF01700692')
        decoded = self.endecoder.decode_bibdata(bibtex)
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        entry = decoded[first_key]
        self.assertEqual(entry['author'][0], 'Gödel, Kurt')
        self.assertEqual(entry['title'],
                         'Über formal unentscheidbare Sätze der Principia '
                         'Mathematica und verwandter Systeme I')

    def test_parse_fails_on_incorrect_DOI(self):
        # The lookup itself is expected to return normally; only decoding
        # the returned text is expected to raise.
        bibtex = doi2bibtex('999999')
        with self.assertRaises(ValueError):
            self.endecoder.decode_bibdata(bibtex)
class TestISBN2Bibtex(unittest.TestCase):
    """Checks on the BibTeX text returned by isbn2bibtex for a given ISBN.

    The tests call the lookup with a literal identifier and inspect either
    the raw text or the result of decoding it with an EnDecoder.
    """

    def setUp(self):
        # A fresh decoder per test, used to parse the returned BibTeX text.
        self.endecoder = EnDecoder()

    def test_unicode(self):
        # The raw result must be a unicode string that keeps the accented
        # author name intact.
        bibtex = isbn2bibtex('9782081336742')
        self.assertIsInstance(bibtex, ustr)
        self.assertIn('Poincaré, Henri', bibtex)

    def test_parses_to_bibtex(self):
        # The raw result must decode to exactly one entry with the expected
        # author and title.
        bibtex = isbn2bibtex('9782081336742')
        decoded = self.endecoder.decode_bibdata(bibtex)
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        entry = decoded[first_key]
        self.assertEqual(entry['author'][0], 'Poincaré, Henri')
        self.assertEqual(entry['title'], 'La science et l\'hypothèse')

    def test_parse_fails_on_incorrect_ISBN(self):
        # NOTE(review): this calls doi2bibtex, not isbn2bibtex, even though
        # the test is about ISBNs -- looks like a copy-paste slip. Confirm
        # how isbn2bibtex behaves on an invalid ISBN before switching.
        bibtex = doi2bibtex('9' * 13)
        with self.assertRaises(ValueError):
            self.endecoder.decode_bibdata(bibtex)
# Note: apparently ottobib.com uses character modifiers for accents instead
# of the correct Unicode characters. TODO: Should we convert them?
Loading…
Cancel
Save