Merge branch 'fix/76'

main
Olivier Mangin 8 years ago
commit fd084bb827

@ -3,15 +3,19 @@
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def doi2bibtex(doi): def doi2bibtex(doi):
"""Return a bibtex string of metadata from a DOI""" """Return a bibtex string of metadata from a DOI"""
url = 'http://dx.doi.org/{}'.format(doi) url = 'http://dx.doi.org/{}'.format(doi)
headers = {'accept': 'application/x-bibtex'} headers = {'accept': 'application/x-bibtex'}
r = requests.get(url, headers=headers) r = requests.get(url, headers=headers)
if r.encoding is None:
r.encoding = 'utf8' # Do not rely on guessing from request
return r.text return r.text
def isbn2bibtex(isbn): def isbn2bibtex(isbn):
"""Return a bibtex string of metadata from a DOI""" """Return a bibtex string of metadata from a DOI"""

@ -52,16 +52,17 @@ def _open(path, mode):
else: else:
return open(system_path(path), mode, encoding='utf-8') return open(system_path(path), mode, encoding='utf-8')
def check_file(path, fail=True): def check_file(path, fail=True):
syspath = system_path(path) syspath = system_path(path)
return (_check_system_path_exists(syspath, fail=fail) return (_check_system_path_exists(syspath, fail=fail) and
and _check_system_path_is(u'isfile', syspath, fail=fail)) _check_system_path_is(u'isfile', syspath, fail=fail))
def check_directory(path, fail=True): def check_directory(path, fail=True):
syspath = system_path(path) syspath = system_path(path)
return (_check_system_path_exists(syspath, fail=fail) return (_check_system_path_exists(syspath, fail=fail) and
and _check_system_path_is(u'isdir', syspath, fail=fail)) _check_system_path_is(u'isdir', syspath, fail=fail))
def read_text_file(filepath, fail=True): def read_text_file(filepath, fail=True):
@ -79,6 +80,7 @@ def read_text_file(filepath, fail=True):
return content return content
def read_binary_file(filepath, fail=True): def read_binary_file(filepath, fail=True):
check_file(filepath, fail=fail) check_file(filepath, fail=fail)
with _open(filepath, 'rb') as f: with _open(filepath, 'rb') as f:
@ -92,7 +94,16 @@ def remove_file(filepath):
def write_file(filepath, data, mode='w'): def write_file(filepath, data, mode='w'):
"""Write data to file.
Data should be unicode except when binary mode is selected,
in which case data is expected to be binary.
"""
check_directory(os.path.dirname(filepath)) check_directory(os.path.dirname(filepath))
if 'b' not in mode and sys.version_info < (3,):
# _open returns in binary mode for python2
# Data must be encoded
data = data.encode('utf-8')
with _open(filepath, mode) as f: with _open(filepath, mode) as f:
f.write(data) f.write(data)

@ -3,17 +3,20 @@ import re
from .p3 import urlparse from .p3 import urlparse
from .content import (check_file, check_directory, read_text_file, write_file, from .content import (check_file, check_directory, read_text_file, write_file,
system_path, check_content, content_type, get_content, system_path, check_content, copy_content)
copy_content)
from . import content from . import content
META_EXT = '.yaml'
BIB_EXT = '.bib'
def filter_filename(filename, ext): def filter_filename(filename, ext):
""" Return the filename without the extension if the extension matches ext. """ Return the filename without the extension if the extension matches ext.
Otherwise return None Otherwise return None
""" """
pattern ='.*\{}$'.format(ext) pattern = '.*\{}$'.format(ext)
if re.match(pattern, filename) is not None: if re.match(pattern, filename) is not None:
return filename[:-len(ext)] return filename[:-len(ext)]
@ -48,6 +51,12 @@ class FileBroker(object):
if not check_directory(self.bibdir, fail=False): if not check_directory(self.bibdir, fail=False):
os.mkdir(system_path(self.bibdir)) os.mkdir(system_path(self.bibdir))
def bib_path(self, citekey):
return os.path.join(self.bibdir, citekey + BIB_EXT)
def meta_path(self, citekey):
return os.path.join(self.metadir, citekey + META_EXT)
def pull_cachefile(self, filename): def pull_cachefile(self, filename):
filepath = os.path.join(self.cachedir, filename) filepath = os.path.join(self.cachedir, filename)
return content.read_binary_file(filepath) return content.read_binary_file(filepath)
@ -58,35 +67,31 @@ class FileBroker(object):
def mtime_metafile(self, citekey): def mtime_metafile(self, citekey):
try: try:
filepath = os.path.join(self.metadir, citekey + '.yaml') filepath = self.meta_path(citekey)
return os.path.getmtime(filepath) return os.path.getmtime(filepath)
except OSError: except OSError:
raise IOError("'{}' not found.".format(filepath)) raise IOError("'{}' not found.".format(filepath))
def mtime_bibfile(self, citekey): def mtime_bibfile(self, citekey):
try: try:
filepath = os.path.join(self.bibdir, citekey + '.bib') filepath = self.bib_path(citekey)
return os.path.getmtime(filepath) return os.path.getmtime(filepath)
except OSError: except OSError:
raise IOError("'{}' not found.".format(filepath)) raise IOError("'{}' not found.".format(filepath))
def pull_metafile(self, citekey): def pull_metafile(self, citekey):
filepath = os.path.join(self.metadir, citekey + '.yaml') return read_text_file(self.meta_path(citekey))
return read_text_file(filepath)
def pull_bibfile(self, citekey): def pull_bibfile(self, citekey):
filepath = os.path.join(self.bibdir, citekey + '.bib') return read_text_file(self.bib_path(citekey))
return read_text_file(filepath)
def push_metafile(self, citekey, metadata): def push_metafile(self, citekey, metadata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
filepath = os.path.join(self.metadir, citekey + '.yaml') write_file(self.meta_path(citekey), metadata)
write_file(filepath, metadata)
def push_bibfile(self, citekey, bibdata): def push_bibfile(self, citekey, bibdata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
filepath = os.path.join(self.bibdir, citekey + '.bib') write_file(self.bib_path(citekey), bibdata)
write_file(filepath, bibdata)
def push(self, citekey, metadata, bibdata): def push(self, citekey, metadata, bibdata):
"""Put content to disk. Will gladly override anything standing in its way.""" """Put content to disk. Will gladly override anything standing in its way."""
@ -94,10 +99,10 @@ class FileBroker(object):
self.push_bibfile(citekey, bibdata) self.push_bibfile(citekey, bibdata)
def remove(self, citekey): def remove(self, citekey):
metafilepath = os.path.join(self.metadir, citekey + '.yaml') metafilepath = self.meta_path(citekey)
if check_file(metafilepath): if check_file(metafilepath):
os.remove(system_path(metafilepath)) os.remove(system_path(metafilepath))
bibfilepath = os.path.join(self.bibdir, citekey + '.bib') bibfilepath = self.bib_path(citekey)
if check_file(bibfilepath): if check_file(bibfilepath):
os.remove(system_path(bibfilepath)) os.remove(system_path(bibfilepath))
@ -106,16 +111,16 @@ class FileBroker(object):
:param meta_check: if True, will return if both the bibtex and the meta file exists. :param meta_check: if True, will return if both the bibtex and the meta file exists.
""" """
does_exists = check_file(os.path.join(self.bibdir, citekey + '.bib'), fail=False) does_exists = check_file(self.bib_path(citekey), fail=False)
if meta_check: if meta_check:
meta_exists = check_file(os.path.join(self.metadir, citekey + '.yaml'), fail=False) meta_exists = check_file(self.meta_path(citekey), fail=False)
does_exists = does_exists and meta_exists does_exists = does_exists and meta_exists
return does_exists return does_exists
def listing(self, filestats=True): def listing(self, filestats=True):
metafiles = [] metafiles = []
for filename in os.listdir(system_path(self.metadir)): for filename in os.listdir(system_path(self.metadir)):
citekey = filter_filename(filename, '.yaml') citekey = filter_filename(filename, META_EXT)
if citekey is not None: if citekey is not None:
if filestats: if filestats:
stats = os.stat(system_path(os.path.join(self.metadir, filename))) stats = os.stat(system_path(os.path.join(self.metadir, filename)))
@ -125,7 +130,7 @@ class FileBroker(object):
bibfiles = [] bibfiles = []
for filename in os.listdir(system_path(self.bibdir)): for filename in os.listdir(system_path(self.bibdir)):
citekey = filter_filename(filename, '.bib') citekey = filter_filename(filename, BIB_EXT)
if citekey is not None: if citekey is not None:
if filestats: if filestats:
stats = os.stat(system_path(os.path.join(self.bibdir, filename))) stats = os.stat(system_path(os.path.join(self.bibdir, filename)))

@ -39,6 +39,7 @@ else:
# for test_usecase. # for test_usecase.
def _get_raw_stdout(): def _get_raw_stdout():
return sys.stdout.buffer return sys.stdout.buffer
def _get_raw_stderr(): def _get_raw_stderr():
return sys.stderr.buffer return sys.stderr.buffer

@ -110,8 +110,9 @@ class Repository(object):
self.databroker.remove_note(citekey, self.conf['main']['note_extension'], self.databroker.remove_note(citekey, self.conf['main']['note_extension'],
silent=True) silent=True)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
self.citekeys.remove(citekey) self.citekeys.remove(citekey)
self.databroker.remove(citekey) self.databroker.remove(citekey)
@ -126,16 +127,18 @@ class Repository(object):
p.docpath = None p.docpath = None
self.push_paper(p, overwrite=True, event=False) self.push_paper(p, overwrite=True, event=False)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
def pull_docpath(self, citekey): def pull_docpath(self, citekey):
try: try:
p = self.pull_paper(citekey) p = self.pull_paper(citekey)
return self.databroker.real_docpath(p.docpath) return self.databroker.real_docpath(p.docpath)
except IOError: except IOError:
pass # FIXME: if IOError is about being unable to # FIXME: if IOError is about being unable to
# remove the file, we need to issue an error. # remove the file, we need to issue an error.
pass
def rename_paper(self, paper, new_citekey=None, old_citekey=None): def rename_paper(self, paper, new_citekey=None, old_citekey=None):
if old_citekey is None: if old_citekey is None:

@ -0,0 +1,64 @@
# coding: utf8
from __future__ import unicode_literals
import unittest
import dotdot
from pubs.p3 import ustr
from pubs.endecoder import EnDecoder
from pubs.apis import doi2bibtex, isbn2bibtex
class TestDOI2Bibtex(unittest.TestCase):
    """Checks on the BibTeX text returned by doi2bibtex for a given DOI."""

    def setUp(self):
        # Each test gets its own decoder to parse the returned BibTeX text.
        self.endecoder = EnDecoder()

    def test_unicode(self):
        # The result must be a unicode string with non-ASCII letters intact.
        bibtex = doi2bibtex('10.1007/BF01700692')
        self.assertIsInstance(bibtex, ustr)
        self.assertIn('Kurt Gödel', bibtex)

    def test_parses_to_bibtex(self):
        # The result must decode to exactly one entry with the expected
        # author and title.
        bibtex = doi2bibtex('10.1007/BF01700692')
        decoded = self.endecoder.decode_bibdata(bibtex)
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        entry = decoded[first_key]
        self.assertEqual(entry['author'][0], 'Gödel, Kurt')
        expected_title = ('Über formal unentscheidbare Sätze der Principia '
                          'Mathematica und verwandter Systeme I')
        self.assertEqual(entry['title'], expected_title)

    def test_parse_fails_on_incorrect_DOI(self):
        # The fetch happens outside the assertion on purpose: only the
        # decoding step is expected to raise ValueError.
        bibtex = doi2bibtex('999999')
        self.assertRaises(ValueError, self.endecoder.decode_bibdata, bibtex)
class TestISBN2Bibtex(unittest.TestCase):
    """Checks on the BibTeX text returned by isbn2bibtex for a given ISBN."""

    def setUp(self):
        # Each test gets its own decoder to parse the returned BibTeX text.
        self.endecoder = EnDecoder()

    def test_unicode(self):
        # The result must be a unicode string with non-ASCII letters intact.
        bibtex = isbn2bibtex('9782081336742')
        self.assertIsInstance(bibtex, ustr)
        self.assertIn('Poincaré, Henri', bibtex)

    def test_parses_to_bibtex(self):
        # The result must decode to exactly one entry with the expected
        # author and title.
        bibtex = isbn2bibtex('9782081336742')
        decoded = self.endecoder.decode_bibdata(bibtex)
        self.assertEqual(len(decoded), 1)
        first_key = list(decoded)[0]
        entry = decoded[first_key]
        self.assertEqual(entry['author'][0], 'Poincaré, Henri')
        self.assertEqual(entry['title'], 'La science et l\'hypothèse')

    def test_parse_fails_on_incorrect_ISBN(self):
        # NOTE(review): this ISBN test fetches through doi2bibtex, not
        # isbn2bibtex -- possibly a copy-paste slip; confirm which call is
        # intended before changing it.
        # The fetch happens outside the assertion: only the decoding step is
        # expected to raise ValueError.
        bibtex = doi2bibtex('9' * 13)
        self.assertRaises(ValueError, self.endecoder.decode_bibdata, bibtex)

    # Note: apparently ottobib.com uses character modifiers for accents
    # instead of the correct unicode characters. TODO: Should we convert them?
Loading…
Cancel
Save