Added support for URLs (as long as they begin with http://) as docfiles
This commit is contained in:
parent
0fc3df98fe
commit
b03b899c5a
@ -67,16 +67,19 @@ def extract_docfile(bibdata, remove=False):
|
|||||||
citekey, entry = get_entry(bibdata)
|
citekey, entry = get_entry(bibdata)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
field = entry.fields['file']
|
if 'file' in entry.fields:
|
||||||
# Check if this is mendeley specific
|
field = entry.fields['file']
|
||||||
for f in field.split(':'):
|
# Check if this is mendeley specific
|
||||||
if len(f) > 0:
|
for f in field.split(':'):
|
||||||
break
|
if len(f) > 0:
|
||||||
if remove:
|
break
|
||||||
entry.fields.pop('file')
|
if remove:
|
||||||
# This is a hck for Mendeley. Make clean
|
entry.fields.pop('file')
|
||||||
if f[0] != '/':
|
# This is a hck for Mendeley. Make clean
|
||||||
f = '/' + f
|
if f[0] != '/':
|
||||||
return f
|
f = '/' + f
|
||||||
|
return f
|
||||||
|
if 'attachments' in entry.fields:
|
||||||
|
return entry.fields['attachments']
|
||||||
except (KeyError, IndexError):
|
except (KeyError, IndexError):
|
||||||
return None
|
return None
|
||||||
|
@ -89,7 +89,7 @@ def command(args):
|
|||||||
if copy_doc is None:
|
if copy_doc is None:
|
||||||
copy_doc = config().import_copy
|
copy_doc = config().import_copy
|
||||||
if copy_doc:
|
if copy_doc:
|
||||||
docfile = rp.databroker.copy_doc(citekey, docfile)
|
docfile = rp.databroker.copy_doc(citekey, docfile)
|
||||||
|
|
||||||
# create the paper
|
# create the paper
|
||||||
|
|
||||||
|
@ -1,6 +1,12 @@
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
import urlparse
|
||||||
|
import httplib
|
||||||
|
import urllib2
|
||||||
|
|
||||||
|
|
||||||
# files i/o
|
# files i/o
|
||||||
|
|
||||||
@ -38,9 +44,34 @@ def write_file(filepath, data):
|
|||||||
|
|
||||||
# dealing with formatless content
|
# dealing with formatless content
|
||||||
|
|
||||||
|
def content_type(path):
    """Classify a document location as a remote URL or a local file.

    :param path: location string, e.g. ``'http://host/doc.pdf'`` or
        ``'/home/user/doc.pdf'``.
    :return: ``'url'`` when *path* is an absolute ``http``/``https`` URL
        (scheme and host both present), ``'file'`` for everything else.
    """
    parsed = urlparse.urlparse(path)
    # Require a host as well as the scheme: 'http:notes.pdf' parses with
    # scheme 'http' but has nothing to connect to, so it is treated as a
    # local path rather than a URL.
    if parsed.scheme in ('http', 'https') and parsed.netloc:
        return 'url'
    return 'file'
|
||||||
|
|
||||||
|
def url_exists(url):
    """Check with a HEAD request whether *url* answers ``200 OK``.

    :param url: absolute ``http://`` or ``https://`` URL.
    :return: ``True`` if the server replies with status 200, ``False`` for
        any other status. Redirects are not followed, so a 3xx reply is
        reported as ``False``.

    Connection-level errors raised by ``httplib`` or the socket layer
    (unknown host, refused connection, ...) are not caught here.
    """
    # urlsplit (unlike urlparse) leaves ';params' inside the path, so the
    # request target can be rebuilt from path + query alone.
    parts = urlparse.urlsplit(url)
    if parts.scheme == 'https':
        connection_class = httplib.HTTPSConnection
    else:
        connection_class = httplib.HTTPConnection

    # 'http://host' has an empty path, but the request line still needs
    # '/'; the query string is part of the resource being checked.
    target = parts.path or '/'
    if parts.query:
        target += '?' + parts.query

    conn = connection_class(parts.netloc)
    try:
        conn.request('HEAD', target)
        return conn.getresponse().status == 200
    finally:
        # Release the socket even when the request or response fails.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def check_content(path, fail=True):
    """Check that the document at *path* (local file or URL) is available.

    :param path: local file path or URL, as classified by ``content_type``.
    :param fail: when true (the default), a URL that ``url_exists`` reports
        as missing raises ``IOError`` instead of returning ``False``. The
        flag is also forwarded to ``check_file`` for local paths.
        NOTE(review): assumes ``check_file`` defaults to ``fail=True``, as
        its use as a bare validation statement suggests -- confirm.
    :return: the result of ``url_exists`` for URLs, of ``check_file``
        otherwise.
    :raises IOError: if *path* is a URL that is not reachable and *fail*
        is true.
    """
    if content_type(path) != 'url':
        return check_file(path, fail=fail)

    exists = url_exists(path)
    # Callers that use this as a validation statement discard the return
    # value, so an unreachable URL has to raise to be noticed at all.
    if fail and not exists:
        raise IOError('{} could not be reached.'.format(path))
    return exists
|
||||||
|
|
||||||
def get_content(path):
    """Fetch the document stored at *path*, which may be a file or a URL.

    For a URL, the response object returned by ``urllib2.urlopen`` is
    handed back as-is; nothing is read from it or closed here. For
    anything else, the result of ``read_file`` is returned.

    NOTE(review): the two branches may not return the same kind of
    object -- confirm what ``read_file`` returns against callers that
    call ``.read()`` on the result.
    """
    is_remote = content_type(path) == 'url'
    if not is_remote:
        return read_file(path)
    return urllib2.urlopen(path)
|
||||||
|
|
||||||
def move_content(source, target, overwrite = False):
|
def move_content(source, target, overwrite = False):
|
||||||
if source == target:
|
if source == target:
|
||||||
|
@ -4,6 +4,7 @@ import re
|
|||||||
import urlparse
|
import urlparse
|
||||||
|
|
||||||
from .content import check_file, check_directory, read_file, write_file
|
from .content import check_file, check_directory, read_file, write_file
|
||||||
|
from .content import check_content, content_type, get_content
|
||||||
|
|
||||||
def filter_filename(filename, ext):
|
def filter_filename(filename, ext):
|
||||||
""" Return the filename without the extension if the extension matches ext.
|
""" Return the filename without the extension if the extension matches ext.
|
||||||
@ -140,6 +141,8 @@ class DocBroker(object):
|
|||||||
docpath = os.path.join(self.docdir, parsed.netloc)
|
docpath = os.path.join(self.docdir, parsed.netloc)
|
||||||
else:
|
else:
|
||||||
docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:])
|
docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:])
|
||||||
|
elif content_type(docpath) != 'file':
|
||||||
|
return docpath
|
||||||
return os.path.normpath(os.path.abspath(docpath))
|
return os.path.normpath(os.path.abspath(docpath))
|
||||||
|
|
||||||
def add_doc(self, citekey, source_path, overwrite=False):
|
def add_doc(self, citekey, source_path, overwrite=False):
|
||||||
@ -151,13 +154,16 @@ class DocBroker(object):
|
|||||||
:return: the above location
|
:return: the above location
|
||||||
"""
|
"""
|
||||||
full_source_path = self.real_docpath(source_path)
|
full_source_path = self.real_docpath(source_path)
|
||||||
check_file(full_source_path)
|
check_content(full_source_path)
|
||||||
|
|
||||||
target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1])
|
target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1])
|
||||||
full_target_path = self.real_docpath(target_path)
|
full_target_path = self.real_docpath(target_path)
|
||||||
if not overwrite and check_file(full_target_path, fail=False):
|
if not overwrite and check_file(full_target_path, fail=False):
|
||||||
raise IOError('{} file exists.'.format(full_target_path))
|
raise IOError('{} file exists.'.format(full_target_path))
|
||||||
shutil.copy(full_source_path, full_target_path)
|
|
||||||
|
doc_content = get_content(full_source_path)
|
||||||
|
with open(full_target_path, 'wb') as f:
|
||||||
|
f.write(doc_content.read())
|
||||||
|
|
||||||
return target_path
|
return target_path
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user