Added support for URLs (as long as they begin with http://) for docfiles

main
humm 11 years ago
parent 0fc3df98fe
commit b03b899c5a

@ -67,16 +67,19 @@ def extract_docfile(bibdata, remove=False):
citekey, entry = get_entry(bibdata)
try:
field = entry.fields['file']
# Check if this is mendeley specific
for f in field.split(':'):
if len(f) > 0:
break
if remove:
entry.fields.pop('file')
# This is a hack for Mendeley. Make clean
if f[0] != '/':
f = '/' + f
return f
if 'file' in entry.fields:
field = entry.fields['file']
# Check if this is mendeley specific
for f in field.split(':'):
if len(f) > 0:
break
if remove:
entry.fields.pop('file')
# This is a hack for Mendeley. Make clean
if f[0] != '/':
f = '/' + f
return f
if 'attachments' in entry.fields:
return entry.fields['attachments']
except (KeyError, IndexError):
return None

@ -89,7 +89,7 @@ def command(args):
if copy_doc is None:
copy_doc = config().import_copy
if copy_doc:
docfile = rp.databroker.copy_doc(citekey, docfile)
docfile = rp.databroker.copy_doc(citekey, docfile)
# create the paper

@ -1,6 +1,12 @@
import os
import subprocess
import tempfile
import shutil
import urlparse
import httplib
import urllib2
# files i/o
@ -38,9 +44,34 @@ def write_file(filepath, data):
# dealing with formatless content
def content_type(path):
    """Classify `path` as either 'url' or 'file'.

    Only a path whose parsed scheme is 'http' is reported as 'url'; every
    other path is reported as 'file'.

    :param path: a filesystem path or an url.
    :return: the string 'url' or the string 'file'.
    """
    scheme = urlparse.urlparse(path).scheme
    return 'url' if scheme == 'http' else 'file'
def url_exists(url):
    """Tell whether a HEAD request for `url` is answered with status 200.

    The request is sent with httplib.HTTPConnection to the network location
    parsed from `url`.

    :param url: the url to probe.
    :return: True if the response status is exactly 200, False otherwise.
    """
    parsed = urlparse.urlparse(url)
    # An empty request target is not a valid HTTP request line, so fall back
    # to the root path; the query string is kept because it is part of the
    # resource being probed.
    target = parsed.path or '/'
    if parsed.query:
        target = '{}?{}'.format(target, parsed.query)
    conn = httplib.HTTPConnection(parsed.netloc)
    try:
        conn.request('HEAD', target)
        status = conn.getresponse().status
    finally:
        # Release the socket even when the request or the response fails.
        conn.close()
    return status == 200
def check_content(path):
    """Check that the content designated by `path` is available.

    Urls (as classified by content_type) are checked with url_exists; any
    other path is checked with check_file.

    :param path: a filesystem path or an url.
    :return: the result of the checker that was selected.
    """
    is_url = content_type(path) == 'url'
    checker = url_exists if is_url else check_file
    return checker(path)
def get_content(path):
    """Fetch the content designated by `path`, a local file or an http url.

    :param path: a filesystem path or an url.
    :return: for an url (as classified by content_type), the object returned
        by urllib2.urlopen; otherwise the result of read_file(path).
    """
    # NOTE(review): the url branch hands back the open response object, not
    # its data; read_file's return type is not visible here -- confirm that
    # callers can handle both.
    if content_type(path) == 'url':
        return urllib2.urlopen(path)
    return read_file(path)
def move_content(source, target, overwrite = False):
if source == target:

@ -4,6 +4,7 @@ import re
import urlparse
from .content import check_file, check_directory, read_file, write_file
from .content import check_content, content_type, get_content
def filter_filename(filename, ext):
""" Return the filename without the extension if the extension matches ext.
@ -140,6 +141,8 @@ class DocBroker(object):
docpath = os.path.join(self.docdir, parsed.netloc)
else:
docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:])
elif content_type(docpath) != 'file':
return docpath
return os.path.normpath(os.path.abspath(docpath))
def add_doc(self, citekey, source_path, overwrite=False):
@ -151,13 +154,16 @@ class DocBroker(object):
:return: the above location
"""
full_source_path = self.real_docpath(source_path)
check_file(full_source_path)
check_content(full_source_path)
target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1])
full_target_path = self.real_docpath(target_path)
if not overwrite and check_file(full_target_path, fail=False):
raise IOError('{} file exists.'.format(full_target_path))
shutil.copy(full_source_path, full_target_path)
doc_content = get_content(full_source_path)
with open(full_target_path, 'wb') as f:
f.write(doc_content.read())
return target_path

Loading…
Cancel
Save