diff --git a/papers/bibstruct.py b/papers/bibstruct.py index 8b9f317..6b05ce6 100644 --- a/papers/bibstruct.py +++ b/papers/bibstruct.py @@ -67,16 +67,19 @@ def extract_docfile(bibdata, remove=False): citekey, entry = get_entry(bibdata) try: - field = entry.fields['file'] - # Check if this is mendeley specific - for f in field.split(':'): - if len(f) > 0: - break - if remove: - entry.fields.pop('file') - # This is a hck for Mendeley. Make clean - if f[0] != '/': - f = '/' + f - return f + if 'file' in entry.fields: + field = entry.fields['file'] + # Check if this is mendeley specific + for f in field.split(':'): + if len(f) > 0: + break + if remove: + entry.fields.pop('file') + # This is a hck for Mendeley. Make clean + if f[0] != '/': + f = '/' + f + return f + if 'attachments' in entry.fields: + return entry.fields['attachments'] except (KeyError, IndexError): return None diff --git a/papers/commands/add_cmd.py b/papers/commands/add_cmd.py index f35301d..6b80e83 100644 --- a/papers/commands/add_cmd.py +++ b/papers/commands/add_cmd.py @@ -89,7 +89,7 @@ def command(args): if copy_doc is None: copy_doc = config().import_copy if copy_doc: - docfile = rp.databroker.copy_doc(citekey, docfile) + docfile = rp.databroker.copy_doc(citekey, docfile) # create the paper diff --git a/papers/content.py b/papers/content.py index 737653b..113614b 100644 --- a/papers/content.py +++ b/papers/content.py @@ -1,6 +1,12 @@ import os import subprocess import tempfile +import shutil + +import urlparse +import httplib +import urllib2 + # files i/o @@ -38,9 +44,34 @@ def write_file(filepath, data): # dealing with formatless content +def content_type(path): + parsed = urlparse.urlparse(path) + if parsed.scheme == 'http': + return 'url' + else: + return 'file' + +def url_exists(url): + parsed = urlparse.urlparse(url) + conn = httplib.HTTPConnection(parsed.netloc) + conn.request('HEAD', parsed.path) + response = conn.getresponse() + conn.close() + return response.status == 200 + + +def check_content(path): + if content_type(path) == 'url': + return url_exists(path) + else: + return check_file(path) + def get_content(path): """Will be useful when we need to get content from url""" - return read_file(path) + if content_type(path) == 'url': + return urllib2.urlopen(path) + else: + return read_file(path) def move_content(source, target, overwrite = False): if source == target: diff --git a/papers/filebroker.py b/papers/filebroker.py index a723ab9..665e1cf 100644 --- a/papers/filebroker.py +++ b/papers/filebroker.py @@ -4,6 +4,7 @@ import re import urlparse from .content import check_file, check_directory, read_file, write_file +from .content import check_content, content_type, get_content def filter_filename(filename, ext): """ Return the filename without the extension if the extension matches ext. @@ -140,6 +141,8 @@ class DocBroker(object): docpath = os.path.join(self.docdir, parsed.netloc) else: docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:]) + elif content_type(docpath) != 'file': + return docpath return os.path.normpath(os.path.abspath(docpath)) def add_doc(self, citekey, source_path, overwrite=False): @@ -151,13 +154,16 @@ class DocBroker(object): :return: the above location """ full_source_path = self.real_docpath(source_path) - check_file(full_source_path) + check_content(full_source_path) target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1]) full_target_path = self.real_docpath(target_path) if not overwrite and check_file(full_target_path, fail=False): raise IOError('{} file exists.'.format(full_target_path)) - shutil.copy(full_source_path, full_target_path) + + doc_content = get_content(full_source_path) + with open(full_target_path, 'wb') as f: + f.write(doc_content.read()) return target_path