Added support for URLs (as long as they begin with http://) for docfiles

2013-11-13 22:06:41 +01:00 · 2013-11-13 22:06:41 +01:00 · b03b899c5a
commit b03b899c5a
parent 0fc3df98fe
4 changed files with 55 additions and 15 deletions
--- a/papers/bibstruct.py
+++ b/papers/bibstruct.py
@ -67,16 +67,19 @@ def extract_docfile(bibdata, remove=False):
    citekey, entry = get_entry(bibdata)
    try:
-        field = entry.fields['file']
+        if 'file' in entry.fields:
-        # Check if this is mendeley specific
+            field = entry.fields['file']
-        for f in field.split(':'):
+            # Check if this is mendeley specific
-            if len(f) > 0:
+            for f in field.split(':'):
-                break
+                if len(f) > 0:
-        if remove:
+                    break
-            entry.fields.pop('file')
+            if remove:
-        # This is a hack for Mendeley. Make clean
+                entry.fields.pop('file')
-        if f[0] != '/':
+            # This is a hack for Mendeley. Make clean
-            f = '/' + f
+            if f[0] != '/':
-        return f
+                f = '/' + f
            return f
        if 'attachments' in entry.fields:
            return entry.fields['attachments']
    except (KeyError, IndexError):
        return None
--- a/papers/commands/add_cmd.py
+++ b/papers/commands/add_cmd.py
@ -89,7 +89,7 @@ def command(args):
        if copy_doc is None:
            copy_doc = config().import_copy
        if copy_doc:
-            docfile = rp.databroker.copy_doc(citekey, docfile) 
+            docfile = rp.databroker.copy_doc(citekey, docfile)
    # create the paper
--- a/papers/content.py
+++ b/papers/content.py
@ -1,6 +1,12 @@
 import os
 import subprocess
 import tempfile
 import shutil
 import urlparse
 import httplib
 import urllib2
    # files i/o
@ -38,9 +44,34 @@ def write_file(filepath, data):
    # dealing with formatless content
 def content_type(path):
    """Classify ``path`` as ``'url'`` or ``'file'``.

    Only a path whose parsed scheme is ``http`` is reported as ``'url'``;
    every other path is reported as ``'file'``.
    """
    scheme = urlparse.urlparse(path).scheme
    return 'url' if scheme == 'http' else 'file'
 def url_exists(url):
    """Return True if a HEAD request for ``url`` is answered with status 200.

    The request is sent over plain HTTP to the host named in ``url``.
    Any other status (redirects included) yields False. Connection errors
    raised by ``httplib`` are not caught here and propagate to the caller.

    :param url: the http url to probe.
    :return: True when the response status is exactly 200, False otherwise.
    """
    parsed = urlparse.urlparse(url)
    # Keep the query string so the probe targets the same resource as the
    # url itself; fall back to the root when the url carries no path.
    target = parsed.path or '/'
    if parsed.query:
        target = target + '?' + parsed.query
    conn = httplib.HTTPConnection(parsed.netloc)
    try:
        conn.request('HEAD', target)
        return conn.getresponse().status == 200
    finally:
        # Release the socket even when the request or the response fails.
        conn.close()
 def check_content(path):
    """Check ``path`` with the checker that matches its content type.

    Paths classified as ``'url'`` by content_type() are handed to
    url_exists(); all other paths are handed to check_file(). The result of
    the selected checker is returned unchanged.
    """
    if content_type(path) != 'url':
        return check_file(path)
    return url_exists(path)
 def get_content(path):
    """Will be useful when we need to get content from url"""
-    return read_file(path)
+    if content_type(path) == 'url':
        return urllib2.urlopen(path)
    else:
        return read_file(path)
 def move_content(source, target, overwrite = False):
    if source == target:
--- a/papers/filebroker.py
+++ b/papers/filebroker.py
@ -4,6 +4,7 @@ import re
 import urlparse
 from .content import check_file, check_directory, read_file, write_file
 from .content import check_content, content_type, get_content
 def filter_filename(filename, ext):
    """ Return the filename without the extension if the extension matches ext.
@ -140,6 +141,8 @@ class DocBroker(object):
                docpath = os.path.join(self.docdir, parsed.netloc)
            else:
                docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:])
        elif content_type(docpath) != 'file':
            return docpath
        return os.path.normpath(os.path.abspath(docpath))
    def add_doc(self, citekey, source_path, overwrite=False):
@ -151,13 +154,16 @@ class DocBroker(object):
            :return: the above location
        """
        full_source_path = self.real_docpath(source_path)
-        check_file(full_source_path)
+        check_content(full_source_path)
        target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1])
        full_target_path = self.real_docpath(target_path)
        if not overwrite and check_file(full_target_path, fail=False):
            raise IOError('{} file exists.'.format(full_target_path))
-        shutil.copy(full_source_path, full_target_path)
+        
        doc_content = get_content(full_source_path)
        with open(full_target_path, 'wb') as f:
            f.write(doc_content.read())
        return target_path