Added support for URLs (as long as they begin with http://) as docfiles

2013-11-13 22:06:41 +01:00 · 2013-11-13 22:06:41 +01:00 · b03b899c5a
commit b03b899c5a
parent 0fc3df98fe
4 changed files with 55 additions and 15 deletions
--- a/papers/bibstruct.py
+++ b/papers/bibstruct.py
@@ -67,16 +67,19 @@ def extract_docfile(bibdata, remove=False):
    citekey, entry = get_entry(bibdata)

    try:
-        field = entry.fields['file']
-        # Check if this is mendeley specific
-        for f in field.split(':'):
-            if len(f) > 0:
-                break
-        if remove:
-            entry.fields.pop('file')
-        # This is a hck for Mendeley. Make clean
-        if f[0] != '/':
-            f = '/' + f
-        return f
+        if 'file' in entry.fields:
+            field = entry.fields['file']
+            # Check if this is mendeley specific
+            for f in field.split(':'):
+                if len(f) > 0:
+                    break
+            if remove:
+                entry.fields.pop('file')
+            # This is a hck for Mendeley. Make clean
+            if f[0] != '/':
+                f = '/' + f
+            return f
+        if 'attachments' in entry.fields:
+            return entry.fields['attachments']
    except (KeyError, IndexError):
        return None
--- a/papers/commands/add_cmd.py
+++ b/papers/commands/add_cmd.py
@@ -89,7 +89,7 @@ def command(args):
        if copy_doc is None:
            copy_doc = config().import_copy
        if copy_doc:
-            docfile = rp.databroker.copy_doc(citekey, docfile) 
+            docfile = rp.databroker.copy_doc(citekey, docfile)

    # create the paper

--- a/papers/content.py
+++ b/papers/content.py
@@ -1,6 +1,12 @@
 import os
 import subprocess
 import tempfile
+import shutil
+
+import urlparse
+import httplib
+import urllib2
+

    # files i/o

@@ -38,9 +44,34 @@ def write_file(filepath, data):

    # dealing with formatless content

+def content_type(path):
+    parsed = urlparse.urlparse(path)
+    if parsed.scheme == 'http':
+        return 'url'
+    else:
+        return 'file'
+
+def url_exists(url):
+    parsed = urlparse.urlparse(url)
+    conn = httplib.HTTPConnection(parsed.netloc)
+    conn.request('HEAD', parsed.path)
+    response = conn.getresponse()
+    conn.close()
+    return response.status == 200
+
+    
+def check_content(path):
+    if content_type(path) == 'url':
+        return url_exists(path)
+    else:
+        return check_file(path)
+
 def get_content(path):
    """Will be useful when we need to get content from url"""
-    return read_file(path)
+    if content_type(path) == 'url':
+        return urllib2.urlopen(path)
+    else:
+        return read_file(path)

 def move_content(source, target, overwrite = False):
    if source == target:
--- a/papers/filebroker.py
+++ b/papers/filebroker.py
@@ -4,6 +4,7 @@ import re
 import urlparse

 from .content import check_file, check_directory, read_file, write_file
+from .content import check_content, content_type, get_content

 def filter_filename(filename, ext):
    """ Return the filename without the extension if the extension matches ext.
@@ -140,6 +141,8 @@ class DocBroker(object):
                docpath = os.path.join(self.docdir, parsed.netloc)
            else:
                docpath = os.path.join(self.docdir, parsed.netloc, parsed.path[1:])
+        elif content_type(docpath) != 'file':
+            return docpath
        return os.path.normpath(os.path.abspath(docpath))

    def add_doc(self, citekey, source_path, overwrite=False):
@@ -151,13 +154,16 @@ class DocBroker(object):
            :return: the above location
        """
        full_source_path = self.real_docpath(source_path)
-        check_file(full_source_path)
+        check_content(full_source_path)

        target_path = '{}://{}'.format(self.scheme, citekey + os.path.splitext(source_path)[-1])
        full_target_path = self.real_docpath(target_path)
        if not overwrite and check_file(full_target_path, fail=False):
            raise IOError('{} file exists.'.format(full_target_path))
-        shutil.copy(full_source_path, full_target_path)
+        
+        doc_content = get_content(full_source_path)
+        with open(full_target_path, 'wb') as f:
+            f.write(doc_content.read())

        return target_path