Prechádzať zdrojové kódy

Improved UTF-8 support on non-Darwin platforms

Jonas Borgström pred 14 rokmi
rodič
commit
ddade8384c
3 zmenené súbory, 21 pridaní a 11 odobraní
  1. 5 4
      darc/archive.py
  2. 3 7
      darc/archiver.py
  3. 13 0
      darc/helpers.py

+ 5 - 4
darc/archive.py

@@ -11,7 +11,8 @@ from xattr import xattr, XATTR_NOFOLLOW
 
 from . import NS_ARCHIVE_METADATA, NS_CHUNK
 from ._speedups import chunkify
-from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, Counter
+from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, \
+    Counter, encode_filename
 
 CHUNK_SIZE = 64 * 1024
 WINDOW_SIZE = 4096
@@ -142,7 +143,7 @@ class Archive(object):
         dest = dest or os.getcwdu()
         dir_stat_queue = []
         assert item['path'][0] not in ('/', '\\', ':')
-        path = os.path.join(dest, item['path'].decode('utf-8'))
+        path = os.path.join(dest, encode_filename(item['path']))
         mode = item['mode']
         if stat.S_ISDIR(mode):
             if not os.path.exists(path):
@@ -166,7 +167,7 @@ class Archive(object):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
             if 'source' in item:
-                source = os.path.join(dest, item['source'].decode('utf-8'))
+                source = os.path.join(dest, item['source'])
                 if os.path.exists(path):
                     os.unlink(path)
                 os.link(source, path)
@@ -310,7 +311,7 @@ class Archive(object):
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(path.encode('utf-8'))
+        path_hash = self.key.id_hash(path)
         ids = cache.file_known_and_unchanged(path_hash, st)
         chunks = None
         if ids is not None:

+ 3 - 7
darc/archiver.py

@@ -27,16 +27,12 @@ class Archiver(object):
 
     def print_error(self, msg, *args):
         msg = args and msg % args or msg
-        if hasattr(sys.stderr, 'encoding'):
-            msg = msg.encode(sys.stderr.encoding or 'utf-8', 'ignore')
         self.exit_code = 1
         print >> sys.stderr, msg
 
     def print_verbose(self, msg, *args, **kw):
         if self.verbose:
             msg = args and msg % args or msg
-            if hasattr(sys.stdout, 'encoding'):
-                msg = msg.encode(sys.stdout.encoding or 'utf-8', 'ignore')
             if kw.get('newline', True):
                 print msg
             else:
@@ -78,7 +74,7 @@ class Archiver(object):
             except IOError:
                 pass
         for path in args.paths:
-            self._process(archive, cache, args.patterns, skip_inodes, unicode(path))
+            self._process(archive, cache, args.patterns, skip_inodes, path)
         archive.save(args.archive.archive, cache)
         return self.exit_code
 
@@ -117,7 +113,7 @@ class Archiver(object):
 
     def do_extract(self, args):
         def start_cb(item):
-            self.print_verbose(item['path'].decode('utf-8'))
+            self.print_verbose(item['path'])
         def extract_cb(item):
             if exclude_path(item['path'], args.patterns):
                 return
@@ -185,7 +181,7 @@ class Archiver(object):
         key = Key(store)
         archive = Archive(store, key, args.archive.archive)
         def start_cb(item):
-            self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
+            self.print_verbose('%s ...', item['path'], newline=False)
         def result_cb(item, success):
             if success:
                 self.print_verbose('OK')

+ 13 - 0
darc/helpers.py

@@ -8,7 +8,20 @@ import pwd
 import re
 import stat
 import struct
+import sys
 import time
+import urllib
+
+# OS X filenames are UTF-8 only, so any non-UTF-8 filenames are URL-encoded
+if sys.platform == 'darwin':
+    def encode_filename(name):
+        try:
+            name.decode('utf-8')
+            return name
+        except UnicodeDecodeError:
+            return urllib.quote(name)
+else:
+    encode_filename = str
 
 class Counter(object):