
Port to Python 3.2+

Jonas Borgström committed 12 years ago
parent commit 1fdc5eabc6
12 changed files with 356 additions and 340 deletions
  1. darc/_chunker.c      +16  -3
  2. darc/archive.py      +86  -85
  3. darc/archiver.py     +48  -47
  4. darc/cache.py        +20  -19
  5. darc/hashindex.pyx    +2  -1
  6. darc/helpers.py      +20  -21
  7. darc/key.py          +57  -57
  8. darc/lrucache.py      +1  -2
  9. darc/remote.py       +16  -14
 10. darc/store.py        +57  -51
 11. darc/test.py         +31  -33
 12. setup.py              +2  -7

+ 16 - 3
darc/_chunker.c

@@ -122,14 +122,27 @@ chunker_fill(Chunker *c)
     if(!data) {
         return 0;
     }
-    int n = PyString_Size(data);
-    memcpy(c->data + c->position + c->remaining, PyString_AsString(data), n);
+    int n = PyBytes_Size(data);
+    memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
     c->remaining += n;
     c->bytes_read += n;
     Py_DECREF(data);
     return 1;
 }
 
+PyObject *
+PyBuffer_FromMemory(void *data, Py_ssize_t len)
+{
+    Py_buffer buffer;
+    PyObject *mv;
+
+    PyBuffer_FillInfo(&buffer, NULL, data, len, 1, PyBUF_CONTIG_RO);
+    mv = PyMemoryView_FromBuffer(&buffer);
+    PyBuffer_Release(&buffer);
+    return mv;
+}
+
+
 static PyObject *
 chunker_process(Chunker *c)
 {
@@ -186,4 +199,4 @@ chunker_process(Chunker *c)
     c->bytes_yielded += n;
     return PyBuffer_FromMemory(c->data + old_last, n);
     
-}
+}
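
Python 3's C API drops PyString_* in favor of PyBytes_*, and the old buffer object is gone, so the chunker re-implements PyBuffer_FromMemory on top of memoryview. A rough Python-level sketch of what the chunker now yields (an illustrative analogue, not the C code itself):

    data = bytes(range(16))                  # stands in for the chunker's buffer
    chunk = memoryview(data)[4:12]           # zero-copy view, like c->data + old_last
    assert chunk.readonly and chunk.tobytes() == bytes(range(4, 12))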

+ 86 - 85
darc/archive.py

@@ -1,27 +1,26 @@
-from __future__ import with_statement
 from datetime import datetime, timedelta
 from getpass import getuser
-from itertools import izip_longest
+from itertools import zip_longest
 import msgpack
 import os
 import socket
 import stat
 import sys
 import time
-from cStringIO import StringIO
-from xattr import xattr, XATTR_NOFOLLOW
+from io import BytesIO
+import xattr
 
 from .chunker import chunkify
 from .helpers import uid2user, user2uid, gid2group, group2gid, \
-    encode_filename, Statistics
+    Statistics, decode_dict
 
 ITEMS_BUFFER = 1024 * 1024
 CHUNK_MIN = 1024
 WINDOW_SIZE = 0xfff
 CHUNK_MASK = 0xffff
 
-have_lchmod = hasattr(os, 'lchmod')
-linux = sys.platform == 'linux2'
+utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
+has_lchmod = hasattr(os, 'lchmod')
 
 
 class ItemIter(object):
@@ -39,20 +38,20 @@ class ItemIter(object):
     def __iter__(self):
         return self
 
-    def next(self):
+    def __next__(self):
         if self.stack:
             item = self.stack.pop(0)
         else:
             self._peek = None
             item = self.get_next()
-        self.peeks = max(0, self.peeks - len(item.get('chunks', [])))
+        self.peeks = max(0, self.peeks - len(item.get(b'chunks', [])))
         return item
 
     def get_next(self):
-        next = self.unpacker.next()
-        while self.filter and not self.filter(next):
-            next = self.unpacker.next()
-        return next
+        n = next(self.unpacker)
+        while self.filter and not self.filter(n):
+            n = next(self.unpacker)
+        return n
 
     def peek(self):
         while True:
@@ -61,12 +60,12 @@ class ItemIter(object):
                     raise StopIteration
                 self._peek = self.get_next()
                 self.stack.append(self._peek)
-                if 'chunks' in self._peek:
-                    self._peek_iter = iter(self._peek['chunks'])
+                if b'chunks' in self._peek:
+                    self._peek_iter = iter(self._peek[b'chunks'])
                 else:
                     self._peek_iter = None
             try:
-                item = self._peek_iter.next()
+                item = next(self._peek_iter)
                 self.peeks += 1
                 return item
             except StopIteration:
@@ -83,15 +82,12 @@ class Archive(object):
 
     def __init__(self, store, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False):
-        if sys.platform == 'darwin':
-            self.cwd = os.getcwdu()
-        else:
-            self.cwd = os.getcwd()
+        self.cwd = os.getcwd()
         self.key = key
         self.store = store
         self.cache = cache
         self.manifest = manifest
-        self.items = StringIO()
+        self.items = BytesIO()
         self.items_ids = []
         self.hard_links = {}
         self.stats = Statistics()
@@ -112,20 +108,22 @@ class Archive(object):
             if name not in self.manifest.archives:
                 raise self.DoesNotExist(name)
             info = self.manifest.archives[name]
-            self.load(info['id'])
+            self.load(info[b'id'])
 
     def load(self, id):
         self.id = id
         data = self.key.decrypt(self.id, self.store.get(self.id))
         self.metadata = msgpack.unpackb(data)
-        if self.metadata['version'] != 1:
+        if self.metadata[b'version'] != 1:
             raise Exception('Unknown archive metadata version')
-        self.name = self.metadata['name']
+        decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time'))
+        self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
+        self.name = self.metadata[b'name']
 
     @property
     def ts(self):
         """Timestamp of archive creation in UTC"""
-        t, f = self.metadata['time'].split('.', 1)
+        t, f = self.metadata[b'time'].split('.', 1)
         return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
 
     def __repr__(self):
@@ -136,18 +134,19 @@ class Archive(object):
         i = 0
         n = 20
         while True:
-            items = self.metadata['items'][i:i + n]
+            items = self.metadata[b'items'][i:i + n]
             i += n
             if not items:
                 break
-            for id, chunk in [(id, chunk) for id, chunk in izip_longest(items, self.store.get_many(items))]:
+            for id, chunk in [(id, chunk) for id, chunk in zip_longest(items, self.store.get_many(items))]:
                 unpacker.feed(self.key.decrypt(id, chunk))
                 iter = ItemIter(unpacker, filter)
                 for item in iter:
+                    decode_dict(item, (b'path', b'source', b'user', b'group'))
                     yield item, iter.peek
 
     def add_item(self, item):
-        self.items.write(msgpack.packb(item))
+        self.items.write(msgpack.packb(item, unicode_errors='surrogateescape'))
         now = time.time()
         if now - self.last_checkpoint > self.checkpoint_interval:
             self.last_checkpoint = now
@@ -159,7 +158,7 @@ class Archive(object):
         if self.items.tell() == 0:
             return
         self.items.seek(0)
-        chunks = list(str(s) for s in chunkify(self.items, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed))
+        chunks = list(bytes(s) for s in chunkify(self.items, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed))
         self.items.seek(0)
         self.items.truncate()
         for chunk in chunks[:-1]:
@@ -190,7 +189,7 @@ class Archive(object):
             'username': getuser(),
             'time': datetime.utcnow().isoformat(),
         }
-        data = msgpack.packb(metadata)
+        data = msgpack.packb(metadata, unicode_errors='surrogateescape')
         self.id = self.key.id_hash(data)
         self.cache.add_chunk(self.id, data, self.stats)
         self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']}
@@ -209,12 +208,12 @@ class Archive(object):
         cache.begin_txn()
         stats = Statistics()
         add(self.id)
-        for id, chunk in izip_longest(self.metadata['items'], self.store.get_many(self.metadata['items'])):
+        for id, chunk in zip_longest(self.metadata[b'items'], self.store.get_many(self.metadata[b'items'])):
             add(id)
             unpacker.feed(self.key.decrypt(id, chunk))
             for item in unpacker:
                 try:
-                    for id, size, csize in item['chunks']:
+                    for id, size, csize in item[b'chunks']:
                         add(id)
                     stats.nfiles += 1
                 except KeyError:
@@ -224,8 +223,8 @@ class Archive(object):
 
     def extract_item(self, item, dest=None, restore_attrs=True, peek=None):
         dest = dest or self.cwd
-        assert item['path'][0] not in ('/', '\\', ':')
-        path = os.path.join(dest, encode_filename(item['path']))
+        assert item[b'path'][:1] not in ('/', '\\', ':')
+        path = os.path.join(dest, item[b'path'])
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.lstat(path)
@@ -235,7 +234,7 @@ class Archive(object):
                 os.unlink(path)
         except OSError:
             pass
-        mode = item['mode']
+        mode = item[b'mode']
         if stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -245,18 +244,18 @@ class Archive(object):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
-            if 'source' in item:
-                source = os.path.join(dest, item['source'])
+            if b'source' in item:
+                source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
                 os.link(source, path)
             else:
-                with open(path, 'wbx') as fd:
-                    ids = [id for id, size, csize in item['chunks']]
-                    for id, chunk in izip_longest(ids, self.store.get_many(ids, peek)):
+                with open(path, 'wb') as fd:
+                    ids = [id for id, size, csize in item[b'chunks']]
+                    for id, chunk in zip_longest(ids, self.store.get_many(ids, peek)):
                         data = self.key.decrypt(id, chunk)
                         fd.write(data)
-                self.restore_attrs(path, item)
+                    self.restore_attrs(path, item, fd=fd.fileno())
         elif stat.S_ISFIFO(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
@@ -265,53 +264,61 @@ class Archive(object):
         elif stat.S_ISLNK(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
-            source = item['source']
+            source = item[b'source']
             if os.path.exists(path):
                 os.unlink(path)
             os.symlink(source, path)
             self.restore_attrs(path, item, symlink=True)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
-            os.mknod(path, item['mode'], item['rdev'])
+            os.mknod(path, item[b'mode'], item[b'rdev'])
             self.restore_attrs(path, item)
         else:
-            raise Exception('Unknown archive item type %r' % item['mode'])
+            raise Exception('Unknown archive item type %r' % item[b'mode'])
 
-    def restore_attrs(self, path, item, symlink=False):
-        xattrs = item.get('xattrs')
+    def restore_attrs(self, path, item, symlink=False, fd=None):
+        xattrs = item.get(b'xattrs')
         if xattrs:
-            xa = xattr(path, XATTR_NOFOLLOW)
             for k, v in xattrs.items():
                 try:
-                    xa.set(k, v)
-                except (IOError, KeyError):
+                    xattr.set(fd or path, k, v)
+                except (EnvironmentError):
                     pass
         uid = gid = None
         if not self.numeric_owner:
-            uid = user2uid(item['user'])
-            gid = group2gid(item['group'])
-        uid = uid or item['uid']
-        gid = gid or item['gid']
+            uid = user2uid(item[b'user'])
+            gid = group2gid(item[b'group'])
+        uid = uid or item[b'uid']
+        gid = gid or item[b'gid']
+        # This code is a bit of a mess due to os specific differences
         try:
-            os.lchown(path, uid, gid)
+            if fd:
+                os.fchown(fd, uid, gid)
+            else:
+                os.lchown(path, uid, gid)
         except OSError:
             pass
-        if have_lchmod:
-            os.lchmod(path, item['mode'])
+        if fd:
+            os.fchmod(fd, item[b'mode'])
+        elif not symlink:
+            os.chmod(path, item[b'mode'])
+        elif has_lchmod:  # Not available on Linux
+            os.lchmod(path, item[b'mode'])
+        if fd and utime_supports_fd:  # Python >= 3.3
+            os.utime(fd, (item[b'mtime'], item[b'mtime']))
+        elif utime_supports_fd:  # Python >= 3.3
+            os.utime(path, (item[b'mtime'], item[b'mtime']), follow_symlinks=False)
         elif not symlink:
-            os.chmod(path, item['mode'])
-        if not symlink:
-            # FIXME: We should really call futimes here (c extension required)
-            os.utime(path, (item['mtime'], item['mtime']))
+            os.utime(path, (item[b'mtime'], item[b'mtime']))
 
     def verify_file(self, item, start, result, peek=None):
-        if not item['chunks']:
+        if not item[b'chunks']:
             start(item)
             result(item, True)
         else:
             start(item)
-            ids = [id for id, size, csize in item['chunks']]
+            ids = [id for id, size, csize in item[b'chunks']]
             try:
-                for id, chunk in izip_longest(ids, self.store.get_many(ids, peek)):
+                for id, chunk in zip_longest(ids, self.store.get_many(ids, peek)):
                     self.key.decrypt(id, chunk)
             except Exception:
                 result(item, False)
@@ -320,11 +327,11 @@ class Archive(object):
 
     def delete(self, cache):
         unpacker = msgpack.Unpacker(use_list=False)
-        for id in self.metadata['items']:
+        for id in self.metadata[b'items']:
             unpacker.feed(self.key.decrypt(id, self.store.get(id)))
             for item in unpacker:
                 try:
-                    for chunk_id, size, csize in item['chunks']:
+                    for chunk_id, size, csize in item[b'chunks']:
                         self.cache.chunk_decref(chunk_id)
                 except KeyError:
                     pass
@@ -337,40 +344,34 @@ class Archive(object):
 
     def stat_attrs(self, st, path):
         item = {
-            'mode': st.st_mode,
-            'uid': st.st_uid, 'user': uid2user(st.st_uid),
-            'gid': st.st_gid, 'group': gid2group(st.st_gid),
-            'mtime': st.st_mtime,
+            b'mode': st.st_mode,
+            b'uid': st.st_uid, b'user': uid2user(st.st_uid),
+            b'gid': st.st_gid, b'group': gid2group(st.st_gid),
+            b'mtime': st.st_mtime,
         }
         if self.numeric_owner:
-            item['user'] = item['group'] = None
+            item[b'user'] = item[b'group'] = None
         try:
-            xa = xattr(path, XATTR_NOFOLLOW)
-            xattrs = {}
-            for key in xa:
-                # Only store the user namespace on Linux
-                if linux and not key.startswith('user'):
-                    continue
-                xattrs[key] = xa[key]
+            xattrs = xattr.get_all(path, True)
             if xattrs:
-                item['xattrs'] = xattrs
-        except IOError:
+                item[b'xattrs'] = dict(xattrs)
+        except EnvironmentError:
             pass
         return item
 
     def process_item(self, path, st):
-        item = {'path': path.lstrip('/\\:')}
+        item = {b'path': path.lstrip('/\\:')}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
 
     def process_dev(self, path, st):
-        item = {'path': path.lstrip('/\\:'), 'rdev': st.st_rdev}
+        item = {b'path': path.lstrip('/\\:'), b'rdev': st.st_rdev}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
 
     def process_symlink(self, path, st):
         source = os.readlink(path)
-        item = {'path': path.lstrip('/\\:'), 'source': source}
+        item = {b'path': path.lstrip('/\\:'), b'source': source}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
 
@@ -381,12 +382,12 @@ class Archive(object):
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({'path': safe_path, 'source': source})
+                item.update({b'path': safe_path, b'source': source})
                 self.add_item(item)
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(path)
+        path_hash = self.key.id_hash(path.encode('utf-8', 'surrogateescape'))
         ids = cache.file_known_and_unchanged(path_hash, st)
         chunks = None
         if ids is not None:
@@ -404,7 +405,7 @@ class Archive(object):
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
             ids = [id for id, _, _ in chunks]
             cache.memorize_file(path_hash, st, ids)
-        item = {'path': safe_path, 'chunks': chunks}
+        item = {b'path': safe_path, b'chunks': chunks}
         item.update(self.stat_attrs(st, path))
         self.stats.nfiles += 1
         self.add_item(item)
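
The pervasive b'...' key lookups in this file follow from msgpack semantics: strings are packed as raw bytes and come back as bytes under Python 3 unless decoded, which is what the new decode_dict() helper does, with surrogateescape preserving undecodable filename bytes. A minimal sketch, assuming the msgpack-python API of this era:

    import msgpack

    packed = msgpack.packb({'path': 'caf\xe9'}, unicode_errors='surrogateescape')
    item = msgpack.unpackb(packed)           # keys and string values arrive as bytes
    assert item[b'path'].decode('utf-8', 'surrogateescape') == 'caf\xe9'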

+ 48 - 47
darc/archiver.py

@@ -1,4 +1,5 @@
 import argparse
+from binascii import hexlify
 from datetime import datetime
 from operator import attrgetter
 import os
@@ -11,7 +12,7 @@ from .cache import Cache
 from .key import key_creator
 from .helpers import location_validator, format_time, \
     format_file_mode, IncludePattern, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
-    get_cache_dir, format_timedelta, prune_split, Manifest, Location
+    get_cache_dir, format_timedelta, prune_split, Manifest, Location, remove_surrogates
 from .remote import StoreServer, RemoteStore
 
 
@@ -31,21 +32,21 @@ class Archiver(object):
     def print_error(self, msg, *args):
         msg = args and msg % args or msg
         self.exit_code = 1
-        print >> sys.stderr, 'darc: ' + msg
+        print('darc: ' + msg, file=sys.stderr)
 
     def print_verbose(self, msg, *args, **kw):
         if self.verbose:
             msg = args and msg % args or msg
             if kw.get('newline', True):
-                print msg
+                print(msg)
             else:
-                print msg,
+                print(msg, end=' ')
 
     def do_serve(self, args):
         return StoreServer().serve()
 
     def do_init(self, args):
-        print 'Initializing store "%s"' % args.store.orig
+        print('Initializing store "%s"' % args.store.orig)
         store = self.open_store(args.store, create=True)
         key = key_creator(store, args)
         manifest = Manifest()
@@ -87,7 +88,7 @@ class Archiver(object):
             if args.dontcross:
                 try:
                     restrict_dev = os.lstat(path).st_dev
-                except OSError, e:
+                except OSError as e:
                     self.print_error('%s: %s', path, e)
                     continue
             else:
@@ -97,14 +98,14 @@ class Archiver(object):
         if args.stats:
             t = datetime.now()
             diff = t - t0
-            print '-' * 40
-            print 'Archive name: %s' % args.archive.archive
-            print 'Archive fingerprint: %s' % archive.id.encode('hex')
-            print 'Start time: %s' % t0.strftime('%c')
-            print 'End time: %s' % t.strftime('%c')
-            print 'Duration: %s' % format_timedelta(diff)
+            print('-' * 40)
+            print('Archive name: %s' % args.archive.archive)
+            print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+            print('Start time: %s' % t0.strftime('%c'))
+            print('End time: %s' % t.strftime('%c'))
+            print('Duration: %s' % format_timedelta(diff))
             archive.stats.print_()
-            print '-' * 40
+            print('-' * 40)
         return self.exit_code
 
     def _process(self, archive, cache, patterns, skip_inodes, path, restrict_dev):
@@ -112,7 +113,7 @@ class Archiver(object):
             return
         try:
             st = os.lstat(path)
-        except OSError, e:
+        except OSError as e:
             self.print_error('%s: %s', path, e)
             return
         if (st.st_ino, st.st_dev) in skip_inodes:
@@ -123,17 +124,17 @@ class Archiver(object):
         # Ignore unix sockets
         if stat.S_ISSOCK(st.st_mode):
             return
-        self.print_verbose(path)
+        self.print_verbose(remove_surrogates(path))
         if stat.S_ISREG(st.st_mode):
             try:
                 archive.process_file(path, st, cache)
-            except IOError, e:
+            except IOError as e:
                 self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
             archive.process_item(path, st)
             try:
                 entries = os.listdir(path)
-            except OSError, e:
+            except OSError as e:
                 self.print_error('%s: %s', path, e)
             else:
                 for filename in sorted(entries):
@@ -154,18 +155,18 @@ class Archiver(object):
         archive = Archive(store, key, manifest, args.archive.archive,
                           numeric_owner=args.numeric_owner)
         dirs = []
-        for item, peek in archive.iter_items(lambda item: not exclude_path(item['path'], args.patterns)):
-            while dirs and not item['path'].startswith(dirs[-1]['path']):
+        for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
+            while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
                 archive.extract_item(dirs.pop(-1), args.dest)
-            self.print_verbose(item['path'])
+            self.print_verbose(remove_surrogates(item[b'path']))
             try:
-                if stat.S_ISDIR(item['mode']):
+                if stat.S_ISDIR(item[b'mode']):
                     dirs.append(item)
                     archive.extract_item(item, args.dest, restore_attrs=False)
                 else:
                     archive.extract_item(item, args.dest, peek=peek)
-            except IOError, e:
-                self.print_error('%s: %s', item['path'], e)
+            except IOError as e:
+                self.print_error('%s: %s', remove_surrogates(item[b'path']), e)
 
         while dirs:
             archive.extract_item(dirs.pop(-1), args.dest)
@@ -183,32 +184,32 @@ class Archiver(object):
         store = self.open_store(args.src)
         manifest, key = Manifest.load(store)
         if args.src.archive:
-            tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
+            tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
             archive = Archive(store, key, manifest, args.src.archive)
             for item, _ in archive.iter_items():
-                type = tmap.get(item['mode'] / 4096, '?')
-                mode = format_file_mode(item['mode'])
+                type = tmap.get(item[b'mode'] // 4096, '?')
+                mode = format_file_mode(item[b'mode'])
                 size = 0
                 if type == '-':
                     try:
-                        size = sum(size for _, size, _ in item['chunks'])
+                        size = sum(size for _, size, _ in item[b'chunks'])
                     except KeyError:
                         pass
-                mtime = format_time(datetime.fromtimestamp(item['mtime']))
-                if 'source' in item:
+                mtime = format_time(datetime.fromtimestamp(item[b'mtime']))
+                if b'source' in item:
                     if type == 'l':
-                        extra = ' -> %s' % item['source']
+                        extra = ' -> %s' % item[b'source']
                     else:
                         type = 'h'
-                        extra = ' link to %s' % item['source']
+                        extra = ' link to %s' % item[b'source']
                 else:
                     extra = ''
-                print '%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item['user'] or item['uid'],
-                                                  item['group'] or item['gid'], size, mtime,
-                                                  item['path'], extra)
+                print('%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item[b'user'] or item[b'uid'],
+                                                  item[b'group'] or item[b'gid'], size, mtime,
+                                                  remove_surrogates(item[b'path']), extra))
         else:
             for archive in sorted(Archive.list_archives(store, key, manifest), key=attrgetter('ts')):
-                print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c'))
+                print('%-20s %s' % (archive.metadata[b'name'], to_localtime(archive.ts).strftime('%c')))
         return self.exit_code
 
     def do_verify(self, args):
@@ -217,16 +218,16 @@ class Archiver(object):
         archive = Archive(store, key, manifest, args.archive.archive)
 
         def start_cb(item):
-            self.print_verbose('%s ...', item['path'], newline=False)
+            self.print_verbose('%s ...', remove_surrogates(item[b'path']), newline=False)
 
         def result_cb(item, success):
             if success:
                 self.print_verbose('OK')
             else:
                 self.print_verbose('ERROR')
-                self.print_error('%s: verification failed' % item['path'])
-        for item, peek in archive.iter_items(lambda item: not exclude_path(item['path'], args.patterns)):
-            if stat.S_ISREG(item['mode']) and 'chunks' in item:
+                self.print_error('%s: verification failed' % remove_surrogates(item[b'path']))
+        for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
+            if stat.S_ISREG(item[b'mode']) and b'chunks' in item:
                 archive.verify_file(item, start_cb, result_cb, peek=peek)
         return self.exit_code
 
@@ -236,12 +237,12 @@ class Archiver(object):
         cache = Cache(store, key, manifest)
         archive = Archive(store, key, manifest, args.archive.archive, cache=cache)
         stats = archive.calc_stats(cache)
-        print 'Name:', archive.name
-        print 'Fingerprint: %s' % archive.id.encode('hex')
-        print 'Hostname:', archive.metadata['hostname']
-        print 'Username:', archive.metadata['username']
-        print 'Time:', to_localtime(archive.ts).strftime('%c')
-        print 'Command line:', ' '.join(archive.metadata['cmdline'])
+        print('Name:', archive.name)
+        print('Fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+        print('Hostname:', archive.metadata[b'hostname'])
+        print('Username:', archive.metadata[b'username'])
+        print('Time: %s' % to_localtime(archive.ts).strftime('%c'))
+        print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline'])))
         stats.print_()
         return self.exit_code
 
@@ -419,10 +420,10 @@ def main():
     except Store.AlreadyExists:
         archiver.print_error('Error: Store already exists')
         exit_code = 1
-    except Archive.AlreadyExists, e:
+    except Archive.AlreadyExists as e:
         archiver.print_error('Error: Archive "%s" already exists', e)
         exit_code = 1
-    except Archive.DoesNotExist, e:
+    except Archive.DoesNotExist as e:
         archiver.print_error('Error: Archive "%s" does not exist', e)
         exit_code = 1
     except KeyboardInterrupt:
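
Several mechanical Python 2-to-3 changes recur above: print becomes a function, except clauses use "as", octal literals need the 0o prefix, and / is true division, so the mode lookup switches to //. A small demonstration of the tmap change:

    tmap = {0o10: '-', 0o12: 'l', 0o14: 's'}     # 010 would be a SyntaxError
    assert tmap[0o120000 // 4096] == 'l'         # stat.S_IFLNK == 0o120000
    assert 7 / 2 == 3.5 and 7 // 2 == 3          # why / had to become //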

+ 20 - 19
darc/cache.py

@@ -1,12 +1,12 @@
-from __future__ import with_statement
-from ConfigParser import RawConfigParser
+from configparser import RawConfigParser
 import fcntl
-from itertools import izip_longest
+from itertools import zip_longest
 import msgpack
 import os
+from binascii import hexlify, unhexlify
 import shutil
 
-from .helpers import get_cache_dir
+from .helpers import get_cache_dir, decode_dict
 from .hashindex import ChunkIndex
 
 
@@ -19,7 +19,7 @@ class Cache(object):
         self.store = store
         self.key = key
         self.manifest = manifest
-        self.path = os.path.join(get_cache_dir(), store.id.encode('hex'))
+        self.path = os.path.join(get_cache_dir(), hexlify(store.id).decode('ascii'))
         if not os.path.exists(self.path):
             self.create()
         self.open()
@@ -31,17 +31,17 @@ class Cache(object):
         """Create a new empty store at `path`
         """
         os.makedirs(self.path)
-        with open(os.path.join(self.path, 'README'), 'wb') as fd:
+        with open(os.path.join(self.path, 'README'), 'w') as fd:
             fd.write('This is a DARC cache')
         config = RawConfigParser()
         config.add_section('cache')
         config.set('cache', 'version', '1')
-        config.set('cache', 'store', self.store.id.encode('hex'))
+        config.set('cache', 'store', hexlify(self.store.id).decode('ascii'))
         config.set('cache', 'manifest', '')
-        with open(os.path.join(self.path, 'config'), 'wb') as fd:
+        with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
-        ChunkIndex.create(os.path.join(self.path, 'chunks'))
-        with open(os.path.join(self.path, 'files'), 'wb') as fd:
+        ChunkIndex.create(os.path.join(self.path, 'chunks').encode('utf-8'))
+        with open(os.path.join(self.path, 'files'), 'w') as fd:
             pass  # empty file
 
     def open(self):
@@ -55,8 +55,8 @@ class Cache(object):
         if self.config.getint('cache', 'version') != 1:
             raise Exception('%s Does not look like a darc cache')
         self.id = self.config.get('cache', 'store')
-        self.manifest_id = self.config.get('cache', 'manifest').decode('hex')
-        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks'))
+        self.manifest_id = unhexlify(self.config.get('cache', 'manifest').encode('ascii'))  # .encode needed for Python 3.[0-2]
+        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks').encode('utf-8'))
         self.files = None
 
     def _read_files(self):
@@ -91,12 +91,12 @@ class Cache(object):
             return
         if self.files is not None:
             with open(os.path.join(self.path, 'files'), 'wb') as fd:
-                for item in self.files.iteritems():
+                for item in self.files.items():
                     # Discard cached files with the newest mtime to avoid
                     # issues with filesystem snapshots and mtime precision
                     if item[1][0] < 10 and item[1][3] < self._newest_mtime:
                         msgpack.pack(item, fd)
-        self.config.set('cache', 'manifest', self.manifest.id.encode('hex'))
+        self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii'))
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             self.config.write(fd)
         self.chunks.flush()
@@ -130,23 +130,24 @@ class Cache(object):
             except KeyError:
                 self.chunks[id] = 1, size, csize
         self.begin_txn()
-        print 'Initializing cache...'
+        print('Initializing cache...')
         self.chunks.clear()
         unpacker = msgpack.Unpacker()
         for name, info in self.manifest.archives.items():
-            id = info['id']
+            id = info[b'id']
             cdata = self.store.get(id)
             data = self.key.decrypt(id, cdata)
             add(id, len(data), len(cdata))
             archive = msgpack.unpackb(data)
-            print 'Analyzing archive:', archive['name']
-            for id, chunk in izip_longest(archive['items'], self.store.get_many(archive['items'])):
+            decode_dict(archive, (b'name', b'hostname', b'username', b'time'))  # fixme: argv
+            print('Analyzing archive:', archive[b'name'])
+            for id, chunk in zip_longest(archive[b'items'], self.store.get_many(archive[b'items'])):
                 data = self.key.decrypt(id, chunk)
                 add(id, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
                     try:
-                        for id, size, csize in item['chunks']:
+                        for id, size, csize in item[b'chunks']:
                             add(id, size, csize)
                     except KeyError:
                         pass
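
The hexlify dance appears throughout this commit because bytes.encode('hex') no longer exists in Python 3; binascii.hexlify/unhexlify take and return bytes, hence the added .decode('ascii') before handing strings to configparser:

    from binascii import hexlify, unhexlify

    store_id = b'\x00\xff\x10\x20'
    text = hexlify(store_id).decode('ascii')     # '00ff1020', safe for config files
    assert unhexlify(text.encode('ascii')) == store_id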

+ 2 - 1
darc/hashindex.pyx

@@ -27,7 +27,8 @@ cdef class IndexBase:
             raise Exception('Failed to open %s' % path)
 
     def __dealloc__(self):
-        hashindex_close(self.index)
+        if self.index:
+            hashindex_close(self.index)
 
     def clear(self):
         hashindex_clear(self.index)
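
The guard matters because __dealloc__ runs even when __cinit__ raised partway through; in that case self.index is still NULL and hashindex_close() would dereference it. A pure-Python analogue of the pattern (illustrative only, not the Cython code):

    class IndexBase:
        def __init__(self, path):
            self.index = None                # assigned before anything that can raise
            self.index = open(path, 'rb')    # stand-in for hashindex_open()
        def close(self):                     # mirrors the guarded __dealloc__
            if self.index:                   # None when __init__ failed early
                self.index.close()
                self.index = None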

+ 20 - 21
darc/helpers.py

@@ -1,4 +1,3 @@
-from __future__ import with_statement
 import argparse
 from datetime import datetime, timedelta
 from fnmatch import fnmatchcase
@@ -16,7 +15,7 @@ import urllib
 
 class Manifest(object):
 
-    MANIFEST_ID = '\0' * 32
+    MANIFEST_ID = b'\0' * 32
 
     def __init__(self):
         self.archives = {}
@@ -32,10 +31,10 @@ class Manifest(object):
         data = key.decrypt(None, cdata)
         manifest.id = key.id_hash(data)
         m = msgpack.unpackb(data)
-        if not m.get('version') == 1:
+        if not m.get(b'version') == 1:
             raise ValueError('Invalid manifest version')
-        manifest.archives = m['archives']
-        manifest.config = m['config']
+        manifest.archives = dict((k.decode('utf-8'), v) for k,v in m[b'archives'].items())
+        manifest.config = m[b'config']
         return manifest, key
 
     def write(self):
@@ -75,21 +74,10 @@ class Statistics(object):
             self.usize += csize
 
     def print_(self):
-        print 'Number of files: %d' % self.nfiles
-        print 'Original size: %d (%s)' % (self.osize, format_file_size(self.osize))
-        print 'Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize))
-        print 'Unique data: %d (%s)' % (self.usize, format_file_size(self.usize))
-
-
-# OSX filenames are UTF-8 Only so any non-utf8 filenames are url encoded
-if sys.platform == 'darwin':
-    def encode_filename(name):
-        try:
-            return name.decode('utf-8')
-        except UnicodeDecodeError:
-            return urllib.quote(name)
-else:
-    encode_filename = str
+        print('Number of files: %d' % self.nfiles)
+        print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize)))
+        print('Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize)))
+        print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)))
 
 
 def get_keys_dir():
@@ -212,7 +200,7 @@ def format_file_mode(mod):
     def x(v):
         return ''.join(v & m and s or '-'
                        for m, s in ((4, 'r'), (2, 'w'), (1, 'x')))
-    return '%s%s%s' % (x(mod / 64), x(mod / 8), x(mod))
+    return '%s%s%s' % (x(mod // 64), x(mod // 8), x(mod))
 
 
 def format_file_size(v):
@@ -377,3 +365,14 @@ def write_msgpack(filename, d):
         fd.flush()
         os.fsync(fd)
     os.rename(filename + '.tmp', filename)
+
+
+def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
+    for key in keys:
+        if isinstance(d.get(key), bytes):
+            d[key] = d[key].decode(encoding, errors)
+    return d
+
+
+def remove_surrogates(s, errors='replace'):
+    return s.encode('utf-8', errors).decode('utf-8')
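
These two helpers encode the commit's filename strategy: paths are handled as str decoded with surrogateescape, so undecodable bytes survive a round trip, while remove_surrogates() only sanitizes for display. A usage sketch:

    name = b'caf\xe9'.decode('utf-8', 'surrogateescape')          # 'caf\udce9'
    assert name.encode('utf-8', 'surrogateescape') == b'caf\xe9'  # lossless round trip
    # remove_surrogates() re-encodes with errors='replace' to get printable output
    assert name.encode('utf-8', 'replace').decode('utf-8') == 'caf?'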

+ 57 - 57
darc/key.py

@@ -1,4 +1,4 @@
-from __future__ import with_statement
+from binascii import hexlify, a2b_base64, b2a_base64
 from getpass import getpass
 import os
 import msgpack
@@ -16,11 +16,11 @@ from Crypto.Protocol.KDF import PBKDF2
 
 from .helpers import IntegrityError, get_keys_dir, Location
 
-PREFIX = '\0' * 8
+PREFIX = b'\0' * 8
 
-KEYFILE = '\0'
-PASSPHRASE = '\1'
-PLAINTEXT = '\2'
+KEYFILE = b'\0'
+PASSPHRASE = b'\1'
+PLAINTEXT = b'\2'
 
 
 def key_creator(store, args):
@@ -33,11 +33,11 @@ def key_creator(store, args):
 
 
 def key_factory(store, manifest_data):
-    if manifest_data[0] == KEYFILE:
+    if manifest_data[:1] == KEYFILE:
         return KeyfileKey.detect(store, manifest_data)
-    elif manifest_data[0] == PASSPHRASE:
+    elif manifest_data[:1] == PASSPHRASE:
         return PassphraseKey.detect(store, manifest_data)
-    elif manifest_data[0] == PLAINTEXT:
+    elif manifest_data[:1] == PLAINTEXT:
         return PlaintextKey.detect(store, manifest_data)
     else:
         raise Exception('Unkown Key type %d' % ord(manifest_data[0]))
@@ -67,7 +67,7 @@ class PlaintextKey(KeyBase):
 
     @classmethod
     def create(cls, store, args):
-        print 'Encryption NOT enabled.\nUse the --key-file or --passphrase options to enable encryption.'
+        print('Encryption NOT enabled.\nUse the --key-file or --passphrase options to enable encryption.')
         return cls()
 
     @classmethod
@@ -78,12 +78,12 @@ class PlaintextKey(KeyBase):
         return SHA256.new(data).digest()
 
     def encrypt(self, data):
-        return ''.join([self.TYPE, zlib.compress(data)])
+        return b''.join([self.TYPE, zlib.compress(data)])
 
     def decrypt(self, id, data):
-        if data[0] != self.TYPE:
+        if data[:1] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = zlib.decompress(buffer(data, 1))
+        data = zlib.decompress(memoryview(data)[1:])
         if id and SHA256.new(data).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
@@ -99,26 +99,26 @@ class AESKeyBase(KeyBase):
     def encrypt(self, data):
         data = zlib.compress(data)
         nonce = long_to_bytes(self.counter.next_value(), 8)
-        data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '',
+        data = b''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, b'',
                                        counter=self.counter).encrypt(data)))
         hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest()
-        return ''.join((self.TYPE, hash, data))
+        return b''.join((self.TYPE, hash, data))
 
     def decrypt(self, id, data):
-        if data[0] != self.TYPE:
+        if data[:1] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        hash = buffer(data, 1, 32)
-        if buffer(HMAC.new(self.enc_hmac_key, buffer(data, 33), SHA256).digest()) != hash:
+        hash = memoryview(data)[1:33]
+        if memoryview(HMAC.new(self.enc_hmac_key, memoryview(data)[33:], SHA256).digest()) != hash:
             raise IntegrityError('Encryption envelope checksum mismatch')
-        nonce = bytes_to_long(buffer(data, 33, 8))
+        nonce = bytes_to_long(memoryview(data)[33:41])
         counter = Counter.new(64, initial_value=nonce, prefix=PREFIX)
-        data = zlib.decompress(AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(buffer(data, 41)))
+        data = zlib.decompress(AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(memoryview(data)[41:]))
         if id and HMAC.new(self.id_key, data, SHA256).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
 
     def extract_iv(self, payload):
-        if payload[0] != self.TYPE:
+        if payload[:1] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
         nonce = bytes_to_long(payload[33:41])
         return nonce
@@ -149,14 +149,14 @@ class PassphraseKey(AESKeyBase):
         while passphrase != passphrase2:
             passphrase = getpass('Enter passphrase: ')
             if not passphrase:
-                print 'Passphrase must not be blank'
+                print('Passphrase must not be blank')
                 continue
             passphrase2 = getpass('Enter same passphrase again: ')
             if passphrase != passphrase2:
-                print 'Passphrases do not match'
+                print('Passphrases do not match')
         key.init(store, passphrase)
         if passphrase:
-            print 'Remember your passphrase. Your data will be inaccessible without it.'
+            print('Remember your passphrase. Your data will be inaccessible without it.')
         return key
 
     @classmethod
@@ -198,40 +198,40 @@ class KeyfileKey(AESKeyBase):
 
     @classmethod
     def find_key_file(cls, store):
-        id = store.id.encode('hex')
+        id = hexlify(store.id).decode('ascii')
         keys_dir = get_keys_dir()
         for name in os.listdir(keys_dir):
             filename = os.path.join(keys_dir, name)
-            with open(filename, 'rb') as fd:
+            with open(filename, 'r') as fd:
                 line = fd.readline().strip()
                 if line and line.startswith(cls.FILE_ID) and line[9:] == id:
                     return filename
         raise Exception('Key file for store with ID %s not found' % id)
 
     def load(self, filename, passphrase):
-        with open(filename, 'rb') as fd:
-            cdata = (''.join(fd.readlines()[1:])).decode('base64')
+        with open(filename, 'r') as fd:
+            cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii'))  # .encode needed for Python 3.[0-2]
         data = self.decrypt_key_file(cdata, passphrase)
         if data:
             key = msgpack.unpackb(data)
-            if key['version'] != 1:
+            if key[b'version'] != 1:
                 raise IntegrityError('Invalid key file header')
-            self.store_id = key['store_id']
-            self.enc_key = key['enc_key']
-            self.enc_hmac_key = key['enc_hmac_key']
-            self.id_key = key['id_key']
-            self.chunk_seed = key['chunk_seed']
+            self.store_id = key[b'store_id']
+            self.enc_key = key[b'enc_key']
+            self.enc_hmac_key = key[b'enc_hmac_key']
+            self.id_key = key[b'id_key']
+            self.chunk_seed = key[b'chunk_seed']
             self.counter = Counter.new(64, initial_value=1, prefix=PREFIX)
             self.path = filename
             return True
 
     def decrypt_key_file(self, data, passphrase):
         d = msgpack.unpackb(data)
-        assert d['version'] == 1
-        assert d['algorithm'] == 'SHA256'
-        key = PBKDF2(passphrase, d['salt'], 32, d['iterations'], SHA256_PDF)
-        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
-        if HMAC.new(key, data, SHA256).digest() != d['hash']:
+        assert d[b'version'] == 1
+        assert d[b'algorithm'] == b'SHA256'
+        key = PBKDF2(passphrase, d[b'salt'], 32, d[b'iterations'], SHA256_PDF)
+        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d[b'data'])
+        if HMAC.new(key, data, SHA256).digest() != d[b'hash']:
             return None
         return data
 
@@ -261,9 +261,9 @@ class KeyfileKey(AESKeyBase):
             'chunk_seed': self.chunk_seed,
         }
         data = self.encrypt_key_file(msgpack.packb(key), passphrase)
-        with open(path, 'wb') as fd:
-            fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex')))
-            fd.write(data.encode('base64'))
+        with open(path, 'w') as fd:
+            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.store_id).decode('ascii')))
+            fd.write(b2a_base64(data).decode('ascii'))
         self.path = path
 
     def change_passphrase(self):
@@ -272,9 +272,9 @@ class KeyfileKey(AESKeyBase):
             passphrase = getpass('New passphrase: ')
             passphrase2 = getpass('Enter same passphrase again: ')
             if passphrase != passphrase2:
-                print 'Passphrases do not match'
+                print('Passphrases do not match')
         self.save(self.path, passphrase)
-        print 'Key file "%s" updated' % self.path
+        print('Key file "%s" updated' % self.path)
 
     @classmethod
     def create(cls, store, args):
@@ -293,13 +293,13 @@ class KeyfileKey(AESKeyBase):
             passphrase = getpass('Enter passphrase (empty for no passphrase):')
             passphrase2 = getpass('Enter same passphrase again: ')
             if passphrase != passphrase2:
-                print 'Passphrases do not match'
+                print('Passphrases do not match')
         key = cls()
         key.store_id = store.id
         key.init_from_random_data(get_random_bytes(100))
         key.save(path, passphrase)
-        print 'Key file "%s" created.' % key.path
-        print 'Keep this file safe. Your data will be inaccessible without it.'
+        print('Key file "%s" created.' % key.path)
+        print('Keep this file safe. Your data will be inaccessible without it.')
         return key
 
 
@@ -317,7 +317,7 @@ class KeyTestCase(unittest.TestCase):
             orig = '/some/place'
 
         _location = _Location()
-        id = '\0' * 32
+        id = b'\0' * 32
 
     def setUp(self):
         self.tmpdir = tempfile.mkdtemp()
@@ -328,8 +328,8 @@ class KeyTestCase(unittest.TestCase):
 
     def test_plaintext(self):
         key = PlaintextKey.create(None, None)
-        data = 'foo'
-        self.assertEqual(key.id_hash(data).encode('hex'), '2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
+        data = b'foo'
+        self.assertEqual(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
         self.assertEqual(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
 
     def test_keyfile(self):
@@ -338,25 +338,25 @@ class KeyTestCase(unittest.TestCase):
         os.environ['DARC_PASSPHRASE'] = 'test'
         key = KeyfileKey.create(self.MockStore(), MockArgs())
         self.assertEqual(bytes_to_long(key.counter()), 1)
-        manifest = key.encrypt('')
+        manifest = key.encrypt(b'')
         iv = key.extract_iv(manifest)
         key2 = KeyfileKey.detect(self.MockStore(), manifest)
         self.assertEqual(bytes_to_long(key2.counter()), iv + 1000)
         # Key data sanity check
         self.assertEqual(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
         self.assertEqual(key2.chunk_seed == 0, False)
-        data = 'foo'
+        data = b'foo'
         self.assertEqual(data, key2.decrypt(key.id_hash(data), key.encrypt(data)))
 
     def test_passphrase(self):
         os.environ['DARC_PASSPHRASE'] = 'test'
         key = PassphraseKey.create(self.MockStore(), None)
         self.assertEqual(bytes_to_long(key.counter()), 1)
-        self.assertEqual(key.id_key.encode('hex'), 'f28e915da78a972786da47fee6c4bd2960a421b9bdbdb35a7942eb82552e9a72')
-        self.assertEqual(key.enc_hmac_key.encode('hex'), '169c6082f209e524ea97e2c75318936f6e93c101b9345942a95491e9ae1738ca')
-        self.assertEqual(key.enc_key.encode('hex'), 'c05dd423843d4dd32a52e4dc07bb11acabe215917fc5cf3a3df6c92b47af79ba')
+        self.assertEqual(hexlify(key.id_key), b'f28e915da78a972786da47fee6c4bd2960a421b9bdbdb35a7942eb82552e9a72')
+        self.assertEqual(hexlify(key.enc_hmac_key), b'169c6082f209e524ea97e2c75318936f6e93c101b9345942a95491e9ae1738ca')
+        self.assertEqual(hexlify(key.enc_key), b'c05dd423843d4dd32a52e4dc07bb11acabe215917fc5cf3a3df6c92b47af79ba')
         self.assertEqual(key.chunk_seed, -324662077)
-        manifest = key.encrypt('')
+        manifest = key.encrypt(b'')
         iv = key.extract_iv(manifest)
         key2 = PassphraseKey.detect(self.MockStore(), manifest)
         self.assertEqual(bytes_to_long(key2.counter()), iv + 1000)
@@ -364,8 +364,8 @@ class KeyTestCase(unittest.TestCase):
         self.assertEqual(key.enc_hmac_key, key2.enc_hmac_key)
         self.assertEqual(key.enc_key, key2.enc_key)
         self.assertEqual(key.chunk_seed, key2.chunk_seed)
-        data = 'foo'
-        self.assertEqual(key.id_hash(data).encode('hex'), '016c27cd40dc8e84f196f3b43a9424e8472897e09f6935d0d3a82fb41664bad7')
+        data = b'foo'
+        self.assertEqual(hexlify(key.id_hash(data)), b'016c27cd40dc8e84f196f3b43a9424e8472897e09f6935d0d3a82fb41664bad7')
         self.assertEqual(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))
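
The repeated data[0] -> data[:1] edits address a Python 3 semantic change: indexing bytes yields an int while slicing yields bytes, so comparisons against one-byte type tags must slice:

    TYPE = b'\x02'                 # the PLAINTEXT tag
    data = TYPE + b'payload'
    assert data[0] == 2            # an int under Python 3 (was a str under 2.x)
    assert data[:1] == TYPE        # bytes, which is what decrypt() compares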
 
 

+ 1 - 2
darc/lrucache.py

@@ -1,4 +1,3 @@
-from UserDict import DictMixin
 from heapq import heappush, heapify, heapreplace, heappop
 import unittest
 
@@ -56,7 +55,7 @@ class LRUCacheTestCase(unittest.TestCase):
             c[x] = i
         self.assertEqual(len(c), 2)
         self.assertEqual(set(c), set(['b', 'c']))
-        self.assertEqual(set(c.iteritems()), set([('b', 1), ('c', 2)]))
+        self.assertEqual(set(c.items()), set([('b', 1), ('c', 2)]))
         self.assertEqual(False, 'a' in c)
         self.assertEqual(True, 'b' in c)
         self.assertRaises(KeyError, lambda: c['a'])

+ 16 - 14
darc/remote.py

@@ -1,4 +1,3 @@
-from __future__ import with_statement
 import fcntl
 import msgpack
 import os
@@ -35,16 +34,17 @@ class StoreServer(object):
                     return
                 unpacker.feed(data)
                 for type, msgid, method, args in unpacker:
+                    method = method.decode('ascii')
                     try:
                         try:
                             f = getattr(self, method)
                         except AttributeError:
                             f = getattr(self.store, method)
                         res = f(*args)
-                    except Exception, e:
-                        sys.stdout.write(msgpack.packb((1, msgid, e.__class__.__name__, None)))
+                    except Exception as e:
+                        sys.stdout.buffer.write(msgpack.packb((1, msgid, e.__class__.__name__, None)))
                     else:
-                        sys.stdout.write(msgpack.packb((1, msgid, None, res)))
+                        sys.stdout.buffer.write(msgpack.packb((1, msgid, None, res)))
                     sys.stdout.flush()
             if es:
                 return
@@ -53,6 +53,7 @@ class StoreServer(object):
         return 1
 
     def open(self, path, create=False):
+        path = os.fsdecode(path)
         if path.startswith('/~'):
             path = path[1:]
         self.store = Store(os.path.expanduser(path), create)
@@ -69,7 +70,7 @@ class RemoteStore(object):
     def __init__(self, location, create=False):
         self.p = None
         self.cache = LRUCache(256)
-        self.to_send = ''
+        self.to_send = b''
         self.extra = {}
         self.pending = {}
         self.unpacker = msgpack.Unpacker(use_list=False)
@@ -89,10 +90,10 @@ class RemoteStore(object):
             raise Exception('Server insisted on using unsupported protocol version %d' % version)
         try:
             self.id = self.call('open', (location.path, create))
-        except self.RPCError, e:
-            if e.name == 'DoesNotExist':
+        except self.RPCError as e:
+            if e.name == b'DoesNotExist':
                 raise Store.DoesNotExist
-            elif e.name == 'AlreadyExists':
+            elif e.name == b'AlreadyExists':
                 raise Store.AlreadyExists
 
     def __del__(self):
@@ -127,7 +128,7 @@ class RemoteStore(object):
                 if to_send:
                     n = os.write(self.stdin_fd, to_send)
                     assert n > 0
-                    to_send = buffer(to_send, n)
+                    to_send = memoryview(to_send)[n:]
                 else:
                     w_fds = []
 
@@ -167,7 +168,7 @@ class RemoteStore(object):
                 msgid, resp, error = self.cache[args]
                 m = max(m, msgid)
                 self.extra.setdefault(m, []).append((args, resp, error))
-        return ''.join(data)
+        return b''.join(data)
 
     def gen_cache_requests(self, cmd, peek):
         data = []
@@ -183,7 +184,7 @@ class RemoteStore(object):
             self.pending[msgid] = args
             self.cache[args] = msgid, None, None
             data.append(msgpack.packb((1, msgid, cmd, args)))
-        return ''.join(data)
+        return b''.join(data)
 
     def call_multi(self, cmd, argsv, wait=True, peek=None):
         w_fds = [self.stdin_fd]
@@ -212,7 +213,8 @@ class RemoteStore(object):
                 if self.to_send:
                     n = os.write(self.stdin_fd, self.to_send)
                     assert n > 0
-                    self.to_send = buffer(self.to_send, n)
+#                    self.to_send = memoryview(self.to_send)[n:]
+                    self.to_send = self.to_send[n:]
                 else:
                     w_fds = []
                     if not wait:
@@ -231,8 +233,8 @@ class RemoteStore(object):
         try:
             for res in self.call_multi('get', [(id, )]):
                 return res
-        except self.RPCError, e:
-            if e.name == 'DoesNotExist':
+        except self.RPCError as e:
+            if e.name == b'DoesNotExist':
                 raise Store.DoesNotExist
             raise
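
sys.stdout is a text stream in Python 3, so the server's msgpack frames must go through the underlying binary buffer. A minimal sketch of one response write, assuming the (type, msgid, error, result) wire format shown above:

    import msgpack
    import sys

    frame = msgpack.packb((1, 42, None, b'ok'))
    sys.stdout.buffer.write(frame)     # bytes-safe; sys.stdout.write(frame) would raise
    sys.stdout.flush()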
 

+ 57 - 51
darc/store.py

@@ -1,5 +1,5 @@
-from __future__ import with_statement
-from ConfigParser import RawConfigParser
+from configparser import RawConfigParser
+from binascii import hexlify, unhexlify
 import fcntl
 import os
 import re
@@ -40,6 +40,7 @@ class Store(object):
         """Requested key does not exist"""
 
     def __init__(self, path, create=False):
+        self.io = None
         if create:
             self.create(path)
         self.open(path)
@@ -51,7 +52,7 @@ class Store(object):
             raise self.AlreadyExists(path)
         if not os.path.exists(path):
             os.mkdir(path)
-        with open(os.path.join(path, 'README'), 'wb') as fd:
+        with open(os.path.join(path, 'README'), 'w') as fd:
             fd.write('This is a DARC store')
         os.mkdir(os.path.join(path, 'data'))
         config = RawConfigParser()
@@ -59,7 +60,7 @@ class Store(object):
         config.set('store', 'version', '1')
         config.set('store', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('store', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
-        config.set('store', 'id', os.urandom(32).encode('hex'))
+        config.set('store', 'id', hexlify(os.urandom(32)).decode('ascii'))
         with open(os.path.join(path, 'config'), 'w') as fd:
             config.write(fd)
 
@@ -76,10 +77,11 @@ class Store(object):
             raise Exception('%s Does not look like a darc store')
         self.max_segment_size = self.config.getint('store', 'max_segment_size')
         self.segments_per_dir = self.config.getint('store', 'segments_per_dir')
-        self.id = self.config.get('store', 'id').decode('hex')
+        self.id = unhexlify(self.config.get('store', 'id').strip().encode('ascii'))  # .encode needed for Python 3.[0-2]
         self.rollback()
 
     def close(self):
+        self.rollback()
         self.lock_fd.close()
 
     def commit(self, rollback=True):
@@ -97,26 +99,26 @@ class Store(object):
 
     def open_index(self, head, read_only=False):
         if head is None:
-            self.index = NSIndex.create(os.path.join(self.path, 'index.tmp'))
+            self.index = NSIndex.create(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             self.segments = {}
             self.compact = set()
         else:
             if read_only:
-                self.index = NSIndex(os.path.join(self.path, 'index.%d') % head)
+                self.index = NSIndex((os.path.join(self.path, 'index.%d') % head).encode('utf-8'))
             else:
                 shutil.copy(os.path.join(self.path, 'index.%d' % head),
                             os.path.join(self.path, 'index.tmp'))
-                self.index = NSIndex(os.path.join(self.path, 'index.tmp'))
+                self.index = NSIndex(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             hints = read_msgpack(os.path.join(self.path, 'hints.%d' % head))
-            if hints['version'] != 1:
+            if hints[b'version'] != 1:
                 raise ValueError('Unknown hints file version: %d' % hints['version'])
-            self.segments = hints['segments']
-            self.compact = set(hints['compact'])
+            self.segments = hints[b'segments']
+            self.compact = set(hints[b'compact'])
 
     def write_index(self):
-        hints = {'version': 1,
-                 'segments': self.segments,
-                 'compact': list(self.compact)}
+        hints = {b'version': 1,
+                 b'segments': self.segments,
+                 b'compact': list(self.compact)}
         write_msgpack(os.path.join(self.path, 'hints.%d' % self.io.head), hints)
         self.index.flush()
         os.rename(os.path.join(self.path, 'index.tmp'),
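
With the msgpack-python defaults of this era, map keys come back from `unpackb` as bytes under Python 3, which is why the hints dict is both written and read with `b'...'` keys. A sketch of the round trip, assuming those defaults:

```python
import msgpack

hints = {b'version': 1, b'segments': {0: 2}, b'compact': [0]}
packed = msgpack.packb(hints)
# keys round-trip as bytes, so lookups must use bytes literals:
assert msgpack.unpackb(packed)[b'version'] == 1
```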
@@ -192,6 +194,8 @@ class Store(object):
         """
         """
         self._active_txn = False
+        if self.io:
+            self.io.close()
         self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
         if self.io.head is not None and not os.path.exists(os.path.join(self.path, 'index.%d' % self.io.head)):
             self.recover(self.path)
@@ -273,15 +277,15 @@ class LoggedIO(object):
         self.cleanup()
 
     def close(self):
-        for segment in self.fds.keys():
+        for segment in list(self.fds.keys()):
             self.fds.pop(segment).close()
         self.close_segment()
         self.fds = None  # Just to make sure we're disabled
 
     def _segment_names(self, reverse=False):
         for dirpath, dirs, filenames in os.walk(os.path.join(self.path, 'data')):
-            dirs.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
-            filenames.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
+            dirs.sort(key=int, reverse=reverse)
+            filenames.sort(key=int, reverse=reverse)
             for filename in filenames:
                 yield int(filename), os.path.join(dirpath, filename)
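
Python 3 drops both the `cmp()` builtin and the `cmp=` argument to `list.sort()`; a `key=` function replaces the pairwise comparator, sorting numerically here via `key=int`. For genuinely pairwise legacy comparators, `functools.cmp_to_key` (available since 3.2) is the bridge:

```python
names = ['10', '2', '1']
names.sort(key=int)                  # numeric order: ['1', '2', '10']
names.sort(key=int, reverse=True)    # ['10', '2', '1']

# equivalent bridge for a legacy cmp-style comparator:
from functools import cmp_to_key
names.sort(key=cmp_to_key(lambda a, b: (int(a) > int(b)) - (int(a) < int(b))))
```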
 
@@ -304,18 +308,18 @@ class LoggedIO(object):
             return fd.read(self.header_fmt.size) == self.COMMIT
 
     def segment_filename(self, segment):
-        return os.path.join(self.path, 'data', str(segment / self.segments_per_dir), str(segment))
+        return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment))
 
     def get_write_fd(self, no_new=False):
         if not no_new and self.offset and self.offset > self.limit:
             self.close_segment()
         if not self._write_fd:
             if self.segment % self.segments_per_dir == 0:
-                dirname = os.path.join(self.path, 'data', str(self.segment / self.segments_per_dir))
+                dirname = os.path.join(self.path, 'data', str(self.segment // self.segments_per_dir))
                 if not os.path.exists(dirname):
                     os.mkdir(dirname)
             self._write_fd = open(self.segment_filename(self.segment), 'ab')
-            self._write_fd.write('DSEGMENT')
+            self._write_fd.write(b'DSEGMENT')
             self.offset = 8
         return self._write_fd
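
`/` is true division in Python 3 and returns a float even for int operands, so the segment-to-directory mapping must use floor division `//` to keep producing integer directory names:

```python
segments_per_dir = 4
segment = 7
assert segment / segments_per_dir == 1.75   # true division: float
assert segment // segments_per_dir == 1     # floor division: int
dirname = str(segment // segments_per_dir)  # '1' -- usable as a dir name
```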
 
@@ -336,7 +340,7 @@ class LoggedIO(object):
     def iter_objects(self, segment, lookup=None, include_data=False):
         fd = self.get_fd(segment)
         fd.seek(0)
-        if fd.read(8) != 'DSEGMENT':
+        if fd.read(8) != b'DSEGMENT':
             raise IntegrityError('Invalid segment header')
         offset = 8
         header = fd.read(self.header_fmt.size)
@@ -345,7 +349,7 @@ class LoggedIO(object):
             if size > MAX_OBJECT_SIZE:
                 raise IntegrityError('Invalid segment object size')
             rest = fd.read(size - self.header_fmt.size)
-            if crc32(rest, crc32(buffer(header, 4))) & 0xffffffff != crc:
+            if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
                 raise IntegrityError('Segment checksum mismatch')
             if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT):
                 raise IntegrityError('Invalid segment entry header')
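
The `buffer()` builtin no longer exists; `memoryview` gives the same zero-copy slice over the header so the CRC can skip the stored checksum field, and `zlib.crc32` accepts any bytes-like object. A sketch of the verification pattern (the header layout here is illustrative only, not the actual segment format):

```python
import struct
from zlib import crc32

payload = b'example payload'
no_crc = struct.pack('<IB', len(payload), 0)      # header minus crc field
crc = crc32(payload, crc32(no_crc)) & 0xffffffff
record_header = struct.pack('<I', crc) + no_crc

# skip the 4-byte crc with a zero-copy slice (was buffer(header, 4)):
assert crc32(payload, crc32(memoryview(record_header)[4:])) & 0xffffffff == crc
```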
@@ -370,7 +374,7 @@ class LoggedIO(object):
         if size > MAX_OBJECT_SIZE:
             raise IntegrityError('Invalid segment object size')
         data = fd.read(size - self.put_header_fmt.size)
-        if crc32(data, crc32(buffer(header, 4))) & 0xffffffff != crc:
+        if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
             raise IntegrityError('Segment checksum mismatch')
         if tag != TAG_PUT or id != key:
             raise IntegrityError('Invalid segment entry header')
@@ -382,7 +386,7 @@ class LoggedIO(object):
         offset = self.offset
         header = self.header_no_crc_fmt.pack(size, TAG_PUT)
         crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
-        fd.write(''.join((crc, header, id, data)))
+        fd.write(b''.join((crc, header, id, data)))
         self.offset += size
         return self.segment, offset
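
str and bytes do not mix in Python 3, so records are assembled with a bytes separator; `''.join()` over bytes pieces raises TypeError. Briefly:

```python
crc, header, key, data = b'\x00' * 4, b'\x01\x02', b'k' * 32, b'DATA'
record = b''.join((crc, header, key, data))  # every piece must be bytes
# ''.join((crc, header)) raises TypeError: str separator, bytes items
```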
 
@@ -390,7 +394,7 @@ class LoggedIO(object):
         fd = self.get_write_fd()
         header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
         crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
-        fd.write(''.join((crc, header, id)))
+        fd.write(b''.join((crc, header, id)))
         self.offset += self.put_header_fmt.size
         return self.segment
 
@@ -398,7 +402,7 @@ class LoggedIO(object):
         fd = self.get_write_fd(no_new=True)
         header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
         crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
-        fd.write(''.join((crc, header)))
+        fd.write(b''.join((crc, header)))
         self.head = self.segment
         self.close_segment()
 
@@ -421,13 +425,14 @@ class StoreTestCase(unittest.TestCase):
         self.store = self.open(create=True)
 
     def tearDown(self):
+        self.store.close()
         shutil.rmtree(self.tmppath)
 
     def test1(self):
         for x in range(100):
-            self.store.put('%-32d' % x, 'SOMEDATA')
-        key50 = '%-32d' % 50
-        self.assertEqual(self.store.get(key50), 'SOMEDATA')
+            self.store.put(('%-32d' % x).encode('ascii'), b'SOMEDATA')
+        key50 = ('%-32d' % 50).encode('ascii')
+        self.assertEqual(self.store.get(key50), b'SOMEDATA')
         self.store.delete(key50)
         self.assertRaises(Store.DoesNotExist, lambda: self.store.get(key50))
         self.store.commit()
@@ -437,55 +442,56 @@ class StoreTestCase(unittest.TestCase):
         for x in range(100):
             if x == 50:
                 continue
-            self.assertEqual(store2.get('%-32d' % x), 'SOMEDATA')
+            self.assertEqual(store2.get(('%-32d' % x).encode('ascii')), b'SOMEDATA')
+        store2.close()
 
     def test2(self):
         """Test multiple sequential transactions
         """
-        self.store.put('00000000000000000000000000000000', 'foo')
-        self.store.put('00000000000000000000000000000001', 'foo')
+        self.store.put(b'00000000000000000000000000000000', b'foo')
+        self.store.put(b'00000000000000000000000000000001', b'foo')
         self.store.commit()
-        self.store.delete('00000000000000000000000000000000')
-        self.store.put('00000000000000000000000000000001', 'bar')
+        self.store.delete(b'00000000000000000000000000000000')
+        self.store.put(b'00000000000000000000000000000001', b'bar')
         self.store.commit()
-        self.assertEqual(self.store.get('00000000000000000000000000000001'), 'bar')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000001'), b'bar')
 
     def test_consistency(self):
         """Test cache consistency
         """
-        self.store.put('00000000000000000000000000000000', 'foo')
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
-        self.store.put('00000000000000000000000000000000', 'foo2')
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo2')
-        self.store.put('00000000000000000000000000000000', 'bar')
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'bar')
-        self.store.delete('00000000000000000000000000000000')
-        self.assertRaises(Store.DoesNotExist, lambda: self.store.get('00000000000000000000000000000000'))
+        self.store.put(b'00000000000000000000000000000000', b'foo')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
+        self.store.put(b'00000000000000000000000000000000', b'foo2')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo2')
+        self.store.put(b'00000000000000000000000000000000', b'bar')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'bar')
+        self.store.delete(b'00000000000000000000000000000000')
+        self.assertRaises(Store.DoesNotExist, lambda: self.store.get(b'00000000000000000000000000000000'))
 
     def test_consistency2(self):
         """Test cache consistency2
         """
-        self.store.put('00000000000000000000000000000000', 'foo')
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
+        self.store.put(b'00000000000000000000000000000000', b'foo')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
         self.store.commit()
-        self.store.put('00000000000000000000000000000000', 'foo2')
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo2')
+        self.store.put(b'00000000000000000000000000000000', b'foo2')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo2')
         self.store.rollback()
-        self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
+        self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
 
     def test_single_kind_transactions(self):
         # put
-        self.store.put('00000000000000000000000000000000', 'foo')
+        self.store.put(b'00000000000000000000000000000000', b'foo')
         self.store.commit()
         self.store.close()
         # replace
         self.store = self.open()
-        self.store.put('00000000000000000000000000000000', 'bar')
+        self.store.put(b'00000000000000000000000000000000', b'bar')
         self.store.commit()
         self.store.close()
         # delete
         self.store = self.open()
-        self.store.delete('00000000000000000000000000000000')
+        self.store.delete(b'00000000000000000000000000000000')
         self.store.commit()
 
 

+ 31 - 33
darc/test.py

@@ -1,14 +1,13 @@
-from __future__ import with_statement
 import doctest
 import filecmp
 import os
-from StringIO import StringIO
+from io import BytesIO, StringIO
 import stat
 import sys
 import shutil
 import tempfile
 import unittest
-from xattr import xattr, XATTR_NOFOLLOW
+import xattr
 
 from . import helpers, lrucache
 from .chunker import chunkify, buzhash, buzhash_update
@@ -17,6 +16,8 @@ from .key import suite as KeySuite
 from .store import Store, suite as StoreSuite
 from .remote import Store, suite as RemoteStoreSuite
 
+utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
+
 
 class Test(unittest.TestCase):
 
@@ -52,7 +53,7 @@ class Test(unittest.TestCase):
             ret = self.archiver.run(args)
             sys.stdout, sys.stderr = stdout, stderr
             if ret != exit_code:
-                print output.getvalue()
+                print(output.getvalue())
             self.assertEqual(exit_code, ret)
             return output.getvalue()
         finally:
@@ -67,13 +68,13 @@ class Test(unittest.TestCase):
         filename = os.path.join(self.input_path, name)
         if not os.path.exists(os.path.dirname(filename)):
             os.makedirs(os.path.dirname(filename))
-        with open(filename, 'wbx') as fd:
-            fd.write('X' * size)
+        with open(filename, 'wb') as fd:
+            fd.write(b'X' * size)
 
     def get_xattrs(self, path):
         try:
-            return dict(xattr(path, XATTR_NOFOLLOW))
-        except IOError:
+            return xattr.get_all(path, True)
+        except EnvironmentError:
             return {}
 
     def diff_dirs(self, dir1, dir2):
@@ -87,8 +88,7 @@ class Test(unittest.TestCase):
             s1 = os.lstat(path1)
             s2 = os.lstat(path2)
             attrs = ['st_mode', 'st_uid', 'st_gid', 'st_rdev']
-            # We can't restore symlink atime/mtime right now
-            if not os.path.islink(path1):
+            if not os.path.islink(path1) or utime_supports_fd:
                 attrs.append('st_mtime')
             d1 = [filename] + [getattr(s1, a) for a in attrs]
             d2 = [filename] + [getattr(s2, a) for a in attrs]
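
The symlink mtime check is re-enabled when `os.utime` gained the relevant capability; `os.supports_fd` only exists from Python 3.3, so the `getattr` default makes the probe degrade to False on 3.2. A sketch of that feature detection (note `os.supports_follow_symlinks`, also 3.3+, is the set specific to the symlink case):

```python
import os

# False on 3.2 (attribute missing), True on 3.3+ where supported:
utime_supports_fd = os.utime in getattr(os, 'supports_fd', set())
utime_follows = os.utime in getattr(os, 'supports_follow_symlinks', set())
```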
@@ -107,15 +107,13 @@ class Test(unittest.TestCase):
         # File owner
         os.chown('input/file1', 100, 200)
         # File mode
-        os.chmod('input/file1', 7755)
-        os.chmod('input/dir2', 0700)
+        os.chmod('input/file1', 0o7755)
+        os.chmod('input/dir2', 0o700)
         # Block device
-        os.mknod('input/bdev', 0600 | stat.S_IFBLK,  os.makedev(10, 20))
+        os.mknod('input/bdev', 0o600 | stat.S_IFBLK,  os.makedev(10, 20))
         # Char device
-        os.mknod('input/cdev', 0600 | stat.S_IFCHR,  os.makedev(30, 40))
-        # xattr
-        x = xattr(os.path.join(self.input_path, 'file1'))
-        x.set('user.foo', 'bar')
+        os.mknod('input/cdev', 0o600 | stat.S_IFCHR,  os.makedev(30, 40))
+        xattr.set(os.path.join(self.input_path, 'file1'), 'user.foo', 'bar')
         # Hard link
         os.link(os.path.join(self.input_path, 'file1'),
                 os.path.join(self.input_path, 'hardlink'))
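
Python 3 rejects the bare leading-zero octal notation, so `0600` is a SyntaxError; the `0o` prefix (also valid on Python 2.6+) is required:

```python
import stat

mode = 0o600 | stat.S_IFBLK  # 0600 would be a SyntaxError on Python 3
assert 0o700 == 448          # same value the old 0700 literal had
```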
@@ -193,25 +191,25 @@ class Test(unittest.TestCase):
 class ChunkTest(unittest.TestCase):
 
     def test_chunkify(self):
-        data = '0' * 1024 * 1024 * 15 + 'Y'
-        parts = [str(c) for c in chunkify(StringIO(data), 2, 0x3, 2, 0)]
+        data = b'0' * 1024 * 1024 * 15 + b'Y'
+        parts = [bytes(c) for c in chunkify(BytesIO(data), 2, 0x3, 2, 0)]
         self.assertEqual(len(parts), 2)
-        self.assertEqual(''.join(parts), data)
-        self.assertEqual([str(c) for c in chunkify(StringIO(''), 2, 0x3, 2, 0)], [])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 0)], ['fooba', 'rboobaz', 'fooba', 'rboobaz', 'fooba', 'rboobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 1)], ['fo', 'obarb', 'oob', 'azf', 'oobarb', 'oob', 'azf', 'oobarb', 'oobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 2)], ['foob', 'ar', 'boobazfoob', 'ar', 'boobazfoob', 'ar', 'boobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 0)], ['foobarboobaz' * 3])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 1)], ['foobar', 'boo', 'bazfo', 'obar', 'boo', 'bazfo', 'obar', 'boobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 2)], ['foo', 'barboobaz', 'foo', 'barboobaz', 'foo', 'barboobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 0)], ['foobarboobaz' * 3])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 1)], ['foobar', 'boobazfo', 'obar', 'boobazfo', 'obar', 'boobaz'])
-        self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 2)], ['foob', 'arboobaz', 'foob', 'arboobaz', 'foob', 'arboobaz'])
+        self.assertEqual(b''.join(parts), data)
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b''), 2, 0x3, 2, 0)], [])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 0)], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 1)], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 2)], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 0)], [b'foobarboobaz' * 3])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 1)], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 2)], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 0)], [b'foobarboobaz' * 3])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 1)], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
+        self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 2)], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
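
The chunker now consumes and produces bytes, so the tests feed `io.BytesIO` and copy each yielded buffer with `bytes(c)`, since the chunks arrive as memoryview-style objects backed by the chunker's internal buffer:

```python
from io import BytesIO

stream = BytesIO(b'foobarboobaz' * 3)
view = memoryview(stream.getbuffer())[:6]  # zero-copy view, like a yielded chunk
assert bytes(view) == b'foobar'            # bytes() makes a stable copy
```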
 
     def test_buzhash(self):
-        self.assertEqual(buzhash('abcdefghijklmnop', 0), 3795437769L)
-        self.assertEqual(buzhash('abcdefghijklmnop', 1), 3795400502L)
-        self.assertEqual(buzhash('abcdefghijklmnop', 1), buzhash_update(buzhash('Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
+        self.assertEqual(buzhash(b'abcdefghijklmnop', 0), 3795437769)
+        self.assertEqual(buzhash(b'abcdefghijklmnop', 1), 3795400502)
+        self.assertEqual(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
 
 
 class RemoteTest(Test):

+ 2 - 7
setup.py

@@ -5,13 +5,9 @@ import sys
 from glob import glob
 import darc
 
-min_python = (2, 5)
+min_python = (3, 2)
 if sys.version_info < min_python:
-    print "Darc requires Python %d.%d or later" % min_python
-    sys.exit(1)
-
-if sys.version_info >= (3,):
-    print "Darc doesn't support Python 3 (yet)"
+    print("Darc requires Python %d.%d or later" % min_python)
     sys.exit(1)
 
 try:
@@ -31,7 +27,6 @@ try:
     class Sdist(sdist):
         def __init__(self, *args, **kwargs):
             for src in glob('darc/*.pyx'):
-                print 'src', src
                 cython_compiler.compile(glob('darc/*.pyx'),
                                         cython_compiler.default_options)
             sdist.__init__(self, *args, **kwargs)