فهرست منبع

Improved archive metadata deduplication.

Jonas Borgström 11 سال پیش
والد
کامیت
c394a31d62
4 فایلهای تغییر یافته به همراه 23 افزوده شده و 5 حذف شده
  1. 1 0
      CHANGES
  2. 4 4
      attic/archive.py
  3. 6 0
      attic/helpers.py
  4. 12 1
      attic/testsuite/helpers.py

+ 1 - 0
CHANGES

@@ -12,6 +12,7 @@ Version 0.11
 - Documentation improvements
 - Fix exception during "attic create" with repeated files (#39)
 - New "--exclude-from" option for attic create/extract/verify.
+- Improved archive metadata deduplication.
 
 Version 0.10
 ------------

+ 4 - 4
attic/archive.py

@@ -16,7 +16,7 @@ from attic import xattr
 from attic.chunker import chunkify
 from attic.hashindex import ChunkIndex
 from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \
-    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe
+    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict
 
 ITEMS_BUFFER = 1024 * 1024
 CHUNK_MIN = 1024
@@ -63,7 +63,7 @@ class ChunkBuffer:
         self.key = key
 
     def add(self, item):
-        self.buffer.write(self.packer.pack(item))
+        self.buffer.write(self.packer.pack(StableDict(item)))
         if self.is_full():
             self.flush()
 
@@ -348,7 +348,7 @@ class Archive:
             item[b'user'] = item[b'group'] = None
         xattrs = xattr.get_all(path, follow_symlinks=False)
         if xattrs:
-            item[b'xattrs'] = xattrs
+            item[b'xattrs'] = StableDict(xattrs)
         return item
 
     def process_item(self, path, st):
@@ -549,7 +549,7 @@ class ArchiveChecker:
 
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 if state != prev_state:
-                    unpacker = msgpack.Unpacker()
+                    unpacker = msgpack.Unpacker(object_hook=StableDict)
                     prev_state = state
                 if state % 2:
                     self.report_progress('Archive metadata damage detected', error=True)

+ 6 - 0
attic/helpers.py

@@ -469,6 +469,12 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+class StableDict(dict):
+    """A dict subclass with stable items() ordering"""
+    def items(self):
+        return sorted(super(StableDict, self).items())
+
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):

+ 12 - 1
attic/testsuite/helpers.py

@@ -1,10 +1,13 @@
+import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
 import os
 import tempfile
 import unittest
-from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime
+from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
+    StableDict
 from attic.testsuite import AtticTestCase
+import msgpack
 
 
 class LocationTestCase(AtticTestCase):
@@ -176,3 +179,11 @@ class PruneWithinTestCase(AtticTestCase):
         dotest(test_archives, '1w',  [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1m',  [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1y',  [0, 1, 2, 3, 4, 5])
+
+
+class StableDictTestCase(AtticTestCase):
+
+    def test(self):
+        d = StableDict(foo=1, bar=2, boo=3, baz=4)
+        self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)])
+        self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')