2
0
Эх сурвалжийг харах

Improved archive metadata deduplication.

Jonas Borgström 11 жил өмнө
parent
commit
c394a31d62

+ 1 - 0
CHANGES

@@ -12,6 +12,7 @@ Version 0.11
 - Documentation improvements
 - Fix exception during "attic create" with repeated files (#39)
 - New "--exclude-from" option for attic create/extract/verify.
+- Improved archive metadata deduplication.
 
 Version 0.10
 ------------

+ 4 - 4
attic/archive.py

@@ -16,7 +16,7 @@ from attic import xattr
 from attic.chunker import chunkify
 from attic.hashindex import ChunkIndex
 from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \
-    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe
+    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict
 
 ITEMS_BUFFER = 1024 * 1024
 CHUNK_MIN = 1024
@@ -63,7 +63,7 @@ class ChunkBuffer:
         self.key = key
 
     def add(self, item):
-        self.buffer.write(self.packer.pack(item))
+        self.buffer.write(self.packer.pack(StableDict(item)))
         if self.is_full():
             self.flush()
 
@@ -348,7 +348,7 @@ class Archive:
             item[b'user'] = item[b'group'] = None
         xattrs = xattr.get_all(path, follow_symlinks=False)
         if xattrs:
-            item[b'xattrs'] = xattrs
+            item[b'xattrs'] = StableDict(xattrs)
         return item
 
     def process_item(self, path, st):
@@ -549,7 +549,7 @@ class ArchiveChecker:
 
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 if state != prev_state:
-                    unpacker = msgpack.Unpacker()
+                    unpacker = msgpack.Unpacker(object_hook=StableDict)
                     prev_state = state
                 if state % 2:
                     self.report_progress('Archive metadata damage detected', error=True)

+ 6 - 0
attic/helpers.py

@@ -469,6 +469,12 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+class StableDict(dict):
+    """A dict subclass with stable items() ordering"""
+    def items(self):
+        return sorted(super(StableDict, self).items())
+
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):

+ 12 - 1
attic/testsuite/helpers.py

@@ -1,10 +1,13 @@
+import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
 import os
 import tempfile
 import unittest
-from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime
+from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
+    StableDict
 from attic.testsuite import AtticTestCase
+import msgpack
 
 
 class LocationTestCase(AtticTestCase):
@@ -176,3 +179,11 @@ class PruneWithinTestCase(AtticTestCase):
         dotest(test_archives, '1w',  [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1m',  [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1y',  [0, 1, 2, 3, 4, 5])
+
+
+class StableDictTestCase(AtticTestCase):
+
+    def test(self):
+        d = StableDict(foo=1, bar=2, boo=3, baz=4)
+        self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)])
+        self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')