Răsfoiți Sursa

Merge pull request #1149 from ThomasWaldmann/validkeys-in-manifest

item_keys in manifest, validate item keys
enkore 9 ani în urmă
părinte
comite
90d621ce35
3 a modificat fișierele cu 31 adăugiri și 10 ștergeri
  1. 23 8
      borg/archive.py
  2. 7 1
      borg/helpers.py
  3. 1 1
      borg/testsuite/archive.py

+ 23 - 8
borg/archive.py

@@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item
 
     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
+        unknown_keys = set(item) - self.manifest.item_keys
         assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
                                   ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
@@ -587,9 +587,12 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
+                       b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
+                       b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+
+# this is the set of keys that are always present in items:
+REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ])
 
 
 def valid_msgpacked_item(d, item_keys_serialized):
@@ -623,9 +626,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name) for name in item_keys]
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -729,6 +732,11 @@ class ArchiveChecker:
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest on an older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
@@ -806,7 +814,8 @@ class ArchiveChecker:
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            item_keys = self.manifest.item_keys
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, item_keys)
             _state = 0
 
             def missing_chunk_detector(chunk_id):
@@ -821,6 +830,12 @@ class ArchiveChecker:
                 self.error_found = True
                 logger.error(msg)
 
+            def valid_item(obj):
+                if not isinstance(obj, StableDict):
+                    return False
+                keys = set(obj)
+                return REQUIRED_ITEM_KEYS.issubset(keys) and keys.issubset(item_keys)
+
             i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
@@ -835,7 +850,7 @@ class ArchiveChecker:
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
                     try:
                         for item in unpacker:
-                            if isinstance(item, dict):
+                            if valid_item(item):
                                 yield item
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)

+ 7 - 1
borg/helpers.py

@@ -85,16 +85,19 @@ class Manifest:
 
     MANIFEST_ID = b'\0' * 32
 
-    def __init__(self, key, repository):
+    def __init__(self, key, repository, item_keys=None):
+        from .archive import ITEM_KEYS
         self.archives = {}
         self.config = {}
         self.key = key
         self.repository = repository
+        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
 
     @classmethod
     def load(cls, repository, key=None):
         from .key import key_factory
         from .repository import Repository
+        from .archive import ITEM_KEYS
         try:
             cdata = repository.get(cls.MANIFEST_ID)
         except Repository.ObjectNotFound:
@@ -112,6 +115,8 @@ class Manifest:
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
         manifest.config = m[b'config']
+        # valid item keys are whatever is known in the repo or every key we know
+        manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
         return manifest, key
 
     def write(self):
@@ -121,6 +126,7 @@ class Manifest:
             'archives': self.archives,
             'timestamp': self.timestamp,
             'config': self.config,
+            'item_keys': tuple(self.item_keys),
         }))
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))

+ 1 - 1
borg/testsuite/archive.py

@@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
 
     def process(self, input):
-        unpacker = RobustUnpacker(validator=self._validator)
+        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         result = []
         for should_sync, chunks in input:
             if should_sync: