2
0
Эх сурвалжийг харах

Merge pull request #1149 from ThomasWaldmann/validkeys-in-manifest

item_keys in manifest, validate item keys
enkore 9 жил өмнө
parent
commit
90d621ce35

+ 23 - 8
borg/archive.py

@@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item
             yield item
 
 
     def add_item(self, item):
     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
+        unknown_keys = set(item) - self.manifest.item_keys
         assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
         assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
                                   ','.join(k.decode('ascii') for k in unknown_keys))
                                   ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
         if self.show_progress:
@@ -587,9 +587,12 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
+                       b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
+                       b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+
+# this is the set of keys that are always present in items:
+REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ])
 
 
 
 
 def valid_msgpacked_item(d, item_keys_serialized):
 def valid_msgpacked_item(d, item_keys_serialized):
@@ -623,9 +626,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
 class RobustUnpacker:
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """A restartable/robust version of the streaming msgpack unpacker
     """
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name) for name in item_keys]
         self.validator = validator
         self.validator = validator
         self._buffered_data = []
         self._buffered_data = []
         self._resync = False
         self._resync = False
@@ -729,6 +732,11 @@ class ArchiveChecker:
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
         """
         logger.info('Rebuilding missing manifest, this might take some time...')
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest on an older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
         manifest = Manifest(self.key, self.repository)
         for chunk_id, _ in self.chunks.iteritems():
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
             cdata = self.repository.get(chunk_id)
@@ -806,7 +814,8 @@ class ArchiveChecker:
 
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            item_keys = self.manifest.item_keys
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, item_keys)
             _state = 0
             _state = 0
 
 
             def missing_chunk_detector(chunk_id):
             def missing_chunk_detector(chunk_id):
@@ -821,6 +830,12 @@ class ArchiveChecker:
                 self.error_found = True
                 self.error_found = True
                 logger.error(msg)
                 logger.error(msg)
 
 
+            def valid_item(obj):
+                if not isinstance(obj, StableDict):
+                    return False
+                keys = set(obj)
+                return REQUIRED_ITEM_KEYS.issubset(keys) and keys.issubset(item_keys)
+
             i = 0
             i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
                 items = list(items)
@@ -835,7 +850,7 @@ class ArchiveChecker:
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
                     try:
                     try:
                         for item in unpacker:
                         for item in unpacker:
-                            if isinstance(item, dict):
+                            if valid_item(item):
                                 yield item
                                 yield item
                             else:
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)

+ 7 - 1
borg/helpers.py

@@ -85,16 +85,19 @@ class Manifest:
 
 
     MANIFEST_ID = b'\0' * 32
     MANIFEST_ID = b'\0' * 32
 
 
-    def __init__(self, key, repository):
+    def __init__(self, key, repository, item_keys=None):
+        from .archive import ITEM_KEYS
         self.archives = {}
         self.archives = {}
         self.config = {}
         self.config = {}
         self.key = key
         self.key = key
         self.repository = repository
         self.repository = repository
+        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
 
 
     @classmethod
     @classmethod
     def load(cls, repository, key=None):
     def load(cls, repository, key=None):
         from .key import key_factory
         from .key import key_factory
         from .repository import Repository
         from .repository import Repository
+        from .archive import ITEM_KEYS
         try:
         try:
             cdata = repository.get(cls.MANIFEST_ID)
             cdata = repository.get(cls.MANIFEST_ID)
         except Repository.ObjectNotFound:
         except Repository.ObjectNotFound:
@@ -112,6 +115,8 @@ class Manifest:
         if manifest.timestamp:
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
             manifest.timestamp = manifest.timestamp.decode('ascii')
         manifest.config = m[b'config']
         manifest.config = m[b'config']
+        # valid item keys are whatever is known in the repo or every key we know
+        manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
         return manifest, key
         return manifest, key
 
 
     def write(self):
     def write(self):
@@ -121,6 +126,7 @@ class Manifest:
             'archives': self.archives,
             'archives': self.archives,
             'timestamp': self.timestamp,
             'timestamp': self.timestamp,
             'config': self.config,
             'config': self.config,
+            'item_keys': tuple(self.item_keys),
         }))
         }))
         self.id = self.key.id_hash(data)
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))

+ 1 - 1
borg/testsuite/archive.py

@@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
 
 
     def process(self, input):
     def process(self, input):
-        unpacker = RobustUnpacker(validator=self._validator)
+        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         result = []
         result = []
         for should_sync, chunks in input:
         for should_sync, chunks in input:
             if should_sync:
             if should_sync: