Prechádzať zdrojové kódy

store item_keys into manifest, fixes #1147

we need a list of valid item metadata keys. using a list stored in the repo manifest
is more future-proof than the hardcoded ITEM_KEYS in the source code.

keys that are in union(item_keys_from_repo, item_keys_from_source) are considered valid.
Thomas Waldmann 9 rokov pred
rodič
commit
78121a8d04
3 zmenil súbory, kde vykonal 21 pridanie a 9 odobranie
  1. 13 7
      borg/archive.py
  2. 7 1
      borg/helpers.py
  3. 1 1
      borg/testsuite/archive.py

+ 13 - 7
borg/archive.py

@@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item
             yield item
 
 
     def add_item(self, item):
     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
+        unknown_keys = set(item) - self.manifest.item_keys
         assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
         assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
                                   ','.join(k.decode('ascii') for k in unknown_keys))
                                   ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
         if self.show_progress:
@@ -587,9 +587,9 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
+                       b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
+                       b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 
 
 
 
 def valid_msgpacked_item(d, item_keys_serialized):
 def valid_msgpacked_item(d, item_keys_serialized):
@@ -623,9 +623,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
 class RobustUnpacker:
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """A restartable/robust version of the streaming msgpack unpacker
     """
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name) for name in item_keys]
         self.validator = validator
         self.validator = validator
         self._buffered_data = []
         self._buffered_data = []
         self._resync = False
         self._resync = False
@@ -729,6 +729,11 @@ class ArchiveChecker:
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
         """
         logger.info('Rebuilding missing manifest, this might take some time...')
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest on an older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
         manifest = Manifest(self.key, self.repository)
         for chunk_id, _ in self.chunks.iteritems():
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
             cdata = self.repository.get(chunk_id)
@@ -806,7 +811,8 @@ class ArchiveChecker:
 
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item,
+                                      self.manifest.item_keys)
             _state = 0
             _state = 0
 
 
             def missing_chunk_detector(chunk_id):
             def missing_chunk_detector(chunk_id):

+ 7 - 1
borg/helpers.py

@@ -85,16 +85,19 @@ class Manifest:
 
 
     MANIFEST_ID = b'\0' * 32
     MANIFEST_ID = b'\0' * 32
 
 
-    def __init__(self, key, repository):
+    def __init__(self, key, repository, item_keys=None):
+        from .archive import ITEM_KEYS
         self.archives = {}
         self.archives = {}
         self.config = {}
         self.config = {}
         self.key = key
         self.key = key
         self.repository = repository
         self.repository = repository
+        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
 
 
     @classmethod
     @classmethod
     def load(cls, repository, key=None):
     def load(cls, repository, key=None):
         from .key import key_factory
         from .key import key_factory
         from .repository import Repository
         from .repository import Repository
+        from .archive import ITEM_KEYS
         try:
         try:
             cdata = repository.get(cls.MANIFEST_ID)
             cdata = repository.get(cls.MANIFEST_ID)
         except Repository.ObjectNotFound:
         except Repository.ObjectNotFound:
@@ -112,6 +115,8 @@ class Manifest:
         if manifest.timestamp:
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
             manifest.timestamp = manifest.timestamp.decode('ascii')
         manifest.config = m[b'config']
         manifest.config = m[b'config']
+        # valid item keys are the union of the keys stored in the repo manifest and the keys known to this code
+        manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
         return manifest, key
         return manifest, key
 
 
     def write(self):
     def write(self):
@@ -121,6 +126,7 @@ class Manifest:
             'archives': self.archives,
             'archives': self.archives,
             'timestamp': self.timestamp,
             'timestamp': self.timestamp,
             'config': self.config,
             'config': self.config,
+            'item_keys': tuple(self.item_keys),
         }))
         }))
         self.id = self.key.id_hash(data)
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))

+ 1 - 1
borg/testsuite/archive.py

@@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
 
 
     def process(self, input):
     def process(self, input):
-        unpacker = RobustUnpacker(validator=self._validator)
+        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         result = []
         result = []
         for should_sync, chunks in input:
         for should_sync, chunks in input:
             if should_sync:
             if should_sync: