
Merge pull request #3230 from ThomasWaldmann/chunks-healthy-recreate-fix

recreate / chunks_healthy fixes
TW 7 years ago
parent
commit fcef52a5d8
2 changed files with 30 insertions and 7 deletions
  1. + 18 - 6
      src/borg/archive.py
  2. + 12 - 1
      src/borg/archiver.py

+ 18 - 6
src/borg/archive.py

@@ -967,13 +967,14 @@ class ChunksProcessor:
 
 
     def __init__(self, *, key, cache,
                  add_item, write_checkpoint,
-                 checkpoint_interval):
+                 checkpoint_interval, rechunkify):
         self.key = key
         self.cache = cache
         self.add_item = add_item
         self.write_checkpoint = write_checkpoint
         self.checkpoint_interval = checkpoint_interval
         self.last_checkpoint = time.monotonic()
+        self.rechunkify = rechunkify

     def write_part_file(self, item, from_chunk, number):
         item = Item(internal_dict=item.as_dict())
@@ -998,6 +999,10 @@ class ChunksProcessor:
                 return chunk_entry

         item.chunks = []
+        # if we rechunkify, we'll get a fundamentally different chunks list, thus we need
+        # to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
+        if self.rechunkify and 'chunks_healthy' in item:
+            del item.chunks_healthy
         from_chunk = 0
         part_number = 1
         for data in chunk_iter:
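
Why this deletion is needed: borg check heals files by walking item.chunks and item.chunks_healthy in lockstep (see the ArchiveChecker hunk below), so the two lists are only meaningful while they are index-aligned, and rechunking produces a chunks list of a different shape. A minimal standalone sketch of that invariant, using a plain dict instead of borg's Item class (process_chunks and the sample chunk entries are hypothetical, not borg's API):

    # chunk list entries modeled as (id, size, csize) tuples, as in borg metadata
    def process_chunks(item, new_chunks, rechunkify):
        if rechunkify and 'chunks_healthy' in item:
            # the new chunker cuts the data differently, so the old healthy
            # list no longer lines up entry-for-entry with .chunks
            del item['chunks_healthy']
        item['chunks'] = new_chunks

    item = {'chunks': [('id-zeros', 100, 90)],         # all-zero replacement chunk
            'chunks_healthy': [('id-orig', 100, 90)]}  # entry for the lost healthy chunk
    process_chunks(item, [('id-a', 60, 55), ('id-b', 40, 38)], rechunkify=True)
    assert 'chunks_healthy' not in item  # healing metadata dropped, as intended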
@@ -1502,7 +1507,12 @@ class ArchiveChecker:
             has_chunks_healthy = 'chunks_healthy' in item
             chunks_current = item.chunks
             chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
-            assert len(chunks_current) == len(chunks_healthy)
+            if has_chunks_healthy and len(chunks_current) != len(chunks_healthy):
+                # should never happen, but there was issue #3218.
+                logger.warning('{}: Invalid chunks_healthy metadata removed!'.format(item.path))
+                del item.chunks_healthy
+                has_chunks_healthy = False
+                chunks_healthy = chunks_current
             for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
                 chunk_id, size, csize = chunk_healthy
                 if chunk_id not in self.chunks:
@@ -1758,15 +1768,17 @@ class ArchiveRecreater:
             if not matcher.match(item.path):
                 self.print_file_status('x', item.path)
                 if item_is_hardlink_master(item):
-                    hardlink_masters[item.path] = (item.get('chunks'), None)
+                    hardlink_masters[item.path] = (item.get('chunks'), item.get('chunks_healthy'), None)
                 continue
             if target_is_subset and hardlinkable(item.mode) and item.get('source') in hardlink_masters:
                 # master of this hard link is outside the target subset
-                chunks, new_source = hardlink_masters[item.source]
+                chunks, chunks_healthy, new_source = hardlink_masters[item.source]
                 if new_source is None:
                     # First item to use this master, move the chunks
                     item.chunks = chunks
-                    hardlink_masters[item.source] = (None, item.path)
+                    if chunks_healthy is not None:
+                        item.chunks_healthy = chunks_healthy
+                    hardlink_masters[item.source] = (None, None, item.path)
                     del item.source
                 else:
                     # Master was already moved, only update this item's source
@@ -1891,7 +1903,7 @@ class ArchiveRecreater:
         target.process_file_chunks = ChunksProcessor(
             cache=self.cache, key=self.key,
             add_item=target.add_item, write_checkpoint=target.write_checkpoint,
-            checkpoint_interval=self.checkpoint_interval).process_file_chunks
+            checkpoint_interval=self.checkpoint_interval, rechunkify=target.recreate_rechunkify).process_file_chunks
         target.chunker = Chunker(self.key.chunk_seed, *target.chunker_params)
         return target

+ 12 - 1
src/borg/archiver.py

@@ -519,7 +519,7 @@ class Archiver:
                     nobsdflags=args.nobsdflags, numeric_owner=args.numeric_owner)
                 cp = ChunksProcessor(cache=cache, key=key,
                     add_item=archive.add_item, write_checkpoint=archive.write_checkpoint,
-                    checkpoint_interval=args.checkpoint_interval)
+                    checkpoint_interval=args.checkpoint_interval, rechunkify=False)
                 fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key,
                     process_file_chunks=cp.process_file_chunks, add_item=archive.add_item,
                     chunker_params=args.chunker_params)
@@ -3371,6 +3371,17 @@ class Archiver:
         deduplicated size of the archives using the previous chunker params.
         When recompressing expect approx. (throughput / checkpoint-interval) in space usage,
         assuming all chunks are recompressed.
+
+        If you recently ran borg check --repair and it had to fix lost chunks with all-zero
+        replacement chunks, please first run another backup for the same data and re-run
+        borg check --repair afterwards to heal any archives that had lost chunks which are
+        still generated from the input data.
+
+        Important: running borg recreate to re-chunk will remove the chunks_healthy
+        metadata of all items with replacement chunks, so healing will not be possible
+        any more after re-chunking (it is also unlikely it would ever work: due to the
+        change of chunking parameters, the missing chunk likely will never be seen again
+        even if you still have the data that produced it).
         """)
         subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False,
                                           description=self.do_recreate.__doc__,
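
To make the ordering recommended by the new help text concrete, a hedged example session (repository path, archive name and chunker params are placeholders, not taken from this change):

    # 1. back up the same input data again, so the lost chunks can reappear
    borg create /path/to/repo::newbackup /path/to/data
    # 2. re-run repair: archives with all-zero replacement chunks get healed
    borg check --repair /path/to/repo
    # 3. only then re-chunk; this removes chunks_healthy, so no later healing
    borg recreate --chunker-params CHUNK_PARAMS /path/to/repo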