Explorar o código

Merge pull request #1921 from enkore/f/recreate-fixes

recreate fixes
enkore hai 8 anos
pai
achega
e9d7f928e2
Modificáronse 3 ficheiros con 26 adicións e 35 borrados
  1. 16 19
      src/borg/archive.py
  2. 9 16
      src/borg/archiver.py
  3. 1 0
      src/borg/testsuite/archiver.py

+ 16 - 19
src/borg/archive.py

@@ -1394,10 +1394,6 @@ class ArchiveChecker:
 
 
 class ArchiveRecreater:
-    class FakeTargetArchive:
-        def __init__(self):
-            self.stats = Statistics()
-
     class Interrupted(Exception):
         def __init__(self, metadata=None):
             self.metadata = metadata or {}
@@ -1421,6 +1417,9 @@ class ArchiveRecreater:
         self.exclude_if_present = exclude_if_present or []
         self.keep_tag_files = keep_tag_files
 
+        self.rechunkify = chunker_params is not None
+        if self.rechunkify:
+            logger.debug('Rechunking archives to %s', chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
         self.always_recompress = always_recompress
@@ -1434,7 +1433,7 @@ class ArchiveRecreater:
         self.stats = stats
         self.progress = progress
         self.print_file_status = file_status_printer or (lambda *args: None)
-        self.checkpoint_interval = checkpoint_interval
+        self.checkpoint_interval = None if dry_run else checkpoint_interval
 
     def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
@@ -1444,10 +1443,10 @@ class ArchiveRecreater:
             self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
             logger.info("Skipping archive %s, nothing to do", archive_name)
-            return True
+            return
         self.process_items(archive, target)
         replace_original = target_name is None
-        return self.save(archive, target, comment, replace_original=replace_original)
+        self.save(archive, target, comment, replace_original=replace_original)
 
     def process_items(self, archive, target):
         matcher = self.matcher
@@ -1494,12 +1493,11 @@ class ArchiveRecreater:
         self.print_file_status(file_status(item.mode), item.path)
 
     def process_chunks(self, archive, target, item):
-        """Return new chunk ID list for 'item'."""
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
-        chunk_iterator = self.create_chunk_iterator(archive, target, list(item.chunks))
+        chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         compress = self.compression_decider1.decide(item.path)
         chunk_processor = partial(self.chunk_processor, target, compress)
         target.chunk_file(item, self.cache, target.stats, chunk_iterator, chunk_processor)
@@ -1517,24 +1515,22 @@ class ArchiveRecreater:
             if Compressor.detect(old_chunk.data).name == compression_spec['name']:
                 # Stored chunk has the same compression we wanted
                 overwrite = False
-        chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
-        self.seen_chunks.add(chunk_id)
-        return chunk_id, size, csize
+        chunk_entry = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
+        self.seen_chunks.add(chunk_entry.id)
+        return chunk_entry
 
-    def create_chunk_iterator(self, archive, target, chunks):
-        """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
+    def iter_chunks(self, archive, target, chunks):
         chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in chunks])
         if target.recreate_rechunkify:
             # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
             # (does not load the entire file into memory)
             file = ChunkIteratorFileWrapper(chunk_iterator)
-            return target.chunker.chunkify(file)
+            yield from target.chunker.chunkify(file)
         else:
             for chunk in chunk_iterator:
                 yield chunk.data
 
     def save(self, archive, target, comment=None, replace_original=True):
-        """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return
         timestamp = archive.ts.replace(tzinfo=None)
@@ -1591,12 +1587,13 @@ class ArchiveRecreater:
 
     def create_target(self, archive, target_name=None):
         """Create target archive."""
-        if self.dry_run:
-            return self.FakeTargetArchive(), None
         target_name = target_name or archive.name + '.recreate'
         target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
-        target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params', [])) != self.chunker_params
+        source_chunker_params = tuple(archive.metadata.get('chunker_params', []))
+        target.recreate_rechunkify = self.rechunkify and source_chunker_params != target.chunker_params
+        if target.recreate_rechunkify:
+            logger.debug('Rechunking archive from %s to %s', source_chunker_params or '(unknown)', target.chunker_params)
         return target
 
     def create_target_archive(self, name):

+ 9 - 16
src/borg/archiver.py

@@ -1101,11 +1101,11 @@ class Archiver:
                 if recreater.is_temporary_archive(name):
                     continue
                 print('Processing', name)
-                if not recreater.recreate(name, args.comment):
-                    break
-        manifest.write()
-        repository.commit()
-        cache.commit()
+                recreater.recreate(name, args.comment)
+        if not args.dry_run:
+            manifest.write()
+            repository.commit()
+            cache.commit()
         return self.exit_code
 
     @with_repository(manifest=False, exclusive=True)
@@ -2356,6 +2356,8 @@ class Archiver:
         recreate_epilog = textwrap.dedent("""
         Recreate the contents of existing archives.
 
+        This is an *experimental* feature. Do *not* use this on your only backup.
+
         --exclude, --exclude-from and PATH have the exact same semantics
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.
@@ -2372,15 +2374,6 @@ class Archiver:
         used to have upgraded Borg 0.xx or Attic archives deduplicate with
         Borg 1.x archives.
 
-        borg recreate is signal safe. Send either SIGINT (Ctrl-C on most terminals) or
-        SIGTERM to request termination.
-
-        Use the *exact same* command line to resume the operation later - changing excludes
-        or paths will lead to inconsistencies (changed excludes will only apply to newly
-        processed files/dirs). Changing compression leads to incorrect size information
-        (which does not cause any data loss, but can be misleading).
-        Changing chunker params between invocations might lead to data loss.
-
         USE WITH CAUTION.
         Depending on the PATHs and patterns given, recreate can be used to permanently
         delete files from archives.
@@ -2395,8 +2388,8 @@ class Archiver:
 
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
-        When recompressing approximately 1 % of the repository size or 512 MB
-        (whichever is greater) of additional space is used.
+        When recompressing expect approx. (throughput / checkpoint-interval) in space usage,
+        assuming all chunks are recompressed.
         """)
         subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False,
                                           description=self.do_recreate.__doc__,

+ 1 - 0
src/borg/testsuite/archiver.py

@@ -1823,6 +1823,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('recreate', self.repository_location, '--chunker-params', 'default')
         self.check_cache()
         # test1 and test2 do deduplicate after recreate
+        assert int(self.cmd('list', self.repository_location + '::test1', 'input/large_file', '--format={size}'))
         assert not int(self.cmd('list', self.repository_location + '::test1', 'input/large_file',
                                 '--format', '{unique_chunks}'))