Переглянути джерело

Merge pull request #7082 from ThomasWaldmann/fix-chunker-params-comparison

Fix chunker params comparison
TW 2 роки тому
батько
коміт
0989cb4040
3 змінених файлів з 34 додано та 12 видалено
  1. 9 8
      src/borg/archive.py
  2. 13 4
      src/borg/archiver.py
  3. 12 0
      src/borg/helpers/misc.py

+ 9 - 8
src/borg/archive.py

@@ -28,7 +28,7 @@ from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Manifest
 from .helpers import hardlinkable
-from .helpers import ChunkIteratorFileWrapper, open_item
+from .helpers import ChunkIteratorFileWrapper, normalize_chunker_params, open_item
 from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
@@ -547,12 +547,14 @@ class Archive:
         if self.create:
             info['command_line'] = sys.argv
         else:
+            cp = self.metadata.get('chunker_params')
+            cp = normalize_chunker_params(cp) if cp is not None else ''
             info.update({
                 'command_line': self.metadata.cmdline,
                 'hostname': self.metadata.hostname,
                 'username': self.metadata.username,
                 'comment': self.metadata.get('comment', ''),
-                'chunker_params': self.metadata.get('chunker_params', ''),
+                'chunker_params': cp,
             })
         return info
 
@@ -2302,13 +2304,12 @@ class ArchiveRecreater:
         target_name = target_name or archive.name + '.recreate'
         target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
-        source_chunker_params = tuple(archive.metadata.get('chunker_params', []))
-        if len(source_chunker_params) == 4 and isinstance(source_chunker_params[0], int):
-            # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash:
-            source_chunker_params = (CH_BUZHASH, ) + source_chunker_params
-        target.recreate_rechunkify = self.rechunkify and source_chunker_params != target.chunker_params
+        src_cp = archive.metadata.get('chunker_params')
+        src_cp = normalize_chunker_params(src_cp) if src_cp is not None else None
+        dst_cp = target.chunker_params
+        target.recreate_rechunkify = self.rechunkify and src_cp != dst_cp
         if target.recreate_rechunkify:
-            logger.debug('Rechunking archive from %s to %s', source_chunker_params or '(unknown)', target.chunker_params)
+            logger.debug('Rechunking archive from %s to %s', src_cp or '(unknown)', dst_cp)
         target.process_file_chunks = ChunksProcessor(
             cache=self.cache, key=self.key,
             add_item=target.add_item, write_checkpoint=target.write_checkpoint,

+ 13 - 4
src/borg/archiver.py

@@ -67,7 +67,7 @@ try:
     from .helpers import ProgressIndicatorPercent
     from .helpers import basic_json_data, json_print
     from .helpers import replace_placeholders
-    from .helpers import ChunkIteratorFileWrapper
+    from .helpers import ChunkIteratorFileWrapper, normalize_chunker_params
     from .helpers import popen_with_error_handling, prepare_subprocess_env, create_filter_process
     from .helpers import dash_open
     from .helpers import umount
@@ -1125,9 +1125,18 @@ class Archiver:
         archive2 = Archive(repository, key, manifest, args.archive2,
                            consider_part_files=args.consider_part_files)
 
-        can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get(
-            'chunker_params', True) or args.same_chunker_params
-        if not can_compare_chunk_ids:
+        cp1 = archive1.metadata.get('chunker_params')
+        cp2 = archive2.metadata.get('chunker_params')
+        if args.same_chunker_params:
+            can_compare_chunk_ids = True  # enforce it
+        elif cp1 is not None and cp2 is not None:
+            # we know chunker params of both archives
+            can_compare_chunk_ids = normalize_chunker_params(cp1) == normalize_chunker_params(cp2)
+            if not can_compare_chunk_ids:
+                self.print_warning('--chunker-params are different between archives, diff will be slow.')
+        else:
+            # we do not know chunker params of at least one of the archives
+            can_compare_chunk_ids = False
             self.print_warning('--chunker-params might be different between archives, diff will be slow.\n'
                                'If you know for certain that they are the same, pass --same-chunker-params '
                                'to override this check.')

+ 12 - 0
src/borg/helpers/misc.py

@@ -16,6 +16,7 @@ from .time import to_localtime
 from . import msgpack
 from .. import __version__ as borg_version
 from .. import chunker
+from ..constants import CH_BUZHASH, CH_FIXED
 
 
 def prune_within(archives, hours, kept_because):
@@ -119,6 +120,17 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
         logger.log(level, line)
 
 
+def normalize_chunker_params(cp):
+    """Return chunker params *cp* (list or tuple) as a tuple whose first element is the chunker algorithm."""
+    assert isinstance(cp, (list, tuple))
+    params = tuple(cp) if isinstance(cp, list) else cp
+    # a 4-element tuple of ints comes from borg < 1.2: no chunker algo stored, buzhash was the only one
+    if len(params) == 4 and isinstance(params[0], int):
+        params = (CH_BUZHASH, ) + params
+    assert params[0] in (CH_BUZHASH, CH_FIXED)
+    return params
+
+
 class ChunkIteratorFileWrapper:
     """File-like wrapper for chunk iterators"""