Ver código fonte

Merge pull request #7082 from ThomasWaldmann/fix-chunker-params-comparison

Fix chunker params comparison
TW 2 anos atrás
pai
commit
0989cb4040
3 arquivos alterados com 34 adições e 12 exclusões
  1. src/borg/archive.py (+9 −8)
  2. src/borg/archiver.py (+13 −4)
  3. src/borg/helpers/misc.py (+12 −0)

+ 9 - 8
src/borg/archive.py

@@ -28,7 +28,7 @@ from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Manifest
 from .helpers import hardlinkable
-from .helpers import ChunkIteratorFileWrapper, open_item
+from .helpers import ChunkIteratorFileWrapper, normalize_chunker_params, open_item
 from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
@@ -547,12 +547,14 @@ class Archive:
         if self.create:
             info['command_line'] = sys.argv
         else:
+            cp = self.metadata.get('chunker_params')
+            cp = normalize_chunker_params(cp) if cp is not None else ''
             info.update({
                 'command_line': self.metadata.cmdline,
                 'hostname': self.metadata.hostname,
                 'username': self.metadata.username,
                 'comment': self.metadata.get('comment', ''),
-                'chunker_params': self.metadata.get('chunker_params', ''),
+                'chunker_params': cp,
             })
         return info
 
@@ -2302,13 +2304,12 @@ class ArchiveRecreater:
         target_name = target_name or archive.name + '.recreate'
         target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
-        source_chunker_params = tuple(archive.metadata.get('chunker_params', []))
-        if len(source_chunker_params) == 4 and isinstance(source_chunker_params[0], int):
-            # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash:
-            source_chunker_params = (CH_BUZHASH, ) + source_chunker_params
-        target.recreate_rechunkify = self.rechunkify and source_chunker_params != target.chunker_params
+        src_cp = archive.metadata.get('chunker_params')
+        src_cp = normalize_chunker_params(src_cp) if src_cp is not None else None
+        dst_cp = target.chunker_params
+        target.recreate_rechunkify = self.rechunkify and src_cp != dst_cp
         if target.recreate_rechunkify:
-            logger.debug('Rechunking archive from %s to %s', source_chunker_params or '(unknown)', target.chunker_params)
+            logger.debug('Rechunking archive from %s to %s', src_cp or '(unknown)', dst_cp)
         target.process_file_chunks = ChunksProcessor(
             cache=self.cache, key=self.key,
             add_item=target.add_item, write_checkpoint=target.write_checkpoint,

+ 13 - 4
src/borg/archiver.py

@@ -67,7 +67,7 @@ try:
     from .helpers import ProgressIndicatorPercent
     from .helpers import basic_json_data, json_print
     from .helpers import replace_placeholders
-    from .helpers import ChunkIteratorFileWrapper
+    from .helpers import ChunkIteratorFileWrapper, normalize_chunker_params
     from .helpers import popen_with_error_handling, prepare_subprocess_env, create_filter_process
     from .helpers import dash_open
     from .helpers import umount
@@ -1125,9 +1125,18 @@ class Archiver:
         archive2 = Archive(repository, key, manifest, args.archive2,
                            consider_part_files=args.consider_part_files)
 
-        can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get(
-            'chunker_params', True) or args.same_chunker_params
-        if not can_compare_chunk_ids:
+        cp1 = archive1.metadata.get('chunker_params')
+        cp2 = archive2.metadata.get('chunker_params')
+        if args.same_chunker_params:
+            can_compare_chunk_ids = True  # enforce it
+        elif cp1 is not None and cp2 is not None:
+            # we know chunker params of both archives
+            can_compare_chunk_ids = normalize_chunker_params(cp1) == normalize_chunker_params(cp2)
+            if not can_compare_chunk_ids:
+                self.print_warning('--chunker-params are different between archives, diff will be slow.')
+        else:
+            # we do not know chunker params of at least one of the archives
+            can_compare_chunk_ids = False
             self.print_warning('--chunker-params might be different between archives, diff will be slow.\n'
                                'If you know for certain that they are the same, pass --same-chunker-params '
                                'to override this check.')

+ 12 - 0
src/borg/helpers/misc.py

@@ -16,6 +16,7 @@ from .time import to_localtime
 from . import msgpack
 from .. import __version__ as borg_version
 from .. import chunker
+from ..constants import CH_BUZHASH, CH_FIXED
 
 
 def prune_within(archives, hours, kept_because):
@@ -119,6 +120,17 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
         logger.log(level, line)
 
 
+def normalize_chunker_params(cp):
+    assert isinstance(cp, (list, tuple))
+    if isinstance(cp, list):
+        cp = tuple(cp)
+    if len(cp) == 4 and isinstance(cp[0], int):
+        # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash:
+        cp = (CH_BUZHASH, ) + cp
+    assert cp[0] in (CH_BUZHASH, CH_FIXED)
+    return cp
+
+
 class ChunkIteratorFileWrapper:
     """File-like wrapper for chunk iterators"""