Browse Source

recreate: improve tests, docs, check chunk availability[1]

[1] If e.g. delete/prune/check are run between recreate invocations, they can
    remove the chunks recorded in recreate_partial_chunks. This is now caught
    and handled correctly.
Marian Beermann 9 years ago
parent
commit
0bc0fa22b4
4 changed files with 47 additions and 42 deletions
  1. borg/archive.py (+15 -2)
  2. borg/archiver.py (+9 -11)
  3. borg/testsuite/archiver.py (+16 -21)
  4. docs/usage/recreate.rst.inc (+7 -8)
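
As a hedged illustration of the scenario in footnote [1] (the repository path is a
placeholder, the --chunker-params values are borrowed from the tests below, and exact
commands and behaviour may differ):

    borg recreate --chunker-params 10,12,11,4095 /path/to/repo   # interrupted, leaves "<ARCHIVE>.recreate" behind
    borg prune --keep-daily 7 /path/to/repo                      # delete/prune/check may remove chunks recorded
                                                                 # only in recreate_partial_chunks
    borg recreate --chunker-params 10,12,11,4095 /path/to/repo   # resume: the missing chunks are now detected and
                                                                 # the stale partial-chunk list is discarded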

+ 15 - 2
borg/archive.py

@@ -20,8 +20,9 @@ from .compress import Compressor, COMPR_BUFFER
 from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
-    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \
+    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
+from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
 from .hashindex import ChunkIndex
@@ -1221,7 +1222,19 @@ class ArchiveRecreater:
         if self.progress:
             old_target.stats.show_progress(final=True)
         target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', [])
-        for chunk_id, _, _ in target.recreate_partial_chunks:
+        for chunk_id, size, csize in target.recreate_partial_chunks:
+            if not self.cache.seen_chunk(chunk_id):
+                try:
+                    # Repository has __contains__, RemoteRepository doesn't
+                    self.repository.get(chunk_id)
+                except Repository.ObjectNotFound:
+                    # delete/prune/check between invocations: these chunks are gone.
+                    target.recreate_partial_chunks = None
+                    break
+                # fast-lane insert into chunks cache
+                self.cache.chunks[chunk_id] = (1, size, csize)
+                target.stats.update(size, csize, True)
+                continue
             # incref now, otherwise old_target.delete() might delete these chunks
             self.cache.chunk_incref(chunk_id, target.stats)
         old_target.delete(Statistics(), progress=self.progress)

+ 9 - 11
borg/archiver.py

@@ -816,9 +816,9 @@ class Archiver:
         """Re-create archives"""
         """Re-create archives"""
         def interrupt(signal_num, stack_frame):
         def interrupt(signal_num, stack_frame):
             if recreater.interrupt:
             if recreater.interrupt:
-                print("Received signal, again. I'm not deaf.\n", file=sys.stderr)
+                print("\nReceived signal, again. I'm not deaf.", file=sys.stderr)
             else:
-                print("Received signal, will exit cleanly.\n", file=sys.stderr)
+                print("\nReceived signal, will exit cleanly.", file=sys.stderr)
             recreater.interrupt = True

         matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
@@ -1664,12 +1664,12 @@ class Archiver:
         Recreate the contents of existing archives.

         --exclude, --exclude-from and PATH have the exact same semantics
-        as in "borg create". If a PATH is specified the resulting archive
-        will only contain files under PATH.
+        as in "borg create". If PATHs are specified the resulting archive
+        will only contain files from these PATHs.

         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not rewritten at the same time.
+        incorrect information for archives that were not recreated at the same time.
         There is no risk of data loss by this.

         --chunker-params will re-chunk all files in the archive, this can be
@@ -1686,7 +1686,8 @@ class Archiver:
         Changing chunker params between invocations might lead to data loss.

         USE WITH CAUTION.
-        Permanent data loss by specifying incorrect patterns or PATHS is possible.
+        Depending on the PATHs and patterns given, recreate can be used to permanently
+        delete files from archives.
         When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
         interpreted.

@@ -1695,7 +1696,7 @@ class Archiver:
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
 
         When rechunking space usage can be substantial, expect at least the entire
         When rechunking space usage can be substantial, expect at least the entire
-        deduplicated size of the archives using the older chunker params.
+        deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
         (whichever is greater) of additional space is used.
         """)
@@ -1712,7 +1713,7 @@ class Archiver:
                               help='only display items with the given status characters')
         subparser.add_argument('-p', '--progress', dest='progress',
                               action='store_true', default=False,
-                               help='show progress display while rewriting archives')
+                               help='show progress display while recreating archives')
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                               action='store_true', default=False,
                               help='do not change anything')
@@ -1746,9 +1747,6 @@ class Archiver:
                                   metavar='yyyy-mm-ddThh:mm:ss',
                                   help='manually specify the archive creation date/time (UTC). '
                                        'alternatively, give a reference file/directory.')
-        archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
-                                   type=int, default=300, metavar='SECONDS',
-                                   help='write checkpoint every SECONDS seconds (Default: 300)')
         archive_group.add_argument('-C', '--compression', dest='compression',
                                   type=CompressionSpec, default=None, metavar='COMPRESSION',
                                   help='select compression algorithm (and level):\n'

+ 16 - 21
borg/testsuite/archiver.py

@@ -926,13 +926,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in("U input/file1", output)
         self.assert_in("U input/file1", output)
         self.assert_in("x input/file2", output)
         self.assert_in("x input/file2", output)
 
 
-    def test_create_delete_inbetween(self):
-        self.create_test_files()
-        self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test1', 'input')
-        self.cmd('delete', self.repository_location + '::test1')
-        self.cmd('create', self.repository_location + '::test2', 'input')
-
     def test_create_topical(self):
         now = time.time()
         self.create_regular_file('file1', size=1024 * 80)
@@ -1231,8 +1224,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):

     def test_recreate_rechunkify(self):
         with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd:
-            fd.write(b'a' * 250)
-            fd.write(b'b' * 250)
+            fd.write(b'a' * 280)
+            fd.write(b'b' * 280)
         self.cmd('init', self.repository_location)
         self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input')
         self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache')
@@ -1249,16 +1242,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def test_recreate_recompress(self):
         self.create_regular_file('compressible', size=10000)
         self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
-                        '--format', '{size} {csize}')
-        size, csize = map(int, list.split(' '))
-        assert csize >= size
+        self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none')
+        file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
+                             '--format', '{size} {csize} {sha256}')
+        size, csize, sha256_before = file_list.split(' ')
+        assert int(csize) >= int(size)  # >= due to metadata overhead
         self.cmd('recreate', self.repository_location, '-C', 'lz4')
-        list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
-                        '--format', '{size} {csize}')
-        size, csize = map(int, list.split(' '))
-        assert csize < size
+        file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
+                             '--format', '{size} {csize} {sha256}')
+        size, csize, sha256_after = file_list.split(' ')
+        assert int(csize) < int(size)
+        assert sha256_before == sha256_after

     def test_recreate_dry_run(self):
         self.create_regular_file('compressible', size=10000)
@@ -1327,7 +1321,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             frame = inspect.stack()[2]
             try:
                 caller_self = frame[0].f_locals['self']
-                caller_self.interrupt = True
+                if isinstance(caller_self, ArchiveRecreater):
+                    caller_self.interrupt = True
             finally:
                 del frame
             return real_add_chunk(*args, **kwargs)
@@ -1339,9 +1334,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test', 'input')
         archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
         with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
-            self.cmd('recreate', '-p', '--chunker-params', '16,18,17,4095', self.repository_location)
+            self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
         assert 'test.recreate' in self.cmd('list', self.repository_location)
-        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location)
+        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
         assert 'Found test.recreate, will resume' in output
         assert 'Copied 1 chunks from a partially processed item' in output
         archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
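
As a hedged aside, the modified recreate tests can presumably be selected by name from a
source checkout with the test dependencies installed; the exact invocation may differ,
e.g. when the project's tox setup is used instead:

    python -m pytest borg/testsuite/archiver.py -k "recreate" -v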

+ 7 - 8
docs/usage/recreate.rst.inc

@@ -18,7 +18,7 @@ optional arguments
     ``--filter STATUSCHARS``
         | only display items with the given status characters
     ``-p``, ``--progress``
-        | show progress display while rewriting archives
+        | show progress display while recreating archives
     ``-n``, ``--dry-run``
         | do not change anything
     ``-s``, ``--stats``
@@ -44,8 +44,6 @@ Archive options
         | add a comment text to the archive
     ``--timestamp yyyy-mm-ddThh:mm:ss``
         | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
-    ``-c SECONDS``, ``--checkpoint-interval SECONDS``
-        | write checkpoint every SECONDS seconds (Default: 300)
     ``-C COMPRESSION``, ``--compression COMPRESSION``
         | select compression algorithm (and level):
         | none == no compression (default),
@@ -63,12 +61,12 @@ Description
 Recreate the contents of existing archives.

 --exclude, --exclude-from and PATH have the exact same semantics
-as in "borg create". If a PATH is specified the resulting archive
-will only contain files under PATH.
+as in "borg create". If PATHs are specified the resulting archive
+will only contain files from these PATHs.

 --compression: all chunks seen will be stored using the given method.
 Due to how Borg stores compressed size information this might display
-incorrect information for archives that were not rewritten at the same time.
+incorrect information for archives that were not recreated at the same time.
 There is no risk of data loss by this.

 --chunker-params will re-chunk all files in the archive, this can be
@@ -85,7 +83,8 @@ processed files/dirs). Changing compression leads to incorrect size information
 Changing chunker params between invocations might lead to data loss.

 USE WITH CAUTION.
-Permanent data loss by specifying incorrect patterns or PATHS is possible.
+Depending on the PATHs and patterns given, recreate can be used to permanently
+delete files from archives.
 When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
 interpreted.

@@ -94,6 +93,6 @@ archive that is built during the operation exists at the same time at
 "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
 
 When rechunking space usage can be substantial, expect at least the entire
 When rechunking space usage can be substantial, expect at least the entire
-deduplicated size of the archives using the older chunker params.
+deduplicated size of the archives using the previous chunker params.
 When recompressing approximately 1 % of the repository size or 512 MB
 (whichever is greater) of additional space is used.
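
To make the documented options concrete, a few hedged usage sketches (repository path,
pattern and parameter values are illustrative and not taken from this commit):

    borg recreate -C lz4 /path/to/repo                                        # recompress all chunks with lz4
    borg recreate --chunker-params 19,23,21,4095 /path/to/repo                # re-chunk all files in all archives
    borg recreate --dry-run --verbose --list --exclude '*.tmp' /path/to/repo  # preview how an exclude pattern applies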