Просмотр исходного кода

recreate: improve tests, docs, check chunk availability[1]

[1] If e.g. delete/prune/check are run between recreate invocations, they can
    delete the recreate_partial_chunks. This is now caught and handled correctly.
Marian Beermann 9 лет назад
Родитель
Коммит
0bc0fa22b4
4 изменённых файла: 47 добавлено и 42 удалено
  1. 15 2
      borg/archive.py
  2. 9 11
      borg/archiver.py
  3. 16 21
      borg/testsuite/archiver.py
  4. 7 8
      docs/usage/recreate.rst.inc

+ 15 - 2
borg/archive.py

@@ -20,8 +20,9 @@ from .compress import Compressor, COMPR_BUFFER
 from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
-    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \
+    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
+from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
 from .hashindex import ChunkIndex
@@ -1221,7 +1222,19 @@ class ArchiveRecreater:
         if self.progress:
             old_target.stats.show_progress(final=True)
         target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', [])
-        for chunk_id, _, _ in target.recreate_partial_chunks:
+        for chunk_id, size, csize in target.recreate_partial_chunks:
+            if not self.cache.seen_chunk(chunk_id):
+                try:
+                    # Repository has __contains__, RemoteRepository doesn't
+                    self.repository.get(chunk_id)
+                except Repository.ObjectNotFound:
+                    # delete/prune/check between invocations: these chunks are gone.
+                    target.recreate_partial_chunks = None
+                    break
+                # fast-lane insert into chunks cache
+                self.cache.chunks[chunk_id] = (1, size, csize)
+                target.stats.update(size, csize, True)
+                continue
             # incref now, otherwise old_target.delete() might delete these chunks
             self.cache.chunk_incref(chunk_id, target.stats)
         old_target.delete(Statistics(), progress=self.progress)

+ 9 - 11
borg/archiver.py

@@ -816,9 +816,9 @@ class Archiver:
         """Re-create archives"""
         def interrupt(signal_num, stack_frame):
             if recreater.interrupt:
-                print("Received signal, again. I'm not deaf.\n", file=sys.stderr)
+                print("\nReceived signal, again. I'm not deaf.", file=sys.stderr)
             else:
-                print("Received signal, will exit cleanly.\n", file=sys.stderr)
+                print("\nReceived signal, will exit cleanly.", file=sys.stderr)
             recreater.interrupt = True
 
         matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
@@ -1664,12 +1664,12 @@ class Archiver:
         Recreate the contents of existing archives.
 
         --exclude, --exclude-from and PATH have the exact same semantics
-        as in "borg create". If a PATH is specified the resulting archive
-        will only contain files under PATH.
+        as in "borg create". If PATHs are specified the resulting archive
+        will only contain files from these PATHs.
 
         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not rewritten at the same time.
+        incorrect information for archives that were not recreated at the same time.
         There is no risk of data loss by this.
 
         --chunker-params will re-chunk all files in the archive, this can be
@@ -1686,7 +1686,8 @@ class Archiver:
         Changing chunker params between invocations might lead to data loss.
 
         USE WITH CAUTION.
-        Permanent data loss by specifying incorrect patterns or PATHS is possible.
+        Depending on the PATHs and patterns given, recreate can be used to permanently
+        delete files from archives.
         When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
         interpreted.
 
@@ -1695,7 +1696,7 @@ class Archiver:
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
         When rechunking space usage can be substantial, expect at least the entire
-        deduplicated size of the archives using the older chunker params.
+        deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
         (whichever is greater) of additional space is used.
         """)
@@ -1712,7 +1713,7 @@ class Archiver:
                                help='only display items with the given status characters')
         subparser.add_argument('-p', '--progress', dest='progress',
                                action='store_true', default=False,
-                               help='show progress display while rewriting archives')
+                               help='show progress display while recreating archives')
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                action='store_true', default=False,
                                help='do not change anything')
@@ -1746,9 +1747,6 @@ class Archiver:
                                    metavar='yyyy-mm-ddThh:mm:ss',
                                    help='manually specify the archive creation date/time (UTC). '
                                         'alternatively, give a reference file/directory.')
-        archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
-                                   type=int, default=300, metavar='SECONDS',
-                                   help='write checkpoint every SECONDS seconds (Default: 300)')
         archive_group.add_argument('-C', '--compression', dest='compression',
                                    type=CompressionSpec, default=None, metavar='COMPRESSION',
                                    help='select compression algorithm (and level):\n'

+ 16 - 21
borg/testsuite/archiver.py

@@ -926,13 +926,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in("U input/file1", output)
         self.assert_in("x input/file2", output)
 
-    def test_create_delete_inbetween(self):
-        self.create_test_files()
-        self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test1', 'input')
-        self.cmd('delete', self.repository_location + '::test1')
-        self.cmd('create', self.repository_location + '::test2', 'input')
-
     def test_create_topical(self):
         now = time.time()
         self.create_regular_file('file1', size=1024 * 80)
@@ -1231,8 +1224,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
     def test_recreate_rechunkify(self):
         with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd:
-            fd.write(b'a' * 250)
-            fd.write(b'b' * 250)
+            fd.write(b'a' * 280)
+            fd.write(b'b' * 280)
         self.cmd('init', self.repository_location)
         self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input')
         self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache')
@@ -1249,16 +1242,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def test_recreate_recompress(self):
         self.create_regular_file('compressible', size=10000)
         self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
-                        '--format', '{size} {csize}')
-        size, csize = map(int, list.split(' '))
-        assert csize >= size
+        self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none')
+        file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
+                             '--format', '{size} {csize} {sha256}')
+        size, csize, sha256_before = file_list.split(' ')
+        assert int(csize) >= int(size)  # >= due to metadata overhead
         self.cmd('recreate', self.repository_location, '-C', 'lz4')
-        list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
-                        '--format', '{size} {csize}')
-        size, csize = map(int, list.split(' '))
-        assert csize < size
+        file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
+                             '--format', '{size} {csize} {sha256}')
+        size, csize, sha256_after = file_list.split(' ')
+        assert int(csize) < int(size)
+        assert sha256_before == sha256_after
 
     def test_recreate_dry_run(self):
         self.create_regular_file('compressible', size=10000)
@@ -1327,7 +1321,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             frame = inspect.stack()[2]
             try:
                 caller_self = frame[0].f_locals['self']
-                caller_self.interrupt = True
+                if isinstance(caller_self, ArchiveRecreater):
+                    caller_self.interrupt = True
             finally:
                 del frame
             return real_add_chunk(*args, **kwargs)
@@ -1339,9 +1334,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test', 'input')
         archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
         with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
-            self.cmd('recreate', '-p', '--chunker-params', '16,18,17,4095', self.repository_location)
+            self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
         assert 'test.recreate' in self.cmd('list', self.repository_location)
-        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location)
+        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
         assert 'Found test.recreate, will resume' in output
         assert 'Copied 1 chunks from a partially processed item' in output
         archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')

+ 7 - 8
docs/usage/recreate.rst.inc

@@ -18,7 +18,7 @@ optional arguments
     ``--filter STATUSCHARS``
         | only display items with the given status characters
     ``-p``, ``--progress``
-        | show progress display while rewriting archives
+        | show progress display while recreating archives
     ``-n``, ``--dry-run``
         | do not change anything
     ``-s``, ``--stats``
@@ -44,8 +44,6 @@ Archive options
         | add a comment text to the archive
     ``--timestamp yyyy-mm-ddThh:mm:ss``
         | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
-    ``-c SECONDS``, ``--checkpoint-interval SECONDS``
-        | write checkpoint every SECONDS seconds (Default: 300)
     ``-C COMPRESSION``, ``--compression COMPRESSION``
         | select compression algorithm (and level):
         | none == no compression (default),
@@ -63,12 +61,12 @@ Description
 Recreate the contents of existing archives.
 
 --exclude, --exclude-from and PATH have the exact same semantics
-as in "borg create". If a PATH is specified the resulting archive
-will only contain files under PATH.
+as in "borg create". If PATHs are specified the resulting archive
+will only contain files from these PATHs.
 
 --compression: all chunks seen will be stored using the given method.
 Due to how Borg stores compressed size information this might display
-incorrect information for archives that were not rewritten at the same time.
+incorrect information for archives that were not recreated at the same time.
 There is no risk of data loss by this.
 
 --chunker-params will re-chunk all files in the archive, this can be
@@ -85,7 +83,8 @@ processed files/dirs). Changing compression leads to incorrect size information
 Changing chunker params between invocations might lead to data loss.
 
 USE WITH CAUTION.
-Permanent data loss by specifying incorrect patterns or PATHS is possible.
+Depending on the PATHs and patterns given, recreate can be used to permanently
+delete files from archives.
 When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
 interpreted.
 
@@ -94,6 +93,6 @@ archive that is built during the operation exists at the same time at
 "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
 When rechunking space usage can be substantial, expect at least the entire
-deduplicated size of the archives using the older chunker params.
+deduplicated size of the archives using the previous chunker params.
 When recompressing approximately 1 % of the repository size or 512 MB
 (whichever is greater) of additional space is used.