
Merge pull request #7305 from ThomasWaldmann/volume-based-checkpointing

volume based checkpointing, fix item_ptrs orphaned chunks
TW, 2 years ago
parent
current commit
d0344cb8f8

+ 30 - 10
src/borg/archive.py

@@ -401,6 +401,7 @@ class CacheChunkBuffer(ChunkBuffer):
 
     def write_chunk(self, chunk):
         id_, _ = self.cache.add_chunk(self.key.id_hash(chunk), {}, chunk, stats=self.stats, wait=False)
+        logger.debug(f"writing item metadata stream chunk {bin_to_hex(id_)}")
         self.cache.repository.async_response(wait=False)
         return id_
 
@@ -444,6 +445,7 @@ def archive_put_items(chunk_ids, *, repo_objs, cache=None, stats=None, add_refer
     for i in range(0, len(chunk_ids), IDS_PER_CHUNK):
         data = msgpack.packb(chunk_ids[i : i + IDS_PER_CHUNK])
         id = repo_objs.id_hash(data)
+        logger.debug(f"writing item_ptrs chunk {bin_to_hex(id)}")
         if cache is not None and stats is not None:
             cache.add_chunk(id, {}, data, stats=stats)
         elif add_reference is not None:
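
(review note) archive_put_items packs the item metadata chunk ids into item_ptrs chunks, at most IDS_PER_CHUNK ids per chunk; the added debug line logs each item_ptrs chunk id as it is written. A minimal sketch of the batching, using a deliberately small illustrative constant (borg's real IDS_PER_CHUNK is larger):

    IDS_PER_CHUNK = 3  # illustrative value only
    chunk_ids = [f"id{i}".encode() for i in range(7)]
    batches = [chunk_ids[i : i + IDS_PER_CHUNK] for i in range(0, len(chunk_ids), IDS_PER_CHUNK)]
    assert [len(b) for b in batches] == [3, 3, 1]  # each batch is msgpacked into one item_ptrs chunk
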
@@ -471,7 +473,6 @@ class Archive:
         name,
         cache=None,
         create=False,
-        checkpoint_interval=1800,
         numeric_ids=False,
         noatime=False,
         noctime=False,
@@ -500,7 +501,6 @@ class Archive:
         self.name = name  # overwritten later with name from archive metadata
         self.name_in_manifest = name  # can differ from .name later (if borg check fixed duplicate archive names)
         self.comment = None
-        self.checkpoint_interval = checkpoint_interval
         self.numeric_ids = numeric_ids
         self.noatime = noatime
         self.noctime = noctime
@@ -650,9 +650,16 @@ Duration: {0.duration}
         self.items_buffer.add(item)
 
     def write_checkpoint(self):
-        self.save(self.checkpoint_name)
+        metadata = self.save(self.checkpoint_name)
+        # that .save() has committed the repo.
+        # at next commit, we won't need this checkpoint archive any more because we will then
+        # have either a newer checkpoint archive or the final archive.
+        # so we can already remove it here, the next .save() will then commit this cleanup.
+        # remove its manifest entry, remove its ArchiveItem chunk, remove its item_ptrs chunks:
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)
+        for id in metadata.item_ptrs:
+            self.cache.chunk_decref(id, self.stats)
 
     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
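
(review note) This hunk is the "fix item_ptrs orphaned chunks" part of the PR title. Saving a checkpoint archive writes item_ptrs chunks in addition to the ArchiveItem chunk, but the old write_checkpoint() only decref'd the ArchiveItem chunk, so every superseded checkpoint leaked its item_ptrs chunks (later flagged as orphans by borg check). In outline, the cleanup now reads (paraphrasing the code above):

    metadata = self.save(self.checkpoint_name)        # commits the repo, returns the ArchiveItem metadata
    del self.manifest.archives[self.checkpoint_name]  # drop the manifest entry
    self.cache.chunk_decref(self.id, self.stats)      # drop the ArchiveItem chunk
    for id in metadata.item_ptrs:                     # drop each item_ptrs chunk (the actual fix)
        self.cache.chunk_decref(id, self.stats)
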
@@ -714,6 +721,7 @@ Duration: {0.duration}
         self.manifest.write()
         self.repository.commit(compact=False)
         self.cache.commit()
+        return metadata
 
     def calc_stats(self, cache, want_unique=True):
         if not want_unique:
@@ -1226,14 +1234,19 @@ def cached_hash(chunk, id_hash):
 class ChunksProcessor:
     # Processes an iterator of chunks for an Item
 
-    def __init__(self, *, key, cache, add_item, write_checkpoint, checkpoint_interval, rechunkify):
+    def __init__(self, *, key, cache, add_item, write_checkpoint, checkpoint_interval, checkpoint_volume, rechunkify):
         self.key = key
         self.cache = cache
         self.add_item = add_item
         self.write_checkpoint = write_checkpoint
+        self.rechunkify = rechunkify
+        # time interval based checkpointing
         self.checkpoint_interval = checkpoint_interval
         self.last_checkpoint = time.monotonic()
-        self.rechunkify = rechunkify
+        # file content volume based checkpointing
+        self.checkpoint_volume = checkpoint_volume
+        self.current_volume = 0
+        self.last_volume_checkpoint = 0
 
     def write_part_file(self, item, from_chunk, number):
         item = Item(internal_dict=item.as_dict())
@@ -1255,13 +1268,14 @@ class ChunksProcessor:
         if (
             forced
             or sig_int_triggered
-            or self.checkpoint_interval
-            and time.monotonic() - self.last_checkpoint > self.checkpoint_interval
+            or (self.checkpoint_interval and time.monotonic() - self.last_checkpoint > self.checkpoint_interval)
+            or (self.checkpoint_volume and self.current_volume - self.last_volume_checkpoint >= self.checkpoint_volume)
         ):
             if sig_int_triggered:
                 logger.info("checkpoint requested: starting checkpoint creation...")
             from_chunk, part_number = self.write_part_file(item, from_chunk, part_number)
             self.last_checkpoint = time.monotonic()
+            self.last_volume_checkpoint = self.current_volume
             if sig_int_triggered:
                 sig_int.action_completed()
                 logger.info("checkpoint requested: finished checkpoint creation!")
@@ -1286,7 +1300,9 @@ class ChunksProcessor:
         from_chunk = 0
         part_number = 1
         for chunk in chunk_iter:
-            item.chunks.append(chunk_processor(chunk))
+            cle = chunk_processor(chunk)
+            item.chunks.append(cle)
+            self.current_volume += cle[1]
             if show_progress:
                 stats.show_progress(item=item, dt=0.2)
             from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=False)
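
(review note) The volume accounting assumes a chunk list entry is an (id, size) tuple, so cle[1] is the chunk's plaintext size; current_volume therefore measures file content processed, not compressed repository size. A tiny worked example under that assumed entry format:

    cle = (b"\x00" * 32, 999)   # (chunk id, plaintext size)
    current_volume = 0
    current_volume += cle[1]    # 999: with --checkpoint-volume=1000, no checkpoint yet
    current_volume += cle[1]    # 1998 >= 1000: the volume trigger fires on the next maybe_checkpoint()
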
@@ -2159,7 +2175,7 @@ class ArchiveChecker:
             if last and len(archive_infos) < last:
                 logger.warning("--last %d archives: only found %d archives", last, len(archive_infos))
         else:
-            archive_infos = self.manifest.archives.list(sort_by=sort_by)
+            archive_infos = self.manifest.archives.list(sort_by=sort_by, consider_checkpoints=True)
         num_archives = len(archive_infos)
 
         pi = ProgressIndicatorPercent(
@@ -2216,6 +2232,8 @@ class ArchiveChecker:
             orphaned = unused - self.possibly_superseded
             if orphaned:
                 logger.error(f"{len(orphaned)} orphaned objects found!")
+                for chunk_id in orphaned:
+                    logger.debug(f"chunk {bin_to_hex(chunk_id)} is orphaned.")
                 self.error_found = True
             if self.repair and unused:
                 logger.info(
@@ -2262,6 +2280,7 @@ class ArchiveRecreater:
         file_status_printer=None,
         timestamp=None,
         checkpoint_interval=1800,
+        checkpoint_volume=0,
     ):
         self.manifest = manifest
         self.repository = manifest.repository
@@ -2289,6 +2308,7 @@ class ArchiveRecreater:
         self.progress = progress
         self.print_file_status = file_status_printer or (lambda *args: None)
         self.checkpoint_interval = None if dry_run else checkpoint_interval
+        self.checkpoint_volume = None if dry_run else checkpoint_volume
 
     def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
@@ -2456,6 +2476,7 @@ class ArchiveRecreater:
             add_item=target.add_item,
             write_checkpoint=target.write_checkpoint,
             checkpoint_interval=self.checkpoint_interval,
+            checkpoint_volume=self.checkpoint_volume,
             rechunkify=target.recreate_rechunkify,
         ).process_file_chunks
         target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed, sparse=False)
@@ -2469,7 +2490,6 @@ class ArchiveRecreater:
             progress=self.progress,
             chunker_params=self.chunker_params,
             cache=self.cache,
-            checkpoint_interval=self.checkpoint_interval,
         )
         return target
 

+ 9 - 1
src/borg/archiver/create_cmd.py

@@ -232,7 +232,6 @@ class CreateMixIn:
                     args.name,
                     cache=cache,
                     create=True,
-                    checkpoint_interval=args.checkpoint_interval,
                     numeric_ids=args.numeric_ids,
                     noatime=not args.atime,
                     noctime=args.noctime,
@@ -258,6 +257,7 @@ class CreateMixIn:
                     add_item=archive.add_item,
                     write_checkpoint=archive.write_checkpoint,
                     checkpoint_interval=args.checkpoint_interval,
+                    checkpoint_volume=args.checkpoint_volume,
                     rechunkify=False,
                 )
                 fso = FilesystemObjectProcessors(
@@ -845,6 +845,14 @@ class CreateMixIn:
             default=1800,
             help="write checkpoint every SECONDS seconds (Default: 1800)",
         )
+        archive_group.add_argument(
+            "--checkpoint-volume",
+            metavar="BYTES",
+            dest="checkpoint_volume",
+            type=int,
+            default=0,
+            help="write checkpoint every BYTES bytes (Default: 0, meaning no volume based checkpointing)",
+        )
         archive_group.add_argument(
             "--chunker-params",
             metavar="PARAMS",

+ 9 - 0
src/borg/archiver/recreate_cmd.py

@@ -39,6 +39,7 @@ class RecreateMixIn:
             stats=args.stats,
             file_status_printer=self.print_file_status,
             checkpoint_interval=args.checkpoint_interval,
+            checkpoint_volume=args.checkpoint_volume,
             dry_run=args.dry_run,
             timestamp=args.timestamp,
         )
@@ -160,6 +161,14 @@ class RecreateMixIn:
             metavar="SECONDS",
             help="write checkpoint every SECONDS seconds (Default: 1800)",
         )
+        archive_group.add_argument(
+            "--checkpoint-volume",
+            metavar="BYTES",
+            dest="checkpoint_volume",
+            type=int,
+            default=0,
+            help="write checkpoint every BYTES bytes (Default: 0, meaning no volume based checkpointing)",
+        )
         archive_group.add_argument(
             "--comment",
             metavar="COMMENT",

+ 9 - 1
src/borg/archiver/tar_cmds.py

@@ -261,7 +261,6 @@ class TarMixIn:
             args.name,
             cache=cache,
             create=True,
-            checkpoint_interval=args.checkpoint_interval,
             progress=args.progress,
             chunker_params=args.chunker_params,
             start=t0,
@@ -274,6 +273,7 @@ class TarMixIn:
             add_item=archive.add_item,
             write_checkpoint=archive.write_checkpoint,
             checkpoint_interval=args.checkpoint_interval,
+            checkpoint_volume=args.checkpoint_volume,
             rechunkify=False,
         )
         tfo = TarfileObjectProcessors(
@@ -515,6 +515,14 @@ class TarMixIn:
             metavar="SECONDS",
             help="write checkpoint every SECONDS seconds (Default: 1800)",
         )
+        archive_group.add_argument(
+            "--checkpoint-volume",
+            metavar="BYTES",
+            dest="checkpoint_volume",
+            type=int,
+            default=0,
+            help="write checkpoint every BYTES bytes (Default: 0, meaning no volume based checkpointing)",
+        )
         archive_group.add_argument(
             "--chunker-params",
             dest="chunker_params",

+ 40 - 0
src/borg/testsuite/archiver/create_cmd.py

@@ -167,6 +167,46 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         )
         assert extracted_data == input_data
 
+    def test_create_stdin_checkpointing(self):
+        chunk_size = 1000  # fixed chunker with this size, also volume based checkpointing after that volume
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        input_data = b"X" * (chunk_size * 2 - 1)  # one full and one partial chunk
+        self.cmd(
+            f"--repo={self.repository_location}",
+            "create",
+            f"--chunker-params=fixed,{chunk_size}",
+            f"--checkpoint-volume={chunk_size}",
+            "test",
+            "-",
+            input=input_data,
+        )
+        # repo looking good overall? checks for rc == 0.
+        self.cmd(f"--repo={self.repository_location}", "check", "--debug")
+        # verify part files
+        out = self.cmd(
+            f"--repo={self.repository_location}",
+            "extract",
+            "test",
+            "stdin.borg_part_1",
+            "--consider-part-files",
+            "--stdout",
+            binary_output=True,
+        )
+        assert out == input_data[:chunk_size]
+        out = self.cmd(
+            f"--repo={self.repository_location}",
+            "extract",
+            "test",
+            "stdin.borg_part_2",
+            "--consider-part-files",
+            "--stdout",
+            binary_output=True,
+        )
+        assert out == input_data[: chunk_size - 1]
+        # verify full file
+        out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
+        assert out == input_data
+
     def test_create_content_from_command(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         input_data = "some test content"