@@ -58,7 +58,6 @@ class Statistics:
         self.output_json = output_json
         self.iec = iec
         self.osize = self.usize = self.nfiles = 0
-        self.osize_parts = self.usize_parts = self.nfiles_parts = 0
         self.last_progress = 0  # timestamp when last progress was shown
         self.files_stats = defaultdict(int)
         self.chunking_time = 0.0
@@ -66,15 +65,10 @@ class Statistics:
         self.rx_bytes = 0
         self.tx_bytes = 0

-    def update(self, size, unique, part=False):
-        if not part:
-            self.osize += size
-            if unique:
-                self.usize += size
-        else:
-            self.osize_parts += size
-            if unique:
-                self.usize_parts += size
+    def update(self, size, unique):
+        self.osize += size
+        if unique:
+            self.usize += size

     def __add__(self, other):
         if not isinstance(other, Statistics):
@@ -83,9 +77,6 @@ class Statistics:
         stats.osize = self.osize + other.osize
         stats.usize = self.usize + other.usize
         stats.nfiles = self.nfiles + other.nfiles
-        stats.osize_parts = self.osize_parts + other.osize_parts
-        stats.usize_parts = self.usize_parts + other.usize_parts
-        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
         stats.chunking_time = self.chunking_time + other.chunking_time
         stats.hashing_time = self.hashing_time + other.hashing_time
         for key in other.files_stats:
@@ -134,20 +125,13 @@ Bytes sent to remote: {stats.tx_bytes}
         }

     def as_raw_dict(self):
-        return {
-            "size": self.osize,
-            "nfiles": self.nfiles,
-            "size_parts": self.osize_parts,
-            "nfiles_parts": self.nfiles_parts,
-        }
+        return {"size": self.osize, "nfiles": self.nfiles}

     @classmethod
     def from_raw_dict(cls, **kw):
         self = cls()
         self.osize = kw["size"]
         self.nfiles = kw["nfiles"]
-        self.osize_parts = kw["size_parts"]
-        self.nfiles_parts = kw["nfiles_parts"]
         return self

     @property
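
With the *_parts counters gone, the raw-dict round trip only carries size and nfiles. A minimal sketch of the simplified API (the values are made up; only Statistics itself comes from this diff):

    stats = Statistics()
    stats.update(4096, unique=True)    # adds to osize and usize
    stats.update(4096, unique=False)   # adds to osize only
    stats.nfiles = 1
    raw = stats.as_raw_dict()          # {"size": 8192, "nfiles": 1} - no *_parts keys anymore
    restored = Statistics.from_raw_dict(**raw)
    assert (restored.osize, restored.nfiles) == (8192, 1)
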
@@ -353,6 +337,7 @@ class ChunkBuffer:
         self.chunks = []
         self.key = key
         self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False)
+        self.saved_chunks_len = None

     def add(self, item):
         self.buffer.write(self.packer.pack(item.as_dict()))
@@ -392,6 +377,18 @@ class ChunkBuffer:
     def is_full(self):
         return self.buffer.tell() > self.BUFFER_SIZE

+    def save_chunks_state(self):
+        # as we only append to self.chunks, remembering the current length is good enough
+        self.saved_chunks_len = len(self.chunks)
+
+    def restore_chunks_state(self):
+        scl = self.saved_chunks_len
+        assert scl is not None, "forgot to call save_chunks_state?"
+        tail_chunks = self.chunks[scl:]
+        del self.chunks[scl:]
+        self.saved_chunks_len = None
+        return tail_chunks
+

 class CacheChunkBuffer(ChunkBuffer):
     def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS):
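
save_chunks_state()/restore_chunks_state() rely on self.chunks being append-only, so a saved length fully describes the earlier state. A standalone sketch of the same idea, with a plain list standing in for ChunkBuffer.chunks (all names here are illustrative):

    ids = ["id1", "id2"]          # chunk ids written for the regular metadata stream
    saved_len = len(ids)          # save_chunks_state(): remember the current length
    ids.append("id3")             # checkpoint-only chunk appended afterwards
    tail = ids[saved_len:]        # restore_chunks_state(): split off the tail ...
    del ids[saved_len:]           # ... and drop it from the buffer again
    assert ids == ["id1", "id2"] and tail == ["id3"]
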
@@ -484,7 +481,6 @@ class Archive:
         start=None,
         start_monotonic=None,
         end=None,
-        consider_part_files=False,
         log_json=False,
         iec=False,
     ):
@@ -519,7 +515,6 @@ class Archive:
         if end is None:
             end = archive_ts_now()
         self.end = end
-        self.consider_part_files = consider_part_files
         self.pipeline = DownloadPipeline(self.repository, self.repo_objs)
         self.create = create
         if self.create:
@@ -629,9 +624,6 @@ Duration: {0.duration}
         return "Archive(%r)" % self.name

     def item_filter(self, item, filter=None):
-        if not self.consider_part_files and "part" in item:
-            # this is a part(ial) file, we usually don't want to consider it.
-            return False
         return filter(item) if filter else True

     def iter_items(self, filter=None, preload=False):
@@ -649,6 +641,15 @@ Duration: {0.duration}
             stats.show_progress(item=item, dt=0.2)
         self.items_buffer.add(item)

+    def prepare_checkpoint(self):
+        # we need to flush the archive metadata stream to repo chunks, so that
+        # we have the metadata stream chunks WITHOUT the part file item we add later.
+        # The part file item will then get into its own metadata stream chunk, which we
+        # can easily NOT include into the next checkpoint or the final archive.
+        self.items_buffer.flush(flush=True)
+        # remember the current state of self.chunks, which corresponds to the flushed chunks
+        self.items_buffer.save_chunks_state()
+
     def write_checkpoint(self):
         metadata = self.save(self.checkpoint_name)
         # that .save() has committed the repo.
@@ -660,6 +661,11 @@ Duration: {0.duration}
         self.cache.chunk_decref(self.id, self.stats)
         for id in metadata.item_ptrs:
             self.cache.chunk_decref(id, self.stats)
+        # also get rid of that part item, we do not want to have it in next checkpoint or final archive
+        tail_chunks = self.items_buffer.restore_chunks_state()
+        # tail_chunks contain the tail of the archive items metadata stream, not needed for next commit.
+        for id in tail_chunks:
+            self.cache.chunk_decref(id, self.stats)

     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
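
Taken together, prepare_checkpoint() and write_checkpoint() give each checkpoint the following shape, as driven by ChunksProcessor.write_part_file() later in this diff (archive and part_item are placeholders for the objects involved, not patch code):

    archive.prepare_checkpoint()      # flush the items buffer, snapshot items_buffer.chunks
    archive.add_item(part_item)       # the <path>.borg_part item lands in its own metadata chunk(s)
    archive.write_checkpoint()        # save the checkpoint archive, then decref its metadata,
                                      # its item_ptrs and the tail metadata chunks holding the part item
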
@@ -694,14 +700,7 @@ Duration: {0.duration}
         # because borg info relies on them. so, either use the given stats (from args)
         # or fall back to self.stats if it was not given.
         stats = stats or self.stats
-        metadata.update(
-            {
-                "size": stats.osize,
-                "nfiles": stats.nfiles,
-                "size_parts": stats.osize_parts,
-                "nfiles_parts": stats.nfiles_parts,
-            }
-        )
+        metadata.update({"size": stats.osize, "nfiles": stats.nfiles})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
         data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
@@ -751,12 +750,9 @@ Duration: {0.duration}
         pi.finish()

         stats = Statistics(iec=self.iec)
-        stats.usize = unique_size  # the part files use same chunks as the full file
+        stats.usize = unique_size
         stats.nfiles = self.metadata.nfiles
         stats.osize = self.metadata.size
-        if self.consider_part_files:
-            stats.nfiles += self.metadata.nfiles_parts
-            stats.osize += self.metadata.size_parts
         return stats

     @contextmanager
@@ -1038,9 +1034,9 @@ Duration: {0.duration}
                 error = True
                 return exception_ignored  # must not return None here

-        def chunk_decref(id, stats, part=False):
+        def chunk_decref(id, stats):
             try:
-                self.cache.chunk_decref(id, stats, wait=False, part=part)
+                self.cache.chunk_decref(id, stats, wait=False)
             except KeyError:
                 cid = bin_to_hex(id)
                 raise ChunksIndexError(cid)
@@ -1064,9 +1060,8 @@ Duration: {0.duration}
                     for item in unpacker:
                         item = Item(internal_dict=item)
                         if "chunks" in item:
-                            part = not self.consider_part_files and "part" in item
                             for chunk_id, size in item.chunks:
-                                chunk_decref(chunk_id, stats, part=part)
+                                chunk_decref(chunk_id, stats)
                 except (TypeError, ValueError):
                     # if items metadata spans multiple chunks and one chunk got dropped somehow,
                     # it could be that unpacker yields bad types
@@ -1234,10 +1229,22 @@ def cached_hash(chunk, id_hash):
 class ChunksProcessor:
     # Processes an iterator of chunks for an Item

-    def __init__(self, *, key, cache, add_item, write_checkpoint, checkpoint_interval, checkpoint_volume, rechunkify):
+    def __init__(
+        self,
+        *,
+        key,
+        cache,
+        add_item,
+        prepare_checkpoint,
+        write_checkpoint,
+        checkpoint_interval,
+        checkpoint_volume,
+        rechunkify,
+    ):
         self.key = key
         self.cache = cache
         self.add_item = add_item
+        self.prepare_checkpoint = prepare_checkpoint
         self.write_checkpoint = write_checkpoint
         self.rechunkify = rechunkify
         # time interval based checkpointing
@@ -1248,38 +1255,34 @@ class ChunksProcessor:
         self.current_volume = 0
         self.last_volume_checkpoint = 0

-    def write_part_file(self, item, from_chunk, number):
+    def write_part_file(self, item):
+        self.prepare_checkpoint()
         item = Item(internal_dict=item.as_dict())
-        length = len(item.chunks)
-        # the item should only have the *additional* chunks we processed after the last partial item:
-        item.chunks = item.chunks[from_chunk:]
         # for borg recreate, we already have a size member in the source item (giving the total file size),
         # but we consider only a part of the file here, thus we must recompute the size from the chunks:
         item.get_size(memorize=True, from_chunks=True)
-        item.path += ".borg_part_%d" % number
-        item.part = number
-        number += 1
+        item.path += ".borg_part"
         self.add_item(item, show_progress=False)
         self.write_checkpoint()
-        return length, number

-    def maybe_checkpoint(self, item, from_chunk, part_number, forced=False):
+    def maybe_checkpoint(self, item):
+        checkpoint_done = False
         sig_int_triggered = sig_int and sig_int.action_triggered()
         if (
-            forced
-            or sig_int_triggered
+            sig_int_triggered
             or (self.checkpoint_interval and time.monotonic() - self.last_checkpoint > self.checkpoint_interval)
             or (self.checkpoint_volume and self.current_volume - self.last_volume_checkpoint >= self.checkpoint_volume)
         ):
             if sig_int_triggered:
                 logger.info("checkpoint requested: starting checkpoint creation...")
-            from_chunk, part_number = self.write_part_file(item, from_chunk, part_number)
+            self.write_part_file(item)
+            checkpoint_done = True
             self.last_checkpoint = time.monotonic()
             self.last_volume_checkpoint = self.current_volume
             if sig_int_triggered:
                 sig_int.action_completed()
                 logger.info("checkpoint requested: finished checkpoint creation!")
-        return from_chunk, part_number
+        return checkpoint_done  # whether a checkpoint archive was created

     def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None):
         if not chunk_processor:
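
maybe_checkpoint() now decides on its own whether to checkpoint (SIGINT-requested, interval-based or volume-based) and reports back a plain boolean instead of the old (from_chunk, part_number) cursor. A caller-side sketch of using that return value; this is illustrative only, the process_file_chunks() hunk below simply ignores it:

    if processor.maybe_checkpoint(item):
        logger.debug("checkpoint archive written while processing %s", item.path)
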
@@ -1297,28 +1300,13 @@ class ChunksProcessor:
         # to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
         if self.rechunkify and "chunks_healthy" in item:
             del item.chunks_healthy
-        from_chunk = 0
-        part_number = 1
         for chunk in chunk_iter:
             cle = chunk_processor(chunk)
             item.chunks.append(cle)
             self.current_volume += cle[1]
             if show_progress:
                 stats.show_progress(item=item, dt=0.2)
-            from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=False)
-        else:
-            if part_number > 1:
-                if item.chunks[from_chunk:]:
-                    # if we already have created a part item inside this file, we want to put the final
-                    # chunks (if any) into a part item also (so all parts can be concatenated to get
-                    # the complete file):
-                    from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=True)
-
-            # if we created part files, we have referenced all chunks from the part files,
-            # but we also will reference the same chunks also from the final, complete file:
-            for chunk in item.chunks:
-                cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
-            stats.nfiles_parts += part_number - 1
+            self.maybe_checkpoint(item)


 class FilesystemObjectProcessors:
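
Because prepare_checkpoint is now a required keyword argument, every ChunksProcessor caller has to pass the matching Archive callback next to write_checkpoint, as the ArchiveRecreater hunk below does. A construction sketch with assumed values (key, cache, archive, stats and chunk_iter are presumed to exist already; the checkpoint trigger values are illustrative):

    cp = ChunksProcessor(
        key=key,
        cache=cache,
        add_item=archive.add_item,
        prepare_checkpoint=archive.prepare_checkpoint,   # new callback, paired with write_checkpoint
        write_checkpoint=archive.write_checkpoint,
        checkpoint_interval=1800,
        checkpoint_volume=0,
        rechunkify=False,
    )
    cp.process_file_chunks(item, cache, stats, show_progress=False, chunk_iter=chunk_iter)
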
@@ -2474,6 +2462,7 @@ class ArchiveRecreater:
             cache=self.cache,
             key=self.key,
             add_item=target.add_item,
+            prepare_checkpoint=target.prepare_checkpoint,
             write_checkpoint=target.write_checkpoint,
             checkpoint_interval=self.checkpoint_interval,
             checkpoint_volume=self.checkpoint_volume,