Преглед на файлове

cache: renamed .chunk_incref -> .reuse_chunk, boolean .seen_chunk

reuse_chunk is the counterpart of add_chunk for already-existing chunks.

It doesn't do refcounting anymore.

.seen_chunk does not return the refcount anymore, but just whether the chunk exists.

If we add a new chunk, its refcount is immediately set to MAX_VALUE, so
there is no longer any difference between previously existing chunks and
newly added ones. This makes the stats even less meaningful, but reduces complexity.
Thomas Waldmann преди 10 месеца
родител
ревизия
ccc84c7a4e
променени са 5 файла, в които са добавени 23 реда и са изтрити 27 реда
  1. 4 4
      src/borg/archive.py
  2. 2 2
      src/borg/archiver/transfer_cmd.py
  3. 12 15
      src/borg/cache.py
  4. 4 5
      src/borg/testsuite/cache.py
  5. 1 1
      src/borg/upgrade.py

+ 4 - 4
src/borg/archive.py

@@ -1338,7 +1338,7 @@ class FilesystemObjectProcessors:
                     item.chunks = []
                     for chunk_id, chunk_size in hl_chunks:
                         # process one-by-one, so we will know in item.chunks how far we got
-                        chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
+                        chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats)
                         item.chunks.append(chunk_entry)
                 else:  # normal case, no "2nd+" hardlink
                     if not is_special_file:
@@ -1364,7 +1364,7 @@ class FilesystemObjectProcessors:
                             item.chunks = []
                             for chunk in chunks:
                                 # process one-by-one, so we will know in item.chunks how far we got
-                                cache.chunk_incref(chunk.id, chunk.size, self.stats)
+                                cache.reuse_chunk(chunk.id, chunk.size, self.stats)
                                 item.chunks.append(chunk)
                             status = "U"  # regular file, unchanged
                     else:
@@ -2169,7 +2169,7 @@ class ArchiveRecreater:
     def process_chunks(self, archive, target, item):
         if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
-                self.cache.chunk_incref(chunk_id, size, target.stats)
+                self.cache.reuse_chunk(chunk_id, size, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_processor = partial(self.chunk_processor, target)
@@ -2179,7 +2179,7 @@ class ArchiveRecreater:
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         size = len(data)
         if chunk_id in self.seen_chunks:
-            return self.cache.chunk_incref(chunk_id, size, target.stats)
+            return self.cache.reuse_chunk(chunk_id, size, target.stats)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)

+ 2 - 2
src/borg/archiver/transfer_cmd.py

@@ -100,7 +100,7 @@ class TransferMixIn:
                     if "chunks" in item:
                         chunks = []
                         for chunk_id, size in item.chunks:
-                            chunk_present = cache.seen_chunk(chunk_id, size) != 0
+                            chunk_present = cache.seen_chunk(chunk_id, size)
                             if not chunk_present:  # target repo does not yet have this chunk
                                 if not dry_run:
                                     cdata = other_repository.get(chunk_id)
@@ -147,7 +147,7 @@ class TransferMixIn:
                                 transfer_size += size
                             else:
                                 if not dry_run:
-                                    chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
+                                    chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats)
                                     chunks.append(chunk_entry)
                                 present_size += size
                         if not dry_run:

+ 12 - 15
src/borg/cache.py

@@ -579,12 +579,6 @@ class ChunksMixin:
             self._chunks = self._load_chunks_from_repo()
         return self._chunks
 
-    def chunk_incref(self, id, size, stats):
-        assert isinstance(size, int) and size > 0
-        count, _size = self.chunks.incref(id)
-        stats.update(size, False)
-        return ChunkListEntry(id, size)
-
     def seen_chunk(self, id, size=None):
         entry = self.chunks.get(id, ChunkIndexEntry(0, None))
         if entry.refcount and size is not None:
@@ -593,7 +587,12 @@ class ChunksMixin:
                 # AdHocWithFilesCache / AdHocCache:
                 # Here *size* is used to update the chunk's size information, which will be zero for existing chunks.
                 self.chunks[id] = entry._replace(size=size)
-        return entry.refcount
+        return entry.refcount != 0
+
+    def reuse_chunk(self, id, size, stats):
+        assert isinstance(size, int) and size > 0
+        stats.update(size, False)
+        return ChunkListEntry(id, size)
 
     def add_chunk(
         self,
@@ -615,15 +614,15 @@ class ChunksMixin:
                 size = len(data)  # data is still uncompressed
             else:
                 raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
-        refcount = self.seen_chunk(id, size)
-        if refcount:
-            return self.chunk_incref(id, size, stats)
+        exists = self.seen_chunk(id, size)
+        if exists:
+            return self.reuse_chunk(id, size, stats)
         cdata = self.repo_objs.format(
             id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
         )
         self.repository.put(id, cdata, wait=wait)
-        self.chunks.add(id, 1, size)
-        stats.update(size, not refcount)
+        self.chunks.add(id, ChunkIndex.MAX_VALUE, size)
+        stats.update(size, not exists)
         return ChunkListEntry(id, size)
 
     def _load_chunks_from_repo(self):
@@ -639,9 +638,7 @@ class ChunksMixin:
             if not result:
                 break
             marker = result[-1][0]
-            # All chunks from the repository have a refcount of MAX_VALUE, which is sticky,
-            # therefore we can't/won't delete them. Chunks we added ourselves in this borg run
-            # are tracked correctly.
+            # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
             init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
             for id, stored_size in result:
                 num_chunks += 1

+ 4 - 5
src/borg/testsuite/cache.py

@@ -45,11 +45,10 @@ class TestAdHocCache:
         assert cache.cache_mode == "d"
         assert cache.files is None
 
-    def test_incref_after_add_chunk(self, cache):
+    def test_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
-        assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
+        assert cache.reuse_chunk(H(3), 4, Statistics()) == (H(3), 4)
 
-    def test_existing_incref_after_add_chunk(self, cache):
-        """This case occurs with part files, see Archive.chunk_file."""
+    def test_existing_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
-        assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)
+        assert cache.reuse_chunk(H(1), 4, Statistics()) == (H(1), 4)

+ 1 - 1
src/borg/upgrade.py

@@ -85,7 +85,7 @@ class UpgraderFrom12To20:
             if chunks is not None:
                 item.chunks = chunks
                 for chunk_id, chunk_size in chunks:
-                    self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
+                    self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats)
             if chunks_healthy is not None:
                 item.chunks_healthy = chunks
             del item.source  # not used for hardlinks any more, replaced by hlid