Browse Source

cache: renamed .chunk_incref -> .reuse_chunk, boolean .seen_chunk

reuse_chunk is the complement of add_chunk for already existing chunks.

It doesn't do refcounting anymore.

.seen_chunk does not return the refcount anymore, but just whether the chunk exists.

If we add a new chunk, add_chunk immediately sets its refcount to MAX_VALUE, so
there is no difference anymore between previously existing chunks and newly
added ones. This makes the stats even more useless, but reduces complexity.
Thomas Waldmann 9 months ago
parent
commit
ccc84c7a4e
5 changed files with 23 additions and 27 deletions
  1. src/borg/archive.py  +4 -4
  2. src/borg/archiver/transfer_cmd.py  +2 -2
  3. src/borg/cache.py  +12 -15
  4. src/borg/testsuite/cache.py  +4 -5
  5. src/borg/upgrade.py  +1 -1

+ 4 - 4
src/borg/archive.py

@@ -1338,7 +1338,7 @@ class FilesystemObjectProcessors:
                     item.chunks = []
                     for chunk_id, chunk_size in hl_chunks:
                         # process one-by-one, so we will know in item.chunks how far we got
-                        chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
+                        chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats)
                         item.chunks.append(chunk_entry)
                 else:  # normal case, no "2nd+" hardlink
                     if not is_special_file:
@@ -1364,7 +1364,7 @@ class FilesystemObjectProcessors:
                             item.chunks = []
                             for chunk in chunks:
                                 # process one-by-one, so we will know in item.chunks how far we got
-                                cache.chunk_incref(chunk.id, chunk.size, self.stats)
+                                cache.reuse_chunk(chunk.id, chunk.size, self.stats)
                                 item.chunks.append(chunk)
                             status = "U"  # regular file, unchanged
                     else:
@@ -2169,7 +2169,7 @@ class ArchiveRecreater:
     def process_chunks(self, archive, target, item):
         if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
-                self.cache.chunk_incref(chunk_id, size, target.stats)
+                self.cache.reuse_chunk(chunk_id, size, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_processor = partial(self.chunk_processor, target)
@@ -2179,7 +2179,7 @@ class ArchiveRecreater:
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         size = len(data)
         if chunk_id in self.seen_chunks:
-            return self.cache.chunk_incref(chunk_id, size, target.stats)
+            return self.cache.reuse_chunk(chunk_id, size, target.stats)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)

+ 2 - 2
src/borg/archiver/transfer_cmd.py

@@ -100,7 +100,7 @@ class TransferMixIn:
                     if "chunks" in item:
                         chunks = []
                         for chunk_id, size in item.chunks:
-                            chunk_present = cache.seen_chunk(chunk_id, size) != 0
+                            chunk_present = cache.seen_chunk(chunk_id, size)
                             if not chunk_present:  # target repo does not yet have this chunk
                                 if not dry_run:
                                     cdata = other_repository.get(chunk_id)
@@ -147,7 +147,7 @@ class TransferMixIn:
                                 transfer_size += size
                             else:
                                 if not dry_run:
-                                    chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats)
+                                    chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats)
                                     chunks.append(chunk_entry)
                                 present_size += size
                         if not dry_run:

+ 12 - 15
src/borg/cache.py

@@ -579,12 +579,6 @@ class ChunksMixin:
             self._chunks = self._load_chunks_from_repo()
         return self._chunks

-    def chunk_incref(self, id, size, stats):
-        assert isinstance(size, int) and size > 0
-        count, _size = self.chunks.incref(id)
-        stats.update(size, False)
-        return ChunkListEntry(id, size)
-
     def seen_chunk(self, id, size=None):
         entry = self.chunks.get(id, ChunkIndexEntry(0, None))
         if entry.refcount and size is not None:
@@ -593,7 +587,12 @@ class ChunksMixin:
                 # AdHocWithFilesCache / AdHocCache:
                 # Here *size* is used to update the chunk's size information, which will be zero for existing chunks.
                 self.chunks[id] = entry._replace(size=size)
-        return entry.refcount
+        return entry.refcount != 0
+
+    def reuse_chunk(self, id, size, stats):
+        assert isinstance(size, int) and size > 0
+        stats.update(size, False)
+        return ChunkListEntry(id, size)

     def add_chunk(
         self,
@@ -615,15 +614,15 @@ class ChunksMixin:
                 size = len(data)  # data is still uncompressed
             else:
                 raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
-        refcount = self.seen_chunk(id, size)
-        if refcount:
-            return self.chunk_incref(id, size, stats)
+        exists = self.seen_chunk(id, size)
+        if exists:
+            return self.reuse_chunk(id, size, stats)
         cdata = self.repo_objs.format(
             id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
         )
         self.repository.put(id, cdata, wait=wait)
-        self.chunks.add(id, 1, size)
-        stats.update(size, not refcount)
+        self.chunks.add(id, ChunkIndex.MAX_VALUE, size)
+        stats.update(size, not exists)
         return ChunkListEntry(id, size)

     def _load_chunks_from_repo(self):
@@ -639,9 +638,7 @@ class ChunksMixin:
             if not result:
                 break
             marker = result[-1][0]
-            # All chunks from the repository have a refcount of MAX_VALUE, which is sticky,
-            # therefore we can't/won't delete them. Chunks we added ourselves in this borg run
-            # are tracked correctly.
+            # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
             init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
             for id, stored_size in result:
                 num_chunks += 1
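
A hedged sketch of the invariant this change establishes (illustrative values; not code from this commit):

    # After add_chunk(), a new chunk is indistinguishable from one that was
    # already in the repository: both carry the sticky MAX_VALUE refcount.
    cache.add_chunk(id, {}, data, stats=stats)
    assert cache.seen_chunk(id)                      # now a boolean, not a refcount
    entry = cache.reuse_chunk(id, len(data), stats)  # just stats + ChunkListEntry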

+ 4 - 5
src/borg/testsuite/cache.py

@@ -45,11 +45,10 @@ class TestAdHocCache:
         assert cache.cache_mode == "d"
         assert cache.files is None

-    def test_incref_after_add_chunk(self, cache):
+    def test_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
-        assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
+        assert cache.reuse_chunk(H(3), 4, Statistics()) == (H(3), 4)

-    def test_existing_incref_after_add_chunk(self, cache):
-        """This case occurs with part files, see Archive.chunk_file."""
+    def test_existing_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
-        assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)
+        assert cache.reuse_chunk(H(1), 4, Statistics()) == (H(1), 4)

+ 1 - 1
src/borg/upgrade.py

@@ -85,7 +85,7 @@ class UpgraderFrom12To20:
             if chunks is not None:
                 item.chunks = chunks
                 for chunk_id, chunk_size in chunks:
-                    self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
+                    self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats)
             if chunks_healthy is not None:
                 item.chunks_healthy = chunks
             del item.source  # not used for hardlinks any more, replaced by hlid
             del item.source  # not used for hardlinks any more, replaced by hlid