Browse code

cache: renamed .chunk_incref -> .reuse_chunk, boolean .seen_chunk

reuse_chunk is the complement of add_chunk for already existing chunks.

It doesn't do refcounting anymore.

.seen_chunk does not return the refcount anymore, but just whether the chunk exists.

If we add a new chunk, it immediately sets its refcount to MAX_VALUE, so
there is no difference anymore between previously existing chunks and new
chunks added. This makes the stats even more useless, but we have less complexity.
Thomas Waldmann, 11 months ago
Parent
Commit
ccc84c7a4e
5 files changed, 23 insertions(+), 27 deletions(-)
  1. 4 4
      src/borg/archive.py
  2. 2 2
      src/borg/archiver/transfer_cmd.py
  3. 12 15
      src/borg/cache.py
  4. 4 5
      src/borg/testsuite/cache.py
  5. 1 1
      src/borg/upgrade.py

+ 4 - 4
src/borg/archive.py

@@ -1338,7 +1338,7 @@ class FilesystemObjectProcessors:
                     item.chunks = []
                     item.chunks = []
                     for chunk_id, chunk_size in hl_chunks:
                     for chunk_id, chunk_size in hl_chunks:
                         # process one-by-one, so we will know in item.chunks how far we got
                         # process one-by-one, so we will know in item.chunks how far we got
-                        chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
+                        chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats)
                         item.chunks.append(chunk_entry)
                         item.chunks.append(chunk_entry)
                 else:  # normal case, no "2nd+" hardlink
                 else:  # normal case, no "2nd+" hardlink
                     if not is_special_file:
                     if not is_special_file:
@@ -1364,7 +1364,7 @@ class FilesystemObjectProcessors:
                             item.chunks = []
                             item.chunks = []
                             for chunk in chunks:
                             for chunk in chunks:
                                 # process one-by-one, so we will know in item.chunks how far we got
                                 # process one-by-one, so we will know in item.chunks how far we got
-                                cache.chunk_incref(chunk.id, chunk.size, self.stats)
+                                cache.reuse_chunk(chunk.id, chunk.size, self.stats)
                                 item.chunks.append(chunk)
                                 item.chunks.append(chunk)
                             status = "U"  # regular file, unchanged
                             status = "U"  # regular file, unchanged
                     else:
                     else:
@@ -2169,7 +2169,7 @@ class ArchiveRecreater:
     def process_chunks(self, archive, target, item):
     def process_chunks(self, archive, target, item):
         if not target.recreate_rechunkify:
         if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
             for chunk_id, size in item.chunks:
-                self.cache.chunk_incref(chunk_id, size, target.stats)
+                self.cache.reuse_chunk(chunk_id, size, target.stats)
             return item.chunks
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_processor = partial(self.chunk_processor, target)
         chunk_processor = partial(self.chunk_processor, target)
@@ -2179,7 +2179,7 @@ class ArchiveRecreater:
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         size = len(data)
         size = len(data)
         if chunk_id in self.seen_chunks:
         if chunk_id in self.seen_chunks:
-            return self.cache.chunk_incref(chunk_id, size, target.stats)
+            return self.cache.reuse_chunk(chunk_id, size, target.stats)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         self.cache.repository.async_response(wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
         self.seen_chunks.add(chunk_entry.id)

+ 2 - 2
src/borg/archiver/transfer_cmd.py

@@ -100,7 +100,7 @@ class TransferMixIn:
                     if "chunks" in item:
                     if "chunks" in item:
                         chunks = []
                         chunks = []
                         for chunk_id, size in item.chunks:
                         for chunk_id, size in item.chunks:
-                            chunk_present = cache.seen_chunk(chunk_id, size) != 0
+                            chunk_present = cache.seen_chunk(chunk_id, size)
                             if not chunk_present:  # target repo does not yet have this chunk
                             if not chunk_present:  # target repo does not yet have this chunk
                                 if not dry_run:
                                 if not dry_run:
                                     cdata = other_repository.get(chunk_id)
                                     cdata = other_repository.get(chunk_id)
@@ -147,7 +147,7 @@ class TransferMixIn:
                                 transfer_size += size
                                 transfer_size += size
                             else:
                             else:
                                 if not dry_run:
                                 if not dry_run:
-                                    chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
+                                    chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats)
                                     chunks.append(chunk_entry)
                                     chunks.append(chunk_entry)
                                 present_size += size
                                 present_size += size
                         if not dry_run:
                         if not dry_run:

+ 12 - 15
src/borg/cache.py

@@ -579,12 +579,6 @@ class ChunksMixin:
             self._chunks = self._load_chunks_from_repo()
             self._chunks = self._load_chunks_from_repo()
         return self._chunks
         return self._chunks
 
 
-    def chunk_incref(self, id, size, stats):
-        assert isinstance(size, int) and size > 0
-        count, _size = self.chunks.incref(id)
-        stats.update(size, False)
-        return ChunkListEntry(id, size)
-
     def seen_chunk(self, id, size=None):
     def seen_chunk(self, id, size=None):
         entry = self.chunks.get(id, ChunkIndexEntry(0, None))
         entry = self.chunks.get(id, ChunkIndexEntry(0, None))
         if entry.refcount and size is not None:
         if entry.refcount and size is not None:
@@ -593,7 +587,12 @@ class ChunksMixin:
                 # AdHocWithFilesCache / AdHocCache:
                 # AdHocWithFilesCache / AdHocCache:
                 # Here *size* is used to update the chunk's size information, which will be zero for existing chunks.
                 # Here *size* is used to update the chunk's size information, which will be zero for existing chunks.
                 self.chunks[id] = entry._replace(size=size)
                 self.chunks[id] = entry._replace(size=size)
-        return entry.refcount
+        return entry.refcount != 0
+
+    def reuse_chunk(self, id, size, stats):
+        assert isinstance(size, int) and size > 0
+        stats.update(size, False)
+        return ChunkListEntry(id, size)
 
 
     def add_chunk(
     def add_chunk(
         self,
         self,
@@ -615,15 +614,15 @@ class ChunksMixin:
                 size = len(data)  # data is still uncompressed
                 size = len(data)  # data is still uncompressed
             else:
             else:
                 raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
                 raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
-        refcount = self.seen_chunk(id, size)
-        if refcount:
-            return self.chunk_incref(id, size, stats)
+        exists = self.seen_chunk(id, size)
+        if exists:
+            return self.reuse_chunk(id, size, stats)
         cdata = self.repo_objs.format(
         cdata = self.repo_objs.format(
             id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
             id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
         )
         )
         self.repository.put(id, cdata, wait=wait)
         self.repository.put(id, cdata, wait=wait)
-        self.chunks.add(id, 1, size)
-        stats.update(size, not refcount)
+        self.chunks.add(id, ChunkIndex.MAX_VALUE, size)
+        stats.update(size, not exists)
         return ChunkListEntry(id, size)
         return ChunkListEntry(id, size)
 
 
     def _load_chunks_from_repo(self):
     def _load_chunks_from_repo(self):
@@ -639,9 +638,7 @@ class ChunksMixin:
             if not result:
             if not result:
                 break
                 break
             marker = result[-1][0]
             marker = result[-1][0]
-            # All chunks from the repository have a refcount of MAX_VALUE, which is sticky,
-            # therefore we can't/won't delete them. Chunks we added ourselves in this borg run
-            # are tracked correctly.
+            # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
             init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
             init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
             for id, stored_size in result:
             for id, stored_size in result:
                 num_chunks += 1
                 num_chunks += 1

+ 4 - 5
src/borg/testsuite/cache.py

@@ -45,11 +45,10 @@ class TestAdHocCache:
         assert cache.cache_mode == "d"
         assert cache.cache_mode == "d"
         assert cache.files is None
         assert cache.files is None
 
 
-    def test_incref_after_add_chunk(self, cache):
+    def test_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
-        assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
+        assert cache.reuse_chunk(H(3), 4, Statistics()) == (H(3), 4)
 
 
-    def test_existing_incref_after_add_chunk(self, cache):
-        """This case occurs with part files, see Archive.chunk_file."""
+    def test_existing_reuse_after_add_chunk(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
-        assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)
+        assert cache.reuse_chunk(H(1), 4, Statistics()) == (H(1), 4)

+ 1 - 1
src/borg/upgrade.py

@@ -85,7 +85,7 @@ class UpgraderFrom12To20:
             if chunks is not None:
             if chunks is not None:
                 item.chunks = chunks
                 item.chunks = chunks
                 for chunk_id, chunk_size in chunks:
                 for chunk_id, chunk_size in chunks:
-                    self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
+                    self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats)
             if chunks_healthy is not None:
             if chunks_healthy is not None:
                 item.chunks_healthy = chunks
                 item.chunks_healthy = chunks
             del item.source  # not used for hardlinks any more, replaced by hlid
             del item.source  # not used for hardlinks any more, replaced by hlid