
Merge pull request #7846 from ThomasWaldmann/files-cache-with-size

files cache with size
TW 10 months ago
parent
commit 66b62c6fc9

+ 12 - 0
docs/usage/general/environment.rst.inc

@@ -84,6 +84,18 @@ General:
         - ``pyfuse3``: only try to load pyfuse3
         - ``llfuse``: only try to load llfuse
         - ``none``: do not try to load an implementation
+    BORG_CACHE_IMPL
+        Choose the implementation for the client-side cache; choose one of:
+
+        - ``local``: uses a persistent chunks cache and keeps it in a perfect state (precise refcounts and
+          sizes), requiring a potentially resource expensive cache sync in multi-client scenarios.
+          Also has a persistent files cache.
+        - ``adhoc``: builds a non-persistent chunks cache by querying the repo. Chunks cache contents
+          are somewhat sloppy for already existing chunks, concerning their refcount ("infinite") and
+          size (0). No files cache (slow, will chunk all input files). DEPRECATED.
+        - ``adhocwithfiles``: Like ``adhoc``, but with a persistent files cache. Default implementation.
+        - ``cli``: Determine the cache implementation from cli options. Without special options, will
+          usually end up with the ``local`` implementation.
     BORG_SELFTEST
         This can be used to influence borg's builtin self-tests. The default is to execute the tests
         at the beginning of each borg command invocation.
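
For orientation, a minimal sketch of how the BORG_CACHE_IMPL lookup documented above could work. This is illustrative only: the real selection logic lives in src/borg/cache.py, whose diff is suppressed below, and the default shown here is an assumption taken from the docs' note that ``adhocwithfiles`` is the default implementation:

    import os

    def get_cache_impl():
        # Sketch only: read the documented BORG_CACHE_IMPL environment variable.
        # The "adhocwithfiles" default is an assumption based on the docs above.
        return os.environ.get("BORG_CACHE_IMPL", "adhocwithfiles")

    # e.g. BORG_CACHE_IMPL=local selects the persistent chunks + files cache
    print(get_cache_impl())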

+ 33 - 29
src/borg/archive.py

@@ -643,14 +643,14 @@ Duration: {0.duration}
         # so we can already remove it here, the next .save() will then commit this cleanup.
         # remove its manifest entry, remove its ArchiveItem chunk, remove its item_ptrs chunks:
         del self.manifest.archives[self.checkpoint_name]
-        self.cache.chunk_decref(self.id, self.stats)
+        self.cache.chunk_decref(self.id, 1, self.stats)
         for id in metadata.item_ptrs:
-            self.cache.chunk_decref(id, self.stats)
+            self.cache.chunk_decref(id, 1, self.stats)
         # also get rid of that part item, we do not want to have it in next checkpoint or final archive
         tail_chunks = self.items_buffer.restore_chunks_state()
         # tail_chunks contain the tail of the archive items metadata stream, not needed for next commit.
         for id in tail_chunks:
-            self.cache.chunk_decref(id, self.stats)
+            self.cache.chunk_decref(id, 1, self.stats)  # TODO can we have real size here?
 
     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
@@ -1024,7 +1024,7 @@ Duration: {0.duration}
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
         self.manifest.archives[self.name] = (new_id, metadata.time)
-        self.cache.chunk_decref(self.id, self.stats)
+        self.cache.chunk_decref(self.id, 1, self.stats)
         self.id = new_id
 
     def rename(self, name):
@@ -1052,12 +1052,15 @@ Duration: {0.duration}
                 error = True
                 return exception_ignored  # must not return None here
 
-        def chunk_decref(id, stats):
+        def chunk_decref(id, size, stats):
             try:
-                self.cache.chunk_decref(id, stats, wait=False)
+                self.cache.chunk_decref(id, size, stats, wait=False)
             except KeyError:
-                cid = bin_to_hex(id)
-                raise ChunksIndexError(cid)
+                nonlocal error
+                if forced == 0:
+                    cid = bin_to_hex(id)
+                    raise ChunksIndexError(cid)
+                error = True
             else:
                 fetch_async_response(wait=False)
 
@@ -1073,13 +1076,13 @@ Duration: {0.duration}
                     pi.show(i)
                 _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM)
                 unpacker.feed(data)
-                chunk_decref(items_id, stats)
+                chunk_decref(items_id, 1, stats)
                 try:
                     for item in unpacker:
                         item = Item(internal_dict=item)
                         if "chunks" in item:
                             for chunk_id, size in item.chunks:
-                                chunk_decref(chunk_id, stats)
+                                chunk_decref(chunk_id, size, stats)
                 except (TypeError, ValueError):
                     # if items metadata spans multiple chunks and one chunk got dropped somehow,
                     # it could be that unpacker yields bad types
@@ -1096,12 +1099,12 @@ Duration: {0.duration}
 
         # delete the blocks that store all the references that end up being loaded into metadata.items:
         for id in self.metadata.item_ptrs:
-            chunk_decref(id, stats)
+            chunk_decref(id, 1, stats)
 
         # in forced delete mode, we try hard to delete at least the manifest entry,
         # if possible also the archive superblock, even if processing the items raises
         # some harmless exception.
-        chunk_decref(self.id, stats)
+        chunk_decref(self.id, 1, stats)
         del self.manifest.archives[self.name]
         while fetch_async_response(wait=True) is not None:
             # we did async deletes, process outstanding results (== exceptions),
@@ -1510,7 +1513,7 @@ class FilesystemObjectProcessors:
         except BackupOSError:
             # see comments in process_file's exception handler, same issue here.
             for chunk in item.get("chunks", []):
-                cache.chunk_decref(chunk.id, self.stats, wait=False)
+                cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
             raise
         else:
             item.get_size(memorize=True)
@@ -1544,7 +1547,7 @@ class FilesystemObjectProcessors:
                         item.chunks = []
                         for chunk_id, chunk_size in hl_chunks:
                             # process one-by-one, so we will know in item.chunks how far we got
-                            chunk_entry = cache.chunk_incref(chunk_id, self.stats)
+                            chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
                             item.chunks.append(chunk_entry)
                     else:  # normal case, no "2nd+" hardlink
                         if not is_special_file:
@@ -1552,26 +1555,26 @@ class FilesystemObjectProcessors:
                             started_hashing = time.monotonic()
                             path_hash = self.key.id_hash(hashed_path)
                             self.stats.hashing_time += time.monotonic() - started_hashing
-                            known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
+                            known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                         else:
                             # in --read-special mode, we may be called for special files.
                             # there should be no information in the cache about special files processed in
                             # read-special mode, but we better play safe as this was wrong in the past:
                             hashed_path = path_hash = None
-                            known, ids = False, None
-                        if ids is not None:
+                            known, chunks = False, None
+                        if chunks is not None:
                             # Make sure all ids are available
-                            for id_ in ids:
-                                if not cache.seen_chunk(id_):
+                            for chunk in chunks:
+                                if not cache.seen_chunk(chunk.id):
                                     # cache said it is unmodified, but we lost a chunk: process file like modified
                                     status = "M"
                                     break
                             else:
                                 item.chunks = []
-                                for chunk_id in ids:
+                                for chunk in chunks:
                                     # process one-by-one, so we will know in item.chunks how far we got
-                                    chunk_entry = cache.chunk_incref(chunk_id, self.stats)
-                                    item.chunks.append(chunk_entry)
+                                    cache.chunk_incref(chunk.id, chunk.size, self.stats)
+                                    item.chunks.append(chunk)
                                 status = "U"  # regular file, unchanged
                         else:
                             status = "M" if known else "A"  # regular file, modified or added
@@ -1606,7 +1609,7 @@ class FilesystemObjectProcessors:
                                 # block or char device will change without its mtime/size/inode changing.
                                 # also, we must not memorize a potentially inconsistent/corrupt file that
                                 # changed while we backed it up.
-                                cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
+                                cache.memorize_file(hashed_path, path_hash, st, item.chunks)
                         self.stats.files_stats[status] += 1  # must be done late
                         if not changed_while_backup:
                             status = None  # we already called print_file_status
@@ -1620,7 +1623,7 @@ class FilesystemObjectProcessors:
                     # but we will not add an item (see add_item in create_helper) and thus
                     # they would be orphaned chunks in case that we commit the transaction.
                     for chunk in item.get("chunks", []):
-                        cache.chunk_decref(chunk.id, self.stats, wait=False)
+                        cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
                     # Now that we have cleaned up the chunk references, we can re-raise the exception.
                     # This will skip processing of this file, but might retry or continue with the next one.
                     raise
@@ -1731,7 +1734,7 @@ class TarfileObjectProcessors:
             except BackupOSError:
                 # see comment in FilesystemObjectProcessors.process_file, same issue here.
                 for chunk in item.get("chunks", []):
-                    self.cache.chunk_decref(chunk.id, self.stats, wait=False)
+                    self.cache.chunk_decref(chunk.id, chunk.size, self.stats, wait=False)
                 raise
 
 
@@ -2328,10 +2331,10 @@ class ArchiveChecker:
             unused = {id_ for id_, entry in self.chunks.iteritems() if entry.refcount == 0}
             orphaned = unused - self.possibly_superseded
             if orphaned:
-                logger.error(f"{len(orphaned)} orphaned objects found!")
+                logger.info(f"{len(orphaned)} orphaned (unused) objects found.")
                 for chunk_id in orphaned:
                     logger.debug(f"chunk {bin_to_hex(chunk_id)} is orphaned.")
-                self.error_found = True
+                # To support working with AdHocCache or AdHocWithFilesCache, we do not set self.error_found = True.
             if self.repair and unused:
                 logger.info(
                     "Deleting %d orphaned and %d superseded objects..." % (len(orphaned), len(self.possibly_superseded))
@@ -2444,7 +2447,7 @@ class ArchiveRecreater:
     def process_chunks(self, archive, target, item):
         if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
-                self.cache.chunk_incref(chunk_id, target.stats)
+                self.cache.chunk_incref(chunk_id, size, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         chunk_processor = partial(self.chunk_processor, target)
@@ -2452,8 +2455,9 @@ class ArchiveRecreater:
 
     def chunk_processor(self, target, chunk):
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
+        size = len(data)
         if chunk_id in self.seen_chunks:
-            return self.cache.chunk_incref(chunk_id, target.stats)
+            return self.cache.chunk_incref(chunk_id, size, target.stats)
         chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
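
Taken together, the archive.py changes thread the chunk size through the cache API: chunk_incref()/chunk_decref() now take an explicit size, and the files cache memorizes full chunk entries (id plus size) instead of bare ids, so re-referencing an unchanged file no longer requires a chunks index with precise sizes. A self-contained toy sketch of that data-shape change (not borg's actual classes):

    from collections import namedtuple

    # Toy model: a files cache entry now carries (id, size) per chunk.
    ChunkListEntry = namedtuple("ChunkListEntry", "id size")
    files_cache = {}

    def memorize_file(path_hash, chunks):
        # store full chunk entries, as memorize_file(..., item.chunks) does above
        files_cache[path_hash] = [ChunkListEntry(*c) for c in chunks]

    def file_known_and_unchanged(path_hash):
        # returns (known, chunks) with .id/.size entries, matching the new call sites
        chunks = files_cache.get(path_hash)
        return chunks is not None, chunks

    memorize_file(b"p1", [(b"id1", 4096), (b"id2", 1024)])
    known, chunks = file_known_and_unchanged(b"p1")
    assert known and chunks[0].size == 4096  # size available without a chunks index lookup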

+ 2 - 6
src/borg/archiver/config_cmd.py

@@ -5,7 +5,6 @@ from ._common import with_repository
 from ..cache import Cache, assert_secure
 from ..constants import *  # NOQA
 from ..helpers import Error, CommandError
-from ..helpers import Location
 from ..helpers import parse_file_size, hex_to_bin
 from ..manifest import Manifest
 
@@ -52,11 +51,8 @@ class ConfigMixIn:
         def cache_validate(section, name, value=None, check_value=True):
             if section not in ["cache"]:
                 raise ValueError("Invalid section")
-            if name in ["previous_location"]:
-                if check_value:
-                    Location(value)
-            else:
-                raise ValueError("Invalid name")
+            # currently, we do not support setting anything in the cache via borg config.
+            raise ValueError("Invalid name")
 
         def list_config(config):
             default_values = {

+ 16 - 2
src/borg/archiver/create_cmd.py

@@ -224,7 +224,9 @@ class CreateMixIn:
                 manifest,
                 progress=args.progress,
                 lock_wait=self.lock_wait,
-                permit_adhoc_cache=args.no_cache_sync,
+                no_cache_sync_permitted=args.no_cache_sync,
+                no_cache_sync_forced=args.no_cache_sync_forced,
+                prefer_adhoc_cache=args.prefer_adhoc_cache,
                 cache_mode=args.files_cache_mode,
                 iec=args.iec,
             ) as cache:
@@ -801,7 +803,19 @@ class CreateMixIn:
             "--no-cache-sync",
             dest="no_cache_sync",
             action="store_true",
-            help="experimental: do not synchronize the cache. Implies not using the files cache.",
+            help="experimental: do not synchronize the chunks cache.",
+        )
+        subparser.add_argument(
+            "--no-cache-sync-forced",
+            dest="no_cache_sync_forced",
+            action="store_true",
+            help="experimental: do not synchronize the chunks cache (forced).",
+        )
+        subparser.add_argument(
+            "--prefer-adhoc-cache",
+            dest="prefer_adhoc_cache",
+            action="store_true",
+            help="experimental: prefer AdHocCache (w/o files cache) over AdHocWithFilesCache (with files cache).",
         )
         subparser.add_argument(
             "--stdin-name",

+ 3 - 10
src/borg/archiver/rinfo_cmd.py

@@ -59,16 +59,9 @@ class RInfoMixIn:
                 output += f" out of {format_file_size(storage_quota, iec=args.iec)}"
             output += "\n"
 
-            output += (
-                textwrap.dedent(
-                    """
-                    Cache: {cache.path}
-                    Security dir: {security_dir}
-                    """
-                )
-                .strip()
-                .format(**info)
-            )
+            if hasattr(info["cache"], "path"):
+                output += "Cache: {cache.path}\n".format(**info)
+            output += "Security dir: {security_dir}\n".format(**info)
 
             print(output)
             print(str(cache))

+ 1 - 1
src/borg/archiver/transfer_cmd.py

@@ -143,7 +143,7 @@ class TransferMixIn:
                                 transfer_size += size
                             else:
                                 if not dry_run:
-                                    chunk_entry = cache.chunk_incref(chunk_id, archive.stats)
+                                    chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
                                     chunks.append(chunk_entry)
                                 present_size += size
                         if not dry_run:

File diff suppressed because it is too large
+ 522 - 298
src/borg/cache.py


+ 2 - 2
src/borg/helpers/parseformat.py

@@ -1184,13 +1184,13 @@ class BorgJsonEncoder(json.JSONEncoder):
         from ..repository import Repository
         from ..remote import RemoteRepository
         from ..archive import Archive
-        from ..cache import LocalCache, AdHocCache
+        from ..cache import LocalCache, AdHocCache, AdHocWithFilesCache
 
         if isinstance(o, Repository) or isinstance(o, RemoteRepository):
             return {"id": bin_to_hex(o.id), "location": o._location.canonical_path()}
         if isinstance(o, Archive):
             return o.info()
-        if isinstance(o, LocalCache):
+        if isinstance(o, (LocalCache, AdHocWithFilesCache)):
             return {"path": o.path, "stats": o.stats()}
         if isinstance(o, AdHocCache):
             return {"stats": o.stats()}
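
One consequence of this encoder change for JSON consumers: the "cache" object in e.g. ``borg rinfo --json`` carries a "path" only for cache implementations that have an on-disk directory (LocalCache, AdHocWithFilesCache), so the key should be treated as optional, as the test suite changes below do. A small illustration with hypothetical output in the shape produced by the encoder above:

    import json

    with_path = json.loads('{"cache": {"path": "/home/user/.cache/borg/0123", "stats": {}}}')
    without_path = json.loads('{"cache": {"stats": {}}}')  # AdHocCache: stats only, no path

    for info in (with_path, without_path):
        cache_path = info["cache"].get("path")  # may be None, as corrupt_archiver() below assumes
        print(cache_path)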

+ 7 - 1
src/borg/testsuite/archiver/__init__.py

@@ -18,7 +18,7 @@ import pytest
 from ... import xattr, platform
 from ...archive import Archive
 from ...archiver import Archiver, PURE_PYTHON_MSGPACK_WARNING
-from ...cache import Cache
+from ...cache import Cache, LocalCache
 from ...constants import *  # NOQA
 from ...helpers import Location, umount
 from ...helpers import EXIT_SUCCESS
@@ -356,9 +356,15 @@ def check_cache(archiver):
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
         with Cache(repository, manifest, sync=False) as cache:
             original_chunks = cache.chunks
+            # the LocalCache implementation has an on-disk chunks cache,
+            # but AdHocWithFilesCache and AdHocCache don't have persistent chunks cache.
+            persistent = isinstance(cache, LocalCache)
         Cache.destroy(repository)
         with Cache(repository, manifest) as cache:
             correct_chunks = cache.chunks
+    if not persistent:
+        # there is no point in doing the checks
+        return
     assert original_chunks is not correct_chunks
     seen = set()
     for id, (refcount, size) in correct_chunks.iteritems():

+ 4 - 3
src/borg/testsuite/archiver/check_cmd.py

@@ -338,10 +338,11 @@ def test_extra_chunks(archivers, request):
     with Repository(archiver.repository_location, exclusive=True) as repository:
         repository.put(b"01234567890123456789012345678901", b"xxxx")
         repository.commit(compact=False)
-    cmd(archiver, "check", exit_code=1)
-    cmd(archiver, "check", exit_code=1)
+    output = cmd(archiver, "check", "-v", exit_code=0)  # orphans are not considered warnings anymore
+    assert "1 orphaned (unused) objects found." in output
     cmd(archiver, "check", "--repair", exit_code=0)
-    cmd(archiver, "check", exit_code=0)
+    output = cmd(archiver, "check", "-v", exit_code=0)
+    assert "orphaned (unused) objects found." not in output
     cmd(archiver, "extract", "archive1", "--dry-run", exit_code=0)
 
 

+ 21 - 15
src/borg/testsuite/archiver/checks.py

@@ -4,7 +4,7 @@ from unittest.mock import patch
 
 import pytest
 
-from ...cache import Cache, LocalCache
+from ...cache import Cache, LocalCache, get_cache_impl
 from ...constants import *  # NOQA
 from ...helpers import Location, get_security_dir, bin_to_hex
 from ...helpers import EXIT_ERROR
@@ -153,32 +153,29 @@ def test_repository_move(archivers, request, monkeypatch):
     security_dir = get_security_directory(archiver.repository_path)
     os.replace(archiver.repository_path, archiver.repository_path + "_new")
     archiver.repository_location += "_new"
+    # borg should notice that the repository location changed and abort.
+    if archiver.FORK_DEFAULT:
+        cmd(archiver, "rinfo", exit_code=EXIT_ERROR)
+    else:
+        with pytest.raises(Cache.RepositoryAccessAborted):
+            cmd(archiver, "rinfo")
+    # if we explicitly allow relocated repos, it should work fine.
     monkeypatch.setenv("BORG_RELOCATED_REPO_ACCESS_IS_OK", "yes")
     cmd(archiver, "rinfo")
     monkeypatch.delenv("BORG_RELOCATED_REPO_ACCESS_IS_OK")
     with open(os.path.join(security_dir, "location")) as fd:
         location = fd.read()
         assert location == Location(archiver.repository_location).canonical_path()
-    # Needs no confirmation anymore
-    cmd(archiver, "rinfo")
-    shutil.rmtree(archiver.cache_path)
+    # after new repo location was confirmed once, it needs no further confirmation anymore.
     cmd(archiver, "rinfo")
     shutil.rmtree(security_dir)
+    # it also needs no confirmation if we have no knowledge about the previous location.
     cmd(archiver, "rinfo")
+    # it will re-create security-related infos in the security dir:
     for file in ("location", "key-type", "manifest-timestamp"):
         assert os.path.exists(os.path.join(security_dir, file))
 
 
-def test_security_dir_compat(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    cmd(archiver, "rcreate", RK_ENCRYPTION)
-    with open(os.path.join(get_security_directory(archiver.repository_path), "location"), "w") as fd:
-        fd.write("something outdated")
-    # This is fine, because the cache still has the correct information. security_dir and cache can disagree
-    # if older versions are used to confirm a renamed repository.
-    cmd(archiver, "rinfo")
-
-
 def test_unknown_unencrypted(archivers, request, monkeypatch):
     archiver = request.getfixturevalue(archivers)
     cmd(archiver, "rcreate", "--encryption=none")
@@ -207,9 +204,12 @@ def test_unknown_feature_on_create(archivers, request):
     cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
 
 
+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
 def test_unknown_feature_on_cache_sync(archivers, request):
+    # LocalCache.sync checks repo compat
     archiver = request.getfixturevalue(archivers)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
+    # delete the cache to trigger a cache sync later in borg create
     cmd(archiver, "rdelete", "--cache-only")
     add_unknown_feature(archiver.repository_path, Manifest.Operation.READ)
     cmd_raises_unknown_feature(archiver, ["create", "test", "input"])
@@ -277,6 +277,7 @@ def test_unknown_mandatory_feature_in_cache(archivers, request):
             repository._location = Location(archiver.repository_location)
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
         with Cache(repository, manifest) as cache:
+            is_localcache = isinstance(cache, LocalCache)
             cache.begin_txn()
             cache.cache_config.mandatory_features = {"unknown-feature"}
             cache.commit()
@@ -295,7 +296,8 @@ def test_unknown_mandatory_feature_in_cache(archivers, request):
         with patch.object(LocalCache, "wipe_cache", wipe_wrapper):
             cmd(archiver, "create", "test", "input")
 
-        assert called
+        if is_localcache:
+            assert called
 
     with Repository(archiver.repository_path, exclusive=True) as repository:
         if remote_repo:
@@ -315,10 +317,14 @@ def test_check_cache(archivers, request):
             cache.begin_txn()
             cache.chunks.incref(list(cache.chunks.iteritems())[0][0])
             cache.commit()
+            persistent = isinstance(cache, LocalCache)
+    if not persistent:
+        pytest.skip("check_cache is pointless if we do not have a persistent chunks cache")
     with pytest.raises(AssertionError):
         check_cache(archiver)
 
 
+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "adhoc"), reason="only works with LocalCache")
 def test_env_use_chunks_archive(archivers, request, monkeypatch):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)

+ 19 - 1
src/borg/testsuite/archiver/corruption.py

@@ -34,7 +34,7 @@ def test_check_corrupted_repository(archiver):
 def corrupt_archiver(archiver):
     create_test_files(archiver.input_path)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
-    archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"]["path"]
+    archiver.cache_path = json.loads(cmd(archiver, "rinfo", "--json"))["cache"].get("path")
 
 
 def corrupt(file, amount=1):
@@ -48,9 +48,16 @@ def corrupt(file, amount=1):
 @pytest.mark.allow_cache_wipe
 def test_cache_chunks(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     create_src_archive(archiver, "test")
     chunks_path = os.path.join(archiver.cache_path, "chunks")
+    if not os.path.exists(chunks_path):
+        pytest.skip("no persistent chunks index for this kind of Cache implementation")
+
     chunks_before_corruption = set(ChunkIndex(path=chunks_path).iteritems())
+
     corrupt(chunks_path)
 
     assert not archiver.FORK_DEFAULT  # test does not support forking
@@ -74,6 +81,9 @@ def test_cache_chunks(archiver):
 
 def test_cache_files(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     cmd(archiver, "create", "test", "input")
     corrupt(os.path.join(archiver.cache_path, "files"))
     out = cmd(archiver, "create", "test1", "input")
@@ -83,6 +93,9 @@ def test_cache_files(archiver):
 
 def test_chunks_archive(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     cmd(archiver, "create", "test1", "input")
     # Find ID of test1, so we can corrupt it later :)
     target_id = cmd(archiver, "rlist", "--format={id}{NL}").strip()
@@ -93,6 +106,8 @@ def test_chunks_archive(archiver):
     cmd(archiver, "rinfo", "--json")
 
     chunks_archive = os.path.join(archiver.cache_path, "chunks.archive.d")
+    if not os.path.exists(chunks_archive):
+        pytest.skip("Only LocalCache has a per-archive chunks index cache.")
     assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each
 
     corrupt(os.path.join(chunks_archive, target_id + ".compact"))
@@ -114,6 +129,9 @@ def test_chunks_archive(archiver):
 
 def test_old_version_interfered(archiver):
     corrupt_archiver(archiver)
+    if archiver.cache_path is None:
+        pytest.skip("no cache path for this kind of Cache implementation")
+
     # Modify the main manifest ID without touching the manifest ID in the integrity section.
     # This happens if a version without integrity checking modifies the cache.
     config_path = os.path.join(archiver.cache_path, "config")

+ 6 - 4
src/borg/testsuite/archiver/create_cmd.py

@@ -12,6 +12,7 @@ import time
 import pytest
 
 from ... import platform
+from ...cache import get_cache_impl
 from ...constants import *  # NOQA
 from ...manifest import Manifest
 from ...platform import is_cygwin, is_win32, is_darwin
@@ -540,20 +541,21 @@ def test_create_pattern_intermediate_folders_first(archivers, request):
     assert out_list.index("d x/b") < out_list.index("- x/b/foo_b")
 
 
-def test_create_no_cache_sync(archivers, request):
+@pytest.mark.skipif(get_cache_impl() in ("adhocwithfiles", "local"), reason="only works with AdHocCache")
+def test_create_no_cache_sync_adhoc(archivers, request):  # TODO: add test for AdHocWithFilesCache
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "rdelete", "--cache-only")
     create_json = json.loads(
-        cmd(archiver, "create", "--no-cache-sync", "--json", "--error", "test", "input")
-    )  # ignore experimental warning
+        cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "--json", "test", "input")
+    )
     info_json = json.loads(cmd(archiver, "info", "-a", "test", "--json"))
     create_stats = create_json["cache"]["stats"]
     info_stats = info_json["cache"]["stats"]
     assert create_stats == info_stats
     cmd(archiver, "rdelete", "--cache-only")
-    cmd(archiver, "create", "--no-cache-sync", "test2", "input")
+    cmd(archiver, "create", "--no-cache-sync", "--prefer-adhoc-cache", "test2", "input")
     cmd(archiver, "rinfo")
     cmd(archiver, "check")
 

+ 6 - 1
src/borg/testsuite/archiver/debug_cmds.py

@@ -168,7 +168,12 @@ def test_debug_refcount_obj(archivers, request):
     create_json = json.loads(cmd(archiver, "create", "--json", "test", "input"))
     archive_id = create_json["archive"]["id"]
     output = cmd(archiver, "debug", "refcount-obj", archive_id).strip()
-    assert output == f"object {archive_id} has 1 referrers [info from chunks cache]."
+    # LocalCache does precise refcounting, so we'll get 1 reference for the archive.
+    # AdHocCache or AdHocWithFilesCache doesn't, we'll get ChunkIndex.MAX_VALUE as refcount.
+    assert (
+        output == f"object {archive_id} has 1 referrers [info from chunks cache]."
+        or output == f"object {archive_id} has 4294966271 referrers [info from chunks cache]."
+    )
 
     # Invalid IDs do not abort or return an error
     output = cmd(archiver, "debug", "refcount-obj", "124", "xyza").strip()
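
The large refcount in the second accepted output is presumably the chunks index sentinel for "infinite" references (ChunkIndex.MAX_VALUE); assuming its usual definition of 2**32 - 1025, the magic number checks out:

    # Assumption: the literal is the "infinite refcount" sentinel, presumably
    # ChunkIndex.MAX_VALUE = 2**32 - 1025; this only verifies the arithmetic.
    assert 2**32 - 1025 == 4294966271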

+ 2 - 3
src/borg/testsuite/archiver/delete_cmd.py

@@ -25,9 +25,8 @@ def test_delete(archivers, request):
     cmd(archiver, "extract", "test.2", "--dry-run")
     output = cmd(archiver, "delete", "-a", "test.2", "--stats")
     assert "Original size: -" in output  # negative size == deleted data
-    # Make sure all data except the manifest has been deleted
-    with Repository(archiver.repository_path) as repository:
-        assert len(repository) == 1
+    output = cmd(archiver, "rlist")
+    assert output == ""  # no archives left!
 
 
 def test_delete_multiple(archivers, request):

+ 3 - 3
src/borg/testsuite/archiver/list_cmd.py

@@ -40,9 +40,9 @@ def test_list_chunk_counts(archivers, request):
         fd.write(b"baab" * 2000000)
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "create", "test", "input")
-    output = cmd(archiver, "list", "test", "--format", "{num_chunks} {unique_chunks} {path}{NL}")
-    assert "0 0 input/empty_file" in output
-    assert "2 2 input/two_chunks" in output
+    output = cmd(archiver, "list", "test", "--format", "{num_chunks} {path}{NL}")
+    assert "0 input/empty_file" in output
+    assert "2 input/two_chunks" in output
 
 
 def test_list_size(archivers, request):

+ 10 - 7
src/borg/testsuite/archiver/recreate_cmd.py

@@ -153,15 +153,18 @@ def test_recreate_rechunkify(archivers, request):
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
     cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
-    chunks_list = cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks} {unique_chunks}")
-    num_chunks, unique_chunks = map(int, chunks_list.split(" "))
-    # test1 and test2 do not deduplicate
-    assert num_chunks == unique_chunks
+    num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
+    num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
+    # right now, the file is chunked differently
+    assert num_chunks1 != num_chunks2
     cmd(archiver, "recreate", "--chunker-params", "default")
     check_cache(archiver)
-    # test1 and test2 do deduplicate after recreate
-    assert int(cmd(archiver, "list", "test1", "input/large_file", "--format={size}"))
-    assert not int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{unique_chunks}"))
+    num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
+    num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
+    # now the files are chunked in the same way
+    # TODO: this is a rather weak test, it could be improved by comparing the IDs in the chunk lists,
+    # to make sure that everything is completely deduplicated now (both files have identical chunks).
+    assert num_chunks1 == num_chunks2
 
 
 def test_recreate_fixed_rechunkify(archivers, request):

+ 4 - 4
src/borg/testsuite/cache.py

@@ -189,7 +189,7 @@ class TestAdHocCache:
 
     def test_does_not_delete_existing_chunks(self, repository, cache):
         assert cache.seen_chunk(H(1)) == ChunkIndex.MAX_VALUE
-        cache.chunk_decref(H(1), Statistics())
+        cache.chunk_decref(H(1), 1, Statistics())
         assert repository.get(H(1)) == b"1234"
 
     def test_seen_chunk_add_chunk_size(self, cache):
@@ -199,7 +199,7 @@ class TestAdHocCache:
         """E.g. checkpoint archives"""
         cache.add_chunk(H(5), {}, b"1010", stats=Statistics())
         assert cache.seen_chunk(H(5)) == 1
-        cache.chunk_decref(H(5), Statistics())
+        cache.chunk_decref(H(5), 1, Statistics())
         assert not cache.seen_chunk(H(5))
         with pytest.raises(Repository.ObjectNotFound):
             repository.get(H(5))
@@ -220,9 +220,9 @@ class TestAdHocCache:
 
     def test_incref_after_add_chunk(self, cache):
         assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
-        assert cache.chunk_incref(H(3), Statistics()) == (H(3), 4)
+        assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
 
     def test_existing_incref_after_add_chunk(self, cache):
         """This case occurs with part files, see Archive.chunk_file."""
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
-        assert cache.chunk_incref(H(1), Statistics()) == (H(1), 4)
+        assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)

+ 1 - 0
src/borg/testsuite/conftest.py

@@ -127,6 +127,7 @@ def archiver(tmp_path, set_env_variables):
     archiver.patterns_file_path = os.fspath(tmp_path / "patterns")
     os.environ["BORG_KEYS_DIR"] = archiver.keys_path
     os.environ["BORG_CACHE_DIR"] = archiver.cache_path
+    # os.environ["BORG_CACHE_IMPL"] = "adhocwithfiles"
     os.mkdir(archiver.input_path)
     os.chmod(archiver.input_path, 0o777)  # avoid troubles with fakeroot / FUSE
     os.mkdir(archiver.output_path)

+ 2 - 2
src/borg/upgrade.py

@@ -84,8 +84,8 @@ class UpgraderFrom12To20:
             chunks, chunks_healthy = self.hlm.retrieve(id=hlid, default=(None, None))
             if chunks is not None:
                 item.chunks = chunks
-                for chunk_id, _ in chunks:
-                    self.cache.chunk_incref(chunk_id, self.archive.stats)
+                for chunk_id, chunk_size in chunks:
+                    self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
             if chunks_healthy is not None:
                 item.chunks_healthy = chunks
             del item.source  # not used for hardlinks any more, replaced by hlid

Some files were not shown because too many files changed in this diff