@@ -1,9 +1,12 @@
 import argparse
+import os
 from typing import Tuple, Set
 
 from ._common import with_repository
 from ..archive import Archive
 from ..cache import write_chunkindex_to_repo_cache, build_chunkindex_from_repo
+from ..cache import files_cache_name, discover_files_cache_names
+from ..helpers import get_cache_dir
 from ..constants import *  # NOQA
 from ..hashindex import ChunkIndex, ChunkIndexEntry
 from ..helpers import set_ec, EXIT_ERROR, format_file_size, bin_to_hex
@@ -43,6 +46,7 @@ class ArchiveGarbageCollector:
         (self.missing_chunks, self.total_files, self.total_size, self.archives_count) = self.analyze_archives()
         self.report_and_delete()
         self.save_chunk_index()
+        self.cleanup_files_cache()
         logger.info("Finished compaction / garbage collection...")
 
     def get_repository_chunks(self) -> ChunkIndex:
@@ -71,6 +75,46 @@ class ArchiveGarbageCollector:
         )
         self.chunks = None  # nothing there (cleared!)
 
+    def cleanup_files_cache(self):
+        """
+        Clean up files cache files for archive series names that no longer exist in the repository.
+
+        Note: this only works perfectly if the files cache filename suffixes are automatically generated
+        and the user does not manually control them via more than one BORG_FILES_CACHE_SUFFIX env var value.
+        """
+        logger.info("Cleaning up files cache...")
+
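+        # Files caches are stored in the per-repository cache directory, named after the repository ID.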
+        cache_dir = os.path.join(get_cache_dir(), self.repository.id_str)
+        if not os.path.exists(cache_dir):
+            logger.debug("Cache directory does not exist, skipping files cache cleanup")
+            return
+
+        # Get all existing archive series names
+        existing_series = set(self.manifest.archives.names())
+        logger.debug(f"Found {len(existing_series)} existing archive series.")
+
+        # Get the set of all existing files cache file names.
+        try:
+            files_cache_names = set(discover_files_cache_names(cache_dir))
+            logger.debug(f"Found {len(files_cache_names)} files cache files.")
+        except (FileNotFoundError, PermissionError) as e:
+            logger.warning(f"Could not access cache directory: {e}")
+            return
+
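+        # A files cache file is unused if its name is not derived from any existing series name.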
+        used_files_cache_names = {files_cache_name(series_name) for series_name in existing_series}
+        unused_files_cache_names = files_cache_names - used_files_cache_names
+
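+        # Remove the unused files cache files; failures are logged, not fatal.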
+        for cache_filename in unused_files_cache_names:
+            cache_path = os.path.join(cache_dir, cache_filename)
+            try:
+                os.unlink(cache_path)
+            except (FileNotFoundError, PermissionError) as e:
+                logger.warning(f"Could not remove files cache file: {e}")
+        logger.info(f"Removed {len(unused_files_cache_names)} unused files cache files.")
+
     def analyze_archives(self) -> Tuple[Set, int, int, int]:
         """Iterate over all items in all archives, create the dicts id -> size of all used chunks."""