浏览代码

Merge pull request #8849 from ThomasWaldmann/fd-based-dir-is-tagged

dir_is_tagged/_is_cachedir: add fd-based operations
TW 2 周之前
父节点
当前提交
b73af3642d
共有 3 个文件被更改,包括 169 次插入和 24 次删除
  1. +1 -1
      src/borg/archiver/create_cmd.py
  2. +43 -23
      src/borg/helpers/fs.py
  3. +125 -0
      src/borg/testsuite/helpers_test.py

+ 1 - 1
src/borg/archiver/create_cmd.py

@@ -484,7 +484,7 @@ class CreateMixIn:
                         with backup_io("fstat"):
                             st = stat_update_check(st, os.fstat(child_fd))
                     if recurse:
-                        tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
+                        tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present, dir_fd=child_fd)
                         if tag_names:
                             # if we are already recursing in an excluded dir, we do not need to do anything else than
                             # returning (we do not need to archive or recurse into tagged directories), see #3991:

+ 43 - 23
src/borg/helpers/fs.py

@@ -178,40 +178,60 @@ def get_config_dir(*, legacy=False, create=True):
     return config_dir
 
 
-def dir_is_cachedir(path):
-    """Determines whether the specified path is a cache directory (and
+def dir_is_cachedir(path=None, dir_fd=None):
+    """Determines whether the specified directory is a cache directory (and
     therefore should potentially be excluded from the backup) according to
-    the CACHEDIR.TAG protocol
-    (http://www.bford.info/cachedir/spec.html).
-    """
+    the CACHEDIR.TAG protocol (http://www.bford.info/cachedir/spec.html).
 
-    tag_path = os.path.join(path, CACHE_TAG_NAME)
+    If dir_fd is provided, operations will be based on the directory file descriptor.
+    Otherwise (path is provided), operations will be based on the directory path.
+    """
+    tag_fd = None
     try:
-        if os.path.exists(tag_path):
-            with open(tag_path, "rb") as tag_file:
-                tag_data = tag_file.read(len(CACHE_TAG_CONTENTS))
-                if tag_data == CACHE_TAG_CONTENTS:
-                    return True
-    except OSError:
-        pass
-    return False
+        if dir_fd is not None:
+            tag_fd = os.open(CACHE_TAG_NAME, os.O_RDONLY, dir_fd=dir_fd)
+        else:
+            tag_fd = os.open(os.path.join(path, CACHE_TAG_NAME), os.O_RDONLY)
+        return os.read(tag_fd, len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS
+    except (FileNotFoundError, OSError):
+        return False
+    finally:
+        if tag_fd is not None:
+            os.close(tag_fd)
 
 
-def dir_is_tagged(path, exclude_caches, exclude_if_present):
+def dir_is_tagged(path=None, exclude_caches=None, exclude_if_present=None, dir_fd=None):
     """Determines whether the specified path is excluded by being a cache
     directory or containing user-specified tag files/directories. Returns a
     list of the names of the tag files/directories (either CACHEDIR.TAG or the
     matching user-specified files/directories).
+
+    If dir_fd is provided, operations will be based on the directory file descriptor.
+    Otherwise (path is provided), operations will be based on the directory path.
     """
-    # TODO: do operations based on the directory fd
     tag_names = []
-    if exclude_caches and dir_is_cachedir(path):
-        tag_names.append(CACHE_TAG_NAME)
-    if exclude_if_present is not None:
-        for tag in exclude_if_present:
-            tag_path = os.path.join(path, tag)
-            if os.path.exists(tag_path):
-                tag_names.append(tag)
+
+    if dir_fd is not None:
+        # Use file descriptor-based operations
+        if exclude_caches and dir_is_cachedir(dir_fd=dir_fd):
+            tag_names.append(CACHE_TAG_NAME)
+        if exclude_if_present is not None:
+            for tag in exclude_if_present:
+                try:
+                    os.stat(tag, dir_fd=dir_fd)
+                    tag_names.append(tag)
+                except FileNotFoundError:
+                    pass
+    else:
+        # Use path-based operations (for backward compatibility)
+        if exclude_caches and dir_is_cachedir(path=path):
+            tag_names.append(CACHE_TAG_NAME)
+        if exclude_if_present is not None:
+            for tag in exclude_if_present:
+                tag_path = os.path.join(path, tag)
+                if os.path.exists(tag_path):
+                    tag_names.append(tag)
+
     return tag_names
 
 

+ 125 - 0
src/borg/testsuite/helpers_test.py

@@ -6,6 +6,7 @@ import os
 import shutil
 import sys
 from argparse import ArgumentTypeError
+from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from io import StringIO, BytesIO
 
@@ -14,6 +15,8 @@ import pytest
 from ..archiver.prune_cmd import prune_within, prune_split
 from .. import platform
 from ..constants import *  # NOQA
+from ..constants import CACHE_TAG_NAME, CACHE_TAG_CONTENTS
+from ..helpers.fs import dir_is_tagged
 from ..helpers import Location
 from ..helpers import Buffer
 from ..helpers import (
@@ -1519,3 +1522,125 @@ def test_ec_invalid():
 )
 def test_max_ec(ec1, ec2, ec_max):
     assert max_ec(ec1, ec2) == ec_max
+
+
+def test_dir_is_tagged(tmpdir):
+    """Test dir_is_tagged with both path-based and file descriptor-based operations."""
+
+    @contextmanager
+    def open_dir(path):
+        fd = os.open(path, os.O_RDONLY)
+        try:
+            yield fd
+        finally:
+            os.close(fd)
+
+    # Create directories for testing exclude_caches
+    cache_dir = tmpdir.mkdir("cache_dir")
+    cache_tag_path = cache_dir.join(CACHE_TAG_NAME)
+    cache_tag_path.write_binary(CACHE_TAG_CONTENTS)
+
+    invalid_cache_dir = tmpdir.mkdir("invalid_cache_dir")
+    invalid_cache_tag_path = invalid_cache_dir.join(CACHE_TAG_NAME)
+    invalid_cache_tag_path.write_binary(b"invalid signature")
+
+    # Create directories for testing exclude_if_present
+    tagged_dir = tmpdir.mkdir("tagged_dir")
+    tag_file = tagged_dir.join(".NOBACKUP")
+    tag_file.write("test")
+
+    other_tagged_dir = tmpdir.mkdir("other_tagged_dir")
+    other_tag_file = other_tagged_dir.join(".DONOTBACKUP")
+    other_tag_file.write("test")
+
+    # Create a directory with both a CACHEDIR.TAG and a custom tag file
+    both_dir = tmpdir.mkdir("both_dir")
+    cache_tag_path = both_dir.join(CACHE_TAG_NAME)
+    cache_tag_path.write_binary(CACHE_TAG_CONTENTS)
+    custom_tag_path = both_dir.join(".NOBACKUP")
+    custom_tag_path.write("test")
+
+    # Create a directory without any tag files
+    normal_dir = tmpdir.mkdir("normal_dir")
+
+    # Test edge cases
+    test_dir = tmpdir.mkdir("test_dir")
+    assert dir_is_tagged(path=str(test_dir), exclude_caches=None, exclude_if_present=None) == []
+    assert dir_is_tagged(path=str(test_dir), exclude_if_present=[]) == []
+
+    # Test with non-existent directory (should not raise an exception)
+    non_existent_dir = str(tmpdir.join("non_existent"))
+    result = dir_is_tagged(path=non_existent_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"])
+    assert result == []
+
+    # Test 1: exclude_caches with path-based operations
+    assert dir_is_tagged(path=str(cache_dir), exclude_caches=True) == [CACHE_TAG_NAME]
+    assert dir_is_tagged(path=str(invalid_cache_dir), exclude_caches=True) == []
+    assert dir_is_tagged(path=str(normal_dir), exclude_caches=True) == []
+
+    assert dir_is_tagged(path=str(cache_dir), exclude_caches=False) == []
+    assert dir_is_tagged(path=str(invalid_cache_dir), exclude_caches=False) == []
+    assert dir_is_tagged(path=str(normal_dir), exclude_caches=False) == []
+
+    # Test 2: exclude_caches with file-descriptor-based operations
+    with open_dir(str(cache_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True) == [CACHE_TAG_NAME]
+    with open_dir(str(invalid_cache_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True) == []
+    with open_dir(str(normal_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True) == []
+
+    with open_dir(str(cache_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=False) == []
+    with open_dir(str(invalid_cache_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=False) == []
+    with open_dir(str(normal_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=False) == []
+
+    # Test 3: exclude_if_present with path-based operations
+    tags = [".NOBACKUP"]
+    assert dir_is_tagged(path=str(tagged_dir), exclude_if_present=tags) == [".NOBACKUP"]
+    assert dir_is_tagged(path=str(other_tagged_dir), exclude_if_present=tags) == []
+    assert dir_is_tagged(path=str(normal_dir), exclude_if_present=tags) == []
+
+    tags = [".NOBACKUP", ".DONOTBACKUP"]
+    assert dir_is_tagged(path=str(tagged_dir), exclude_if_present=tags) == [".NOBACKUP"]
+    assert dir_is_tagged(path=str(other_tagged_dir), exclude_if_present=tags) == [".DONOTBACKUP"]
+    assert dir_is_tagged(path=str(normal_dir), exclude_if_present=tags) == []
+
+    # Test 4: exclude_if_present with file descriptor-based operations
+    tags = [".NOBACKUP"]
+    with open_dir(str(tagged_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == [".NOBACKUP"]
+    with open_dir(str(other_tagged_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == []
+    with open_dir(str(normal_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == []
+
+    tags = [".NOBACKUP", ".DONOTBACKUP"]
+    with open_dir(str(tagged_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == [".NOBACKUP"]
+    with open_dir(str(other_tagged_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == [".DONOTBACKUP"]
+    with open_dir(str(normal_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_if_present=tags) == []
+
+    # Test 5: both exclude types with path-based operations
+    assert sorted(dir_is_tagged(path=str(both_dir), exclude_caches=True, exclude_if_present=[".NOBACKUP"])) == [
+        ".NOBACKUP",
+        CACHE_TAG_NAME,
+    ]
+    assert dir_is_tagged(path=str(cache_dir), exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [CACHE_TAG_NAME]
+    assert dir_is_tagged(path=str(tagged_dir), exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [".NOBACKUP"]
+    assert dir_is_tagged(path=str(normal_dir), exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == []
+
+    # Test 6: both exclude types with file descriptor-based operations
+    with open_dir(str(both_dir)) as fd:
+        result = dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"])
+        assert sorted(result) == [".NOBACKUP", CACHE_TAG_NAME]
+    with open_dir(str(cache_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [CACHE_TAG_NAME]
+    with open_dir(str(tagged_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [".NOBACKUP"]
+    with open_dir(str(normal_dir)) as fd:
+        assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == []