فهرست منبع

Merge pull request #5228 from ThomasWaldmann/fix-4911-1.1

hardlinked CACHEDIR.TAG issue
TW 5 سال پیش
والد
کامیت
c0fe7648eb
2 فایلهای تغییر یافته به همراه 34 افزوده شده و 11 حذف شده
  1. 20 11
      src/borg/archive.py
  2. 14 0
      src/borg/testsuite/archiver.py

+ 20 - 11
src/borg/archive.py

@@ -1842,24 +1842,33 @@ class ArchiveRecreater:
         matcher = self.matcher
         matcher = self.matcher
         tag_files = []
         tag_files = []
         tagged_dirs = []
         tagged_dirs = []
-        # build hardlink masters, but only for paths ending in CACHE_TAG_NAME, so we can read hard-linked TAGs
+
+        # to support reading hard-linked CACHEDIR.TAGs (aka CACHE_TAG_NAME), similar to hardlink_masters:
         cachedir_masters = {}
         cachedir_masters = {}
 
 
+        if self.exclude_caches:
+            # sadly, due to how CACHEDIR.TAG works (filename AND file [header] contents) and
+            # how borg deals with hardlinks (slave hardlinks referring back to master hardlinks),
+            # we need to pass over the archive collecting hardlink master paths.
+            # as seen in issue #4911, the master paths can have arbitrary filenames,
+            # not just CACHEDIR.TAG.
+            for item in archive.iter_items(filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME):
+                if stat.S_ISREG(item.mode) and 'chunks' not in item and 'source' in item:
+                    # this is a hardlink slave, referring back to its hardlink master (via item.source)
+                    cachedir_masters[item.source] = None  # we know the key (path), but not the value (item) yet
+
         for item in archive.iter_items(
         for item in archive.iter_items(
-                filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
-            if item.path.endswith(CACHE_TAG_NAME):
+                filter=lambda item: os.path.basename(item.path) == CACHE_TAG_NAME or matcher.match(item.path)):
+            if self.exclude_caches and item.path in cachedir_masters:
                 cachedir_masters[item.path] = item
                 cachedir_masters[item.path] = item
             dir, tag_file = os.path.split(item.path)
             dir, tag_file = os.path.split(item.path)
             if tag_file in self.exclude_if_present:
             if tag_file in self.exclude_if_present:
                 exclude(dir, item)
                 exclude(dir, item)
-            if stat.S_ISREG(item.mode):
-                if self.exclude_caches and tag_file == CACHE_TAG_NAME:
-                    if 'chunks' in item:
-                        file = open_item(archive, item)
-                    else:
-                        file = open_item(archive, cachedir_masters[item.source])
-                    if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
-                        exclude(dir, item)
+            elif self.exclude_caches and tag_file == CACHE_TAG_NAME and stat.S_ISREG(item.mode):
+                content_item = item if 'chunks' in item else cachedir_masters[item.source]
+                file = open_item(archive, content_item)
+                if file.read(len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS:
+                    exclude(dir, item)
         matcher.add(tag_files, IECommand.Include)
         matcher.add(tag_files, IECommand.Include)
         matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
         matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
 
 

+ 14 - 0
src/borg/testsuite/archiver.py

@@ -1258,6 +1258,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                  '--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test')
                  '--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test')
         self._assert_test_keep_tagged()
         self._assert_test_keep_tagged()
 
 
+    @pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
+    def test_recreate_hardlinked_tags(self):  # test for issue #4911
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.create_regular_file('file1', contents=CACHE_TAG_CONTENTS)  # "wrong" filename, but correct tag contents
+        os.mkdir(os.path.join(self.input_path, 'subdir'))  # to make sure the tag is encountered *after* file1
+        os.link(os.path.join(self.input_path, 'file1'),
+                os.path.join(self.input_path, 'subdir', CACHE_TAG_NAME))  # correct tag name, hardlink to file1
+        self.cmd('create', self.repository_location + '::test', 'input')
+        # in the "test" archive, we now have, in this order:
+        # - a regular file item for "file1"
+        # - a hardlink item for "CACHEDIR.TAG" referring back to file1 for its contents
+        self.cmd('recreate', '--exclude-caches', '--keep-exclude-tags', self.repository_location + '::test')
+        # if issue #4911 is present, the recreate will crash with a KeyError for "input/file1"
+
     @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='Linux capabilities test, requires fakeroot >= 1.20.2')
     @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='Linux capabilities test, requires fakeroot >= 1.20.2')
     def test_extract_capabilities(self):
     def test_extract_capabilities(self):
         fchown = os.fchown
         fchown = os.fchown