Przeglądaj źródła

borg mount: support hardlinks correctly, add tests

The previous commit did not yet handle hardlinks correctly when the
hardlink master itself was excluded from the mount (e.g. by an exclude
pattern, a path argument, or --strip-components).

Added tests for this case and slightly refactored the related tests.

(cherry picked from commit e97deafb16ac44450b542e4e23d57556631f647b)
Thomas Waldmann 7 lat temu
rodzic
commit
0e07647576
2 zmienionych plików z 94 dodań i 37 usunięć
  1. 48 27
      src/borg/fuse.py
  2. 46 10
      src/borg/testsuite/archiver.py

+ 48 - 27
src/borg/fuse.py

@@ -333,8 +333,15 @@ class FuseOperations(llfuse.Operations):
                           consider_part_files=self.args.consider_part_files)
         strip_components = self.args.strip_components
         matcher = Archiver.build_matcher(self.args.patterns, self.args.paths)
-        dummy = lambda x, y: None  # TODO: add hardlink_master support code, see Archiver
-        filter = Archiver.build_filter(matcher, dummy, strip_components)
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def peek_and_store_hardlink_masters(item, matched):
+            if (partial_extract and not matched and hardlinkable(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item):
+                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
+
+        filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
         for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
             if strip_components:
                 item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
@@ -355,11 +362,16 @@ class FuseOperations(llfuse.Operations):
             parent = 1
             for segment in segments[:-1]:
                 parent = self.process_inner(segment, parent)
-            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
+            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
+                              hardlink_masters, strip_components)
         duration = time.perf_counter() - t0
         logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
 
-    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
+    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
+        path = item.path
+        del item.path  # save some space
+        hardlink_masters = hardlink_masters or {}
+
         def file_version(item, path):
             if 'chunks' in item:
                 file_id = blake2b_128(path)
@@ -384,35 +396,44 @@ class FuseOperations(llfuse.Operations):
             version_enc = os.fsencode('.%05d' % version)
             return name + version_enc + ext
 
+        if 'source' in item and hardlinkable(item.mode):
+            source = os.path.join(*item.source.split(os.sep)[stripped_components:])
+            chunks, link_target = hardlink_masters.get(item.source, (None, source))
+            if link_target:
+                # Hard link was extracted previously, just link
+                link_target = os.fsencode(link_target)
+                if self.versions:
+                    # adjust link target name with version
+                    version = self.file_versions[link_target]
+                    link_target = make_versioned_name(link_target, version, add_dir=True)
+                try:
+                    inode = self._find_inode(link_target, prefix)
+                except KeyError:
+                    logger.warning('Skipping broken hard link: %s -> %s', path, source)
+                    return
+                item = self.get_item(inode)
+                item.nlink = item.get('nlink', 1) + 1
+                self.items[inode] = item
+            elif chunks is not None:
+                # assign chunks to this item, since the item which had the chunks was not extracted
+                item.chunks = chunks
+                inode = item_inode
+                self.items[inode] = item
+                if hardlink_masters:
+                    # Update master entry with extracted item path, so that following hardlinks don't extract twice.
+                    hardlink_masters[item.source] = (None, path)
+        else:
+            inode = item_inode
+
         if self.versions and not is_dir:
             parent = self.process_inner(name, parent)
-            path = os.fsencode(item.path)
-            version = file_version(item, path)
+            enc_path = os.fsencode(path)
+            version = file_version(item, enc_path)
             if version is not None:
                 # regular file, with contents - maybe a hardlink master
                 name = make_versioned_name(name, version)
-                self.file_versions[path] = version
+                self.file_versions[enc_path] = version
 
-        path = item.path
-        del item.path  # save some space
-        if 'source' in item and hardlinkable(item.mode):
-            # a hardlink, no contents, <source> is the hardlink master
-            source = os.fsencode(item.source)
-            if self.versions:
-                # adjust source name with version
-                version = self.file_versions[source]
-                source = make_versioned_name(source, version, add_dir=True)
-                name = make_versioned_name(name, version)
-            try:
-                inode = self._find_inode(source, prefix)
-            except KeyError:
-                logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
-                return
-            item = self.cache.get(inode)
-            item.nlink = item.get('nlink', 1) + 1
-            self.items[inode] = item
-        else:
-            inode = item_inode
         self.parent[inode] = parent
         if name:
             self.contents[parent][name] = inode

+ 46 - 10
src/borg/testsuite/archiver.py

@@ -759,7 +759,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         os.mkdir(os.path.join(self.input_path, 'dir1'))
         os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
 
-        self.create_regular_file('source')
+        self.create_regular_file('source', contents=b'123456')
         os.link(os.path.join(self.input_path, 'source'),
                 os.path.join(self.input_path, 'abba'))
         os.link(os.path.join(self.input_path, 'source'),
@@ -777,30 +777,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
 
     @requires_hardlinks
-    def test_strip_components_links(self):
+    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
+    def test_mount_hardlinks(self):
         self._extract_hardlinks_setup()
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
+             changedir(mountpoint):
             assert os.stat('hardlink').st_nlink == 2
             assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('aaaa').st_nlink == 2
             assert os.stat('source2').st_nlink == 2
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
+             changedir(mountpoint):
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with self.fuse_mount(self.repository_location + '::test', mountpoint), \
+             changedir(mountpoint):
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     @requires_hardlinks
     def test_extract_hardlinks(self):
         self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test', 'input/dir1')
             assert os.stat('input/dir1/hardlink').st_nlink == 2
             assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     def test_extract_include_exclude(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.create_regular_file('test', contents=b'first')
         if are_hardlinks_supported():
-            self.create_regular_file('hardlink1', contents=b'')
+            self.create_regular_file('hardlink1', contents=b'123456')
             os.link('input/hardlink1', 'input/hardlink2')
+            os.link('input/hardlink1', 'input/hardlink3')
         self.cmd('create', self.repository_location + '::archive1', 'input')
         self.create_regular_file('test', contents=b'second')
         self.cmd('create', self.repository_location + '::archive2', 'input')
@@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxx in there
             assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
             if are_hardlinks_supported():
-                st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
-                st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
-                assert st1.st_ino == st2.st_ino
+                hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
+        # similar again, but exclude the hardlink master:
+        with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
+            if are_hardlinks_supported():
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
     def test_fuse_allow_damaged_files(self):