Parcourir la source

borg mount: support hardlinks correctly, add tests

The previous commit did not yet support hardlinks correctly when the
hardlink master was excluded from the mount (for example by exclude patterns, path arguments, or --strip-components).

Added some tests for this and slightly refactored the related tests.
Thomas Waldmann il y a 7 ans
Parent
commit
e97deafb16
2 fichiers modifiés avec 94 ajouts et 37 suppressions
  1. 48 27
      src/borg/fuse.py
  2. 46 10
      src/borg/testsuite/archiver.py

+ 48 - 27
src/borg/fuse.py

@@ -297,8 +297,15 @@ class FuseBackend(object):
                           consider_part_files=self._args.consider_part_files)
                           consider_part_files=self._args.consider_part_files)
         strip_components = self._args.strip_components
         strip_components = self._args.strip_components
         matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
         matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
-        dummy = lambda x, y: None  # TODO: add hardlink_master support code, see Archiver
-        filter = Archiver.build_filter(matcher, dummy, strip_components)
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def peek_and_store_hardlink_masters(item, matched):
+            if (partial_extract and not matched and hardlinkable(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item):
+                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
+
+        filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
         for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
         for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
             if strip_components:
             if strip_components:
                 item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
                 item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
@@ -319,11 +326,16 @@ class FuseBackend(object):
             parent = 1
             parent = 1
             for segment in segments[:-1]:
             for segment in segments[:-1]:
                 parent = self._process_inner(segment, parent)
                 parent = self._process_inner(segment, parent)
-            self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
+            self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
+                               hardlink_masters, strip_components)
         duration = time.perf_counter() - t0
         duration = time.perf_counter() - t0
         logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)
         logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)
 
 
-    def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
+    def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
+        path = item.path
+        del item.path  # save some space
+        hardlink_masters = hardlink_masters or {}
+
         def file_version(item, path):
         def file_version(item, path):
             if 'chunks' in item:
             if 'chunks' in item:
                 file_id = blake2b_128(path)
                 file_id = blake2b_128(path)
@@ -348,35 +360,44 @@ class FuseBackend(object):
             version_enc = os.fsencode('.%05d' % version)
             version_enc = os.fsencode('.%05d' % version)
             return name + version_enc + ext
             return name + version_enc + ext
 
 
+        if 'source' in item and hardlinkable(item.mode):
+            source = os.path.join(*item.source.split(os.sep)[stripped_components:])
+            chunks, link_target = hardlink_masters.get(item.source, (None, source))
+            if link_target:
+                # Hard link was extracted previously, just link
+                link_target = os.fsencode(link_target)
+                if self.versions:
+                    # adjust link target name with version
+                    version = self.file_versions[link_target]
+                    link_target = make_versioned_name(link_target, version, add_dir=True)
+                try:
+                    inode = self.find_inode(link_target, prefix)
+                except KeyError:
+                    logger.warning('Skipping broken hard link: %s -> %s', path, source)
+                    return
+                item = self.get_item(inode)
+                item.nlink = item.get('nlink', 1) + 1
+                self._items[inode] = item
+            elif chunks is not None:
+                # assign chunks to this item, since the item which had the chunks was not extracted
+                item.chunks = chunks
+                inode = item_inode
+                self._items[inode] = item
+                if hardlink_masters:
+                    # Update master entry with extracted item path, so that following hardlinks don't extract twice.
+                    hardlink_masters[item.source] = (None, path)
+        else:
+            inode = item_inode
+
         if self.versions and not is_dir:
         if self.versions and not is_dir:
             parent = self._process_inner(name, parent)
             parent = self._process_inner(name, parent)
-            path = os.fsencode(item.path)
-            version = file_version(item, path)
+            enc_path = os.fsencode(path)
+            version = file_version(item, enc_path)
             if version is not None:
             if version is not None:
                 # regular file, with contents - maybe a hardlink master
                 # regular file, with contents - maybe a hardlink master
                 name = make_versioned_name(name, version)
                 name = make_versioned_name(name, version)
-                self.file_versions[path] = version
+                self.file_versions[enc_path] = version
 
 
-        path = item.path
-        del item.path  # save some space
-        if 'source' in item and hardlinkable(item.mode):
-            # a hardlink, no contents, <source> is the hardlink master
-            source = os.fsencode(item.source)
-            if self.versions:
-                # adjust source name with version
-                version = self.file_versions[source]
-                source = make_versioned_name(source, version, add_dir=True)
-                name = make_versioned_name(name, version)
-            try:
-                inode = self.find_inode(source, prefix)
-            except KeyError:
-                logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
-                return
-            item = self.cache.get(inode)
-            item.nlink = item.get('nlink', 1) + 1
-            self._items[inode] = item
-        else:
-            inode = item_inode
         self.parent[inode] = parent
         self.parent[inode] = parent
         if name:
         if name:
             self.contents[parent][name] = inode
             self.contents[parent][name] = inode

+ 46 - 10
src/borg/testsuite/archiver.py

@@ -760,7 +760,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         os.mkdir(os.path.join(self.input_path, 'dir1'))
         os.mkdir(os.path.join(self.input_path, 'dir1'))
         os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
         os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
 
 
-        self.create_regular_file('source')
+        self.create_regular_file('source', contents=b'123456')
         os.link(os.path.join(self.input_path, 'source'),
         os.link(os.path.join(self.input_path, 'source'),
                 os.path.join(self.input_path, 'abba'))
                 os.path.join(self.input_path, 'abba'))
         os.link(os.path.join(self.input_path, 'source'),
         os.link(os.path.join(self.input_path, 'source'),
@@ -778,30 +778,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
     requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
 
 
     @requires_hardlinks
     @requires_hardlinks
-    def test_strip_components_links(self):
+    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
+    def test_mount_hardlinks(self):
         self._extract_hardlinks_setup()
         self._extract_hardlinks_setup()
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
+             changedir(mountpoint):
             assert os.stat('hardlink').st_nlink == 2
             assert os.stat('hardlink').st_nlink == 2
             assert os.stat('subdir/hardlink').st_nlink == 2
             assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('aaaa').st_nlink == 2
             assert os.stat('aaaa').st_nlink == 2
             assert os.stat('source2').st_nlink == 2
             assert os.stat('source2').st_nlink == 2
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
+             changedir(mountpoint):
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with self.fuse_mount(self.repository_location + '::test', mountpoint), \
+             changedir(mountpoint):
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
 
     @requires_hardlinks
     @requires_hardlinks
     def test_extract_hardlinks(self):
     def test_extract_hardlinks(self):
         self._extract_hardlinks_setup()
         self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
         with changedir('output'):
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test', 'input/dir1')
             self.cmd('extract', self.repository_location + '::test', 'input/dir1')
             assert os.stat('input/dir1/hardlink').st_nlink == 2
             assert os.stat('input/dir1/hardlink').st_nlink == 2
             assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
             assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
         with changedir('output'):
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test')
             self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
 
     def test_extract_include_exclude(self):
     def test_extract_include_exclude(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.create_regular_file('test', contents=b'first')
         self.create_regular_file('test', contents=b'first')
         if are_hardlinks_supported():
         if are_hardlinks_supported():
-            self.create_regular_file('hardlink1', contents=b'')
+            self.create_regular_file('hardlink1', contents=b'123456')
             os.link('input/hardlink1', 'input/hardlink2')
             os.link('input/hardlink1', 'input/hardlink2')
+            os.link('input/hardlink1', 'input/hardlink3')
         self.cmd('create', self.repository_location + '::archive1', 'input')
         self.cmd('create', self.repository_location + '::archive1', 'input')
         self.create_regular_file('test', contents=b'second')
         self.create_regular_file('test', contents=b'second')
         self.cmd('create', self.repository_location + '::archive2', 'input')
         self.cmd('create', self.repository_location + '::archive2', 'input')
@@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxx in there
             assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxx in there
             assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
             assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
             if are_hardlinks_supported():
             if are_hardlinks_supported():
-                st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
-                st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
-                assert st1.st_ino == st2.st_ino
+                hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
+        # similar again, but exclude the hardlink master:
+        with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
+            if are_hardlinks_supported():
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
 
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
     def test_fuse_allow_damaged_files(self):
     def test_fuse_allow_damaged_files(self):