Merge pull request #763 from enkore/issue-761

fix links failing for extracting subtrees, fixes #761
TW, 9 years ago
commit 77dfcbc31d
4 changed files with 109 additions and 20 deletions
  1. borg/archive.py (+48 -18)
  2. borg/archiver.py (+15 -2)
  3. borg/helpers.py (+3 -0)
  4. borg/testsuite/archiver.py (+43 -0)
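
The failure mode fixed here, for context: when only a subtree is extracted, a hardlink item whose master lies outside that subtree has no link target on disk yet, so linking fails. A minimal reproduction sketch (hypothetical output paths, plain Python, not borg code):

import os

# Pre-fix behaviour: extracting only 'input/dir1' leaves the master
# 'input/source' unextracted, so creating the hardlink must fail.
try:
    os.link('output/input/source', 'output/input/dir1/hardlink')
except FileNotFoundError:
    print('hardlink extraction fails when the master is outside the subtree')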

+ 48 - 18
borg/archive.py

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
             return

+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
+
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         if chunks is None:
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(


 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])


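To make the new control flow easier to follow, here is a condensed sketch of the hardlink branch added to extract_item above. The helper name and the write_chunks callback are hypothetical; only the hardlink_masters dict protocol, mapping a source path to (chunks, link_target), matches the PR:

import os

def extract_regular(dest, path, original_path, item, hardlink_masters, write_chunks):
    # Sketch (hypothetical helper) mirroring the branch added in extract_item.
    if b'source' in item:                      # item is a hardlink slave
        if not hardlink_masters:
            # full extract: the master is already on disk under its own path
            os.link(os.path.join(dest, item[b'source']), path)
            return
        chunks, link_target = hardlink_masters[item[b'source']]
        if link_target:
            os.link(link_target, path)         # master extracted earlier: just link
            return
        item[b'chunks'] = chunks               # master was skipped: adopt its chunks
    write_chunks(path, item[b'chunks'])        # write the contents chunk by chunk
    if hardlink_masters:
        # remember where this inode landed, so later slaves just link to it
        hardlink_masters[item.get(b'source') or original_path] = (None, path)
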
+ 15 - 2
borg/archiver.py

@@ -353,8 +353,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
@@ -372,7 +384,8 @@ class Archiver:
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)


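On the archiver side, the essential point is that during a partial extract, hardlink masters must pass the item filter even when they do not match, so their chunks are available for slaves inside the match set. A hedged restatement of the predicate (matcher and strip_components come from argument parsing, as in the diff):

import stat

partial_extract = not matcher.empty() or bool(strip_components)
hardlink_masters = {} if partial_extract else None

def item_is_hardlink_master(item):
    # A regular file that owns the chunks of a hardlink group: it carries no
    # b'source' key, and b'hardlink_master' defaults to True so that archives
    # created before this flag existed still extract correctly.
    return (partial_extract and stat.S_ISREG(item[b'mode']) and
            item.get(b'hardlink_master', True) and b'source' not in item)
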
+ 3 - 0
borg/helpers.py

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback

+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.

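A quick usage sketch of the new empty() helper; the constructor and add() signatures follow the surrounding context, while some_pattern is a hypothetical placeholder:

matcher = PatternMatcher(fallback=True)
assert matcher.empty()             # no patterns: a full, non-partial extract
matcher.add([some_pattern], True)
assert not matcher.empty()         # patterns present: this is a partial extract
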
+ 43 - 0
borg/testsuite/archiver.py

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             with self.assert_creates_file('input/dir/file'):
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')

+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)