Kaynağa Gözat

Merge pull request #1526 from textshell/fix/preload-leak-hardlink

extract: When doing a partial restore don't leak prefetched chunks.
enkore 9 yıl önce
ebeveyn
işleme
47404dfde5

+ 7 - 5
src/borg/archive.py

@@ -161,11 +161,11 @@ class DownloadPipeline:
         for _, data in self.fetch_many(ids):
             unpacker.feed(data)
             items = [Item(internal_dict=item) for item in unpacker]
-            if filter:
-                items = [item for item in items if filter(item)]
             for item in items:
                 if 'chunks' in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
+            if filter:
+                items = [item for item in items if filter(item)]
             if preload:
                 for item in items:
                     if 'chunks' in item:
@@ -422,7 +422,7 @@ Number of files: {0.stats.nfiles}'''.format(
         return stats

     def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
-                     hardlink_masters=None, original_path=None, pi=None):
+                     hardlink_masters=None, stripped_components=0, original_path=None, pi=None):
         """
         Extract archive item.

@@ -432,9 +432,11 @@ Number of files: {0.stats.nfiles}'''.format(
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param stripped_components: stripped leading path components to correct hard link extraction
         :param original_path: 'path' key as stored in archive
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         """
+        hardlink_masters = hardlink_masters or {}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
@@ -473,11 +475,11 @@ Number of files: {0.stats.nfiles}'''.format(
                     os.makedirs(os.path.dirname(path))
             # Hard link?
             if 'source' in item:
-                source = os.path.join(dest, item.source)
+                source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
                 with backup_io():
                     if os.path.exists(path):
                         os.unlink(path)
-                    if not hardlink_masters:
+                    if item.source not in hardlink_masters:
                         os.link(source, path)
                         return
                 item.chunks, link_target = hardlink_masters[item.source]

+ 14 - 15
src/borg/archiver.py

@@ -417,15 +417,17 @@ class Archiver:
         self.print_file_status(status, path)

     @staticmethod
-    def build_filter(matcher, is_hardlink_master, strip_components=0):
+    def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
         if strip_components:
             def item_filter(item):
-                return (is_hardlink_master(item) or
-                        matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]))
+                matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
+                peek_and_store_hardlink_masters(item, matched)
+                return matched
         else:
             def item_filter(item):
-                return (is_hardlink_master(item) or
-                        matcher.match(item.path))
+                matched = matcher.match(item.path)
+                peek_and_store_hardlink_masters(item, matched)
+                return matched
         return item_filter

     @with_repository()
@@ -450,25 +452,22 @@ class Archiver:
         partial_extract = not matcher.empty() or strip_components
         hardlink_masters = {} if partial_extract else None

-        def item_is_hardlink_master(item):
-            return (partial_extract and stat.S_ISREG(item.mode) and
-                    item.get('hardlink_master', True) and 'source' not in item)
+        def peek_and_store_hardlink_masters(item, matched):
+            if (partial_extract and not matched and stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item):
+                hardlink_masters[item.get('path')] = (item.get('chunks'), None)

-        filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
+        filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
         if progress:
             progress_logger = logging.getLogger(ProgressIndicatorPercent.LOGGER)
             progress_logger.info('Calculating size')
-            extracted_size = sum(item.file_size() for item in archive.iter_items(filter))
+            extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
             pi = ProgressIndicatorPercent(total=extracted_size, msg='Extracting files %5.1f%%', step=0.1)
         else:
             pi = None

         for item in archive.iter_items(filter, preload=True):
             orig_path = item.path
-            if item_is_hardlink_master(item):
-                hardlink_masters[orig_path] = (item.get('chunks'), None)
-            if not matcher.match(item.path):
-                continue
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
             if not args.dry_run:
@@ -489,7 +488,7 @@ class Archiver:
                         archive.extract_item(item, restore_attrs=False)
                     else:
                         archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
-                                             original_path=orig_path, pi=pi)
+                                             stripped_components=strip_components, original_path=orig_path, pi=pi)
             except BackupOSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
 

+ 6 - 3
src/borg/item.py

@@ -157,10 +157,13 @@ class Item(PropDict):

     part = PropDict._make_property('part', int)

-    def file_size(self):
-        if 'chunks' not in self:
+    def file_size(self, hardlink_masters=None):
+        hardlink_masters = hardlink_masters or {}
+        chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
+        chunks = self.get('chunks', chunks)
+        if chunks is None:
             return 0
-        return sum(chunk.size for chunk in self.chunks)
+        return sum(chunk.size for chunk in chunks)


 class EncryptedKey(PropDict):

+ 5 - 5
src/borg/testsuite/archiver.py

@@ -2204,25 +2204,25 @@ def test_compare_chunk_contents():

 class TestBuildFilter:
     @staticmethod
-    def item_is_hardlink_master(item):
-        return False
+    def peek_and_store_hardlink_masters(item, matched):
+        pass

     def test_basic(self):
         matcher = PatternMatcher()
         matcher.add([parse_pattern('included')], True)
-        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
         assert filter(Item(path='included'))
         assert filter(Item(path='included/file'))
         assert not filter(Item(path='something else'))

     def test_empty(self):
         matcher = PatternMatcher(fallback=True)
-        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
         assert filter(Item(path='anything'))

     def test_strip_components(self):
         matcher = PatternMatcher(fallback=True)
-        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master, strip_components=1)
+        filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1)
         assert not filter(Item(path='shallow'))
         assert not filter(Item(path='shallow/'))  # can this even happen? paths are normalized...
         assert filter(Item(path='deep enough/file'))