6 years ago · 839e92fc4b
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -248,7 +248,7 @@ class DownloadPipeline:
 
				         self.repository = repository
			
 
				         self.key = key
			
 
				 
			
 
				-    def unpack_many(self, ids, filter=None, preload=False):
			
 
				+    def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
			
 
				         """
			
 
				         Return iterator of items.
			
 
				 
			
@@ -265,12 +265,40 @@ class DownloadPipeline:
 
				             for item in items:
			
 
				                 if 'chunks' in item:
			
 
				                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
			
 
				+
			
 
				+            def preload(chunks):
			
 
				+                self.repository.preload([c.id for c in chunks])
			
 
				+
			
 
				             if filter:
			
 
				                 items = [item for item in items if filter(item)]
			
 
				+
			
 
				             if preload:
			
 
				-                for item in items:
			
 
				-                    if 'chunks' in item:
			
 
				-                        self.repository.preload([c.id for c in item.chunks])
			
 
				+                if filter and partial_extract:
			
 
				+                    # if we do only a partial extraction, it gets a bit
			
 
				+                    # complicated with computing the preload items: if a hardlink master item is not
			
 
				+                    # selected (== not extracted), we will still need to preload its chunks if a
			
 
				+                    # corresponding hardlink slave is selected (== is extracted).
			
 
				+                    # due to a side effect of the filter() call, we now have hardlink_masters dict populated.
			
 
				+                    masters_preloaded = set()
			
 
				+                    for item in items:
			
 
				+                        if 'chunks' in item:  # regular file, maybe a hardlink master
			
 
				+                            preload(item.chunks)
			
 
				+                            # if this is a hardlink master, remember that we already preloaded it:
			
 
				+                            if 'source' not in item and hardlinkable(item.mode) and item.get('hardlink_master', True):
			
 
				+                                masters_preloaded.add(item.path)
			
 
				+                        elif 'source' in item and hardlinkable(item.mode):  # hardlink slave
			
 
				+                            source = item.source
			
 
				+                            if source not in masters_preloaded:
			
 
				+                                # we only need to preload *once* (for the 1st selected slave)
			
 
				+                                chunks, _ = hardlink_masters[source]
			
 
				+                                preload(chunks)
			
 
				+                                masters_preloaded.add(source)
			
 
				+                else:
			
 
				+                    # easy: we do not have a filter, thus all items are selected, thus we need to preload all chunks.
			
 
				+                    for item in items:
			
 
				+                        if 'chunks' in item:
			
 
				+                            preload(item.chunks)
			
 
				+
			
 
				             for item in items:
			
 
				                 yield item
			
 
				 
			
@@ -486,8 +514,10 @@ Utilization of max. archive size: {csize_max:.0%}
 
				             return False
			
 
				         return filter(item) if filter else True
			
 
				 
			
 
				-    def iter_items(self, filter=None, preload=False):
			
 
				-        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
			
 
				+    def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
			
 
				+        assert not (filter and partial_extract and preload) or hardlink_masters is not None
			
 
				+        for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
			
 
				+                                              preload=preload, hardlink_masters=hardlink_masters,
			
 
				                                               filter=lambda item: self.item_filter(item, filter)):
			
 
				             yield item
			
 
				 
			
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -769,7 +769,8 @@ class Archiver:
 
				         else:
			
 
				             pi = None
			
 
				 
			
 
				-        for item in archive.iter_items(filter, preload=True):
			
 
				+        for item in archive.iter_items(filter, partial_extract=partial_extract,
			
 
				+                                       preload=True, hardlink_masters=hardlink_masters):
			
 
				             orig_path = item.path
			
 
				             if strip_components:
			
 
				                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
			
@@ -1011,7 +1012,7 @@ class Archiver:
 
				                 return None, stream
			
 
				             return tarinfo, stream
			
 
				 
			
 
				-        for item in archive.iter_items(filter, preload=True):
			
 
				+        for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
			
 
				             orig_path = item.path
			
 
				             if strip_components:
			
 
				                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
			
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -824,7 +824,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
				             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
			
 
				 
			
 
				     @requires_hardlinks
			
 
				-    def test_extract_hardlinks(self):
			
 
				+    def test_extract_hardlinks1(self):
			
 
				+        self._extract_hardlinks_setup()
			
 
				+        with changedir('output'):
			
 
				+            self.cmd('extract', self.repository_location + '::test')
			
 
				+            assert os.stat('input/source').st_nlink == 4
			
 
				+            assert os.stat('input/abba').st_nlink == 4
			
 
				+            assert os.stat('input/dir1/hardlink').st_nlink == 4
			
 
				+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
			
 
				+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
			
 
				+
			
 
				+    @requires_hardlinks
			
 
				+    def test_extract_hardlinks2(self):
			
 
				         self._extract_hardlinks_setup()
			
 
				         with changedir('output'):
			
 
				             self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
			
@@ -840,13 +851,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
				             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
			
 
				             assert os.stat('input/dir1/aaaa').st_nlink == 2
			
 
				             assert os.stat('input/dir1/source2').st_nlink == 2
			
 
				-        with changedir('output'):
			
 
				-            self.cmd('extract', self.repository_location + '::test')
			
 
				-            assert os.stat('input/source').st_nlink == 4
			
 
				-            assert os.stat('input/abba').st_nlink == 4
			
 
				-            assert os.stat('input/dir1/hardlink').st_nlink == 4
			
 
				-            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
			
 
				-            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
			
 
				 
			
 
				     def test_extract_include_exclude(self):
			
 
				         self.cmd('init', '--encryption=repokey', self.repository_location)