瀏覽代碼

Merge pull request #1492 from enkore/c/textshell/leaky-preload

(cherry pick) Fix leak in borg-extract --strip-components
enkore 8 年之前
父節點
當前提交
b124ec648a
共有 4 個文件被更改,包括 69 次插入和 4 次刪除
  1. borg/archive.py (+9 −0)
  2. borg/archiver.py (+12 −3)
  3. borg/remote.py (+2 −0)
  4. borg/testsuite/archiver.py (+46 −1)

+ 9 - 0
borg/archive.py

@@ -99,6 +99,15 @@ class DownloadPipeline:
         self.key = key
 
     def unpack_many(self, ids, filter=None, preload=False):
+        """
+        Return iterator of items.
+
+        *ids* is a chunk ID list of an item stream. *filter* is a callable
+        to decide whether an item will be yielded. *preload* preloads the data chunks of every yielded item.
+
+        Warning: if *preload* is True then all data chunks of every yielded item have to be retrieved,
+        otherwise preloaded chunks will accumulate in RemoteRepository and create a memory leak.
+        """
         unpacker = msgpack.Unpacker(use_list=False)
         for data in self.fetch_many(ids):
             unpacker.feed(data)

+ 12 - 3
borg/archiver.py

@@ -343,6 +343,16 @@ class Archiver:
                 status = '-'  # dry run, item was not backed up
         self.print_file_status(status, path)
 
+    @staticmethod
+    def build_filter(matcher, strip_components=0):
+        if strip_components:
+            def item_filter(item):
+                return matcher.match(item[b'path']) and os.sep.join(item[b'path'].split(os.sep)[strip_components:])
+        else:
+            def item_filter(item):
+                return matcher.match(item[b'path'])
+        return item_filter
+
     @with_repository()
     @with_archive
     def do_extract(self, args, repository, manifest, key, archive):
@@ -371,12 +381,11 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        filter = self.build_filter(matcher, strip_components)
+        for item in archive.iter_items(filter, preload=True):
             orig_path = item[b'path']
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item[b'path']:
-                    continue
             if not args.dry_run:
                 while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
                     dir_item = dirs.pop(-1)

+ 2 - 0
borg/remote.py

@@ -203,6 +203,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             raise
 
     def __del__(self):
+        if len(self.responses):
+            logging.debug("still %d cached responses left in RemoteRepository" % (len(self.responses),))
         if self.p:
             self.close()
             assert False, "cleanup happened in Repository.__del__"

+ 46 - 1
borg/testsuite/archiver.py

@@ -21,7 +21,7 @@ from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP, flags_noatime, flags_
 from ..archiver import Archiver
 from ..cache import Cache
 from ..crypto import bytes_to_long, num_aes_blocks
-from ..helpers import Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
+from ..helpers import Manifest, PatternMatcher, parse_pattern, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
 from . import BaseTestCase, changedir, environment_variable
@@ -1327,6 +1327,28 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     def test_debug_put_get_delete_obj(self):
         pass
 
+    def test_strip_components_doesnt_leak(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('dir/file', contents=b"test file contents 1")
+        self.create_regular_file('dir/file2', contents=b"test file contents 2")
+        self.create_regular_file('skipped-file1', contents=b"test file contents 3")
+        self.create_regular_file('skipped-file2', contents=b"test file contents 4")
+        self.create_regular_file('skipped-file3', contents=b"test file contents 5")
+        self.cmd('create', self.repository_location + '::test', 'input')
+        marker = 'cached responses left in RemoteRepository'
+        with changedir('output'):
+            res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '3')
+            self.assert_true(marker not in res)
+            with self.assert_creates_file('file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '2')
+                self.assert_true(marker not in res)
+            with self.assert_creates_file('dir/file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '1')
+                self.assert_true(marker not in res)
+            with self.assert_creates_file('input/dir/file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '0')
+                self.assert_true(marker not in res)
+
 
 def test_get_args():
     archiver = Archiver()
@@ -1347,3 +1369,26 @@ def test_get_args():
     args = archiver.get_args(['borg', 'serve', '--restrict-to-path=/p1', '--restrict-to-path=/p2', ],
                              'borg init /')
     assert args.func == archiver.do_serve
+
+
+class TestBuildFilter:
+    def test_basic(self):
+        matcher = PatternMatcher()
+        matcher.add([parse_pattern('included')], True)
+        filter = Archiver.build_filter(matcher)
+        assert filter({b'path': 'included'})
+        assert filter({b'path': 'included/file'})
+        assert not filter({b'path': 'something else'})
+
+    def test_empty(self):
+        matcher = PatternMatcher(fallback=True)
+        filter = Archiver.build_filter(matcher)
+        assert filter({b'path': 'anything'})
+
+    def test_strip_components(self):
+        matcher = PatternMatcher(fallback=True)
+        filter = Archiver.build_filter(matcher, strip_components=1)
+        assert not filter({b'path': 'shallow'})
+        assert not filter({b'path': 'shallow/'})  # can this even happen? paths are normalized...
+        assert filter({b'path': 'deep enough/file'})
+        assert filter({b'path': 'something/dir/file'})