Prechádzať zdrojové kódy

Merge branch '1.0-maint'

Thomas Waldmann 9 rokov pred
rodič
commit
c355f3617a

+ 44 - 3
docs/changes.rst

@@ -123,12 +123,53 @@ Other changes:
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
 
 
-Version 1.0.7 (not released yet)
---------------------------------
+Version 1.0.7 (2016-08-19)
+--------------------------
 
 Security fixes:
 
-- fix security issue with remote repository access, #1428
+- borg serve: fix security issue with remote repository access, #1428
+  If you used e.g. --restrict-to-path /path/client1/ (with or without trailing
+  slash does not make a difference), it acted like a path prefix match using
+  /path/client1 (note the missing trailing slash) - the code then also allowed
+  working in e.g. /path/client13 or /path/client1000.
+
+  As this could accidentally lead to major security/privacy issues depending on
+  the pathes you use, the behaviour was changed to be a strict directory match.
+  That means --restrict-to-path /path/client1 (with or without trailing slash
+  does not make a difference) now uses /path/client1/ internally (note the
+  trailing slash here!) for matching and allows precisely that path AND any
+  path below it. So, /path/client1 is allowed, /path/client1/repo1 is allowed,
+  but not /path/client13 or /path/client1000.
+
+  If you willingly used the undocumented (dangerous) previous behaviour, you
+  may need to rearrange your --restrict-to-path pathes now. We are sorry if
+  that causes work for you, but we did not want a potentially dangerous
+  behaviour in the software (not even using a for-backwards-compat option).
+
+Bug fixes:
+
+- fixed repeated LockTimeout exceptions when borg serve tried to write into
+  a already write-locked repo (e.g. by a borg mount), #502 part b)
+  This was solved by the fix for #1220 in 1.0.7rc1 already.
+- fix cosmetics + file leftover for "not a valid borg repository", #1490
+- Cache: release lock if cache is invalid, #1501
+- borg extract --strip-components: fix leak of preloaded chunk contents
+- Repository, when a InvalidRepository exception happens:
+
+  - fix spurious, empty lock.roster
+  - fix repo not closed cleanly
+
+New features:
+
+- implement borg debug-info, fixes #1122
+  (just calls already existing code via cli, same output as below tracebacks)
+
+Other changes:
+
+- skip the O_NOATIME test on GNU Hurd, fixes #1315
+  (this is a very minor issue and the GNU Hurd project knows the bug)
+- document using a clean repo to test / build the release
 
 
 Version 1.0.7rc2 (2016-08-13)

+ 8 - 0
docs/development.rst

@@ -192,6 +192,14 @@ Checklist:
 
     git tag -s -m "tagged/signed release X.Y.Z" X.Y.Z
 
+- create a clean repo and use it for the following steps::
+
+    git clone borg borg-clean
+
+  This makes sure no uncommitted files get into the release archive.
+  It also will find if you forgot to commit something that is needed.
+  It also makes sure the vagrant machines only get committed files and
+  do a fresh start based on that.
 - run tox and/or binary builds on all supported platforms via vagrant,
   check for test failures
 - create a release on PyPi::

+ 9 - 0
src/borg/archive.py

@@ -148,6 +148,15 @@ class DownloadPipeline:
         self.key = key
 
     def unpack_many(self, ids, filter=None, preload=False):
+        """
+        Return iterator of items.
+
+        *ids* is a chunk ID list of an item stream. *filter* is a callable
+        to decide whether an item will be yielded. *preload* preloads the data chunks of every yielded item.
+
+        Warning: if *preload* is True then all data chunks of every yielded item have to be retrieved,
+        otherwise preloaded chunks will accumulate in RemoteRepository and create a memory leak.
+        """
         unpacker = msgpack.Unpacker(use_list=False)
         for _, data in self.fetch_many(ids):
             unpacker.feed(data)

+ 31 - 4
src/borg/archiver.py

@@ -415,6 +415,18 @@ class Archiver:
                 status = '-'  # dry run, item was not backed up
         self.print_file_status(status, path)
 
+    @staticmethod
+    def build_filter(matcher, is_hardlink_master, strip_components=0):
+        if strip_components:
+            def item_filter(item):
+                return (is_hardlink_master(item) or
+                        matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]))
+        else:
+            def item_filter(item):
+                return (is_hardlink_master(item) or
+                        matcher.match(item.path))
+        return item_filter
+
     @with_repository()
     @with_archive
     def do_extract(self, args, repository, manifest, key, archive):
@@ -440,8 +452,8 @@ class Archiver:
             return (partial_extract and stat.S_ISREG(item.mode) and
                     item.get('hardlink_master', True) and 'source' not in item)
 
-        for item in archive.iter_items(preload=True,
-                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+        filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
+        for item in archive.iter_items(filter, preload=True):
             orig_path = item.path
             if item_is_hardlink_master(item):
                 hardlink_masters[orig_path] = (item.get('chunks'), None)
@@ -449,8 +461,6 @@ class Archiver:
                 continue
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item.path:
-                    continue
             if not args.dry_run:
                 while dirs and not item.path.startswith(dirs[-1].path):
                     dir_item = dirs.pop(-1)
@@ -1004,6 +1014,11 @@ class Archiver:
         finally:
             repository.rollback()
 
+    def do_debug_info(self, args):
+        """display system information for debugging / bug reports"""
+        print(sysinfo())
+        return EXIT_SUCCESS
+
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -2164,6 +2179,18 @@ class Archiver:
         subparser.add_argument('topic', metavar='TOPIC', type=str, nargs='?',
                                help='additional help on TOPIC')
 
+        debug_info_epilog = textwrap.dedent("""
+        This command displays some system information that might be useful for bug
+        reports and debugging problems. If a traceback happens, this information is
+        already appended at the end of the traceback.
+        """)
+        subparser = subparsers.add_parser('debug-info', parents=[common_parser], add_help=False,
+                                          description=self.do_debug_info.__doc__,
+                                          epilog=debug_info_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='show system infos for debugging / bug reports (debug)')
+        subparser.set_defaults(func=self.do_debug_info)
+
         debug_dump_archive_items_epilog = textwrap.dedent("""
         This command dumps raw (but decrypted and decompressed) archive items (only metadata) to files.
         """)

+ 2 - 0
src/borg/cache.py

@@ -157,9 +157,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             cache_version = self.config.getint('cache', 'version')
             wanted_version = 1
             if cache_version != wanted_version:
+                self.close()
                 raise Exception('%s has unexpected cache version %d (wanted: %d).' % (
                     config_path, cache_version, wanted_version))
         except configparser.NoSectionError:
+            self.close()
             raise Exception('%s does not look like a Borg cache.' % config_path) from None
         self.id = self.config.get('cache', 'repository')
         self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))

+ 7 - 0
src/borg/locking.py

@@ -201,6 +201,9 @@ class LockRoster:
         roster = self.load()
         return set(tuple(e) for e in roster.get(key, []))
 
+    def empty(self, *keys):
+        return all(not self.get(key) for key in keys)
+
     def modify(self, key, op):
         roster = self.load()
         try:
@@ -293,10 +296,14 @@ class Lock:
     def release(self):
         if self.is_exclusive:
             self._roster.modify(EXCLUSIVE, REMOVE)
+            if self._roster.empty(EXCLUSIVE, SHARED):
+                self._roster.remove()
             self._lock.release()
         else:
             with self._lock:
                 self._roster.modify(SHARED, REMOVE)
+                if self._roster.empty(EXCLUSIVE, SHARED):
+                    self._roster.remove()
 
     def upgrade(self):
         # WARNING: if multiple read-lockers want to upgrade, it will deadlock because they

+ 8 - 1
src/borg/remote.py

@@ -124,8 +124,13 @@ class RepositoryServer:  # pragma: no cover
             path = os.path.join(get_home_dir(), path[2:])
         path = os.path.realpath(path)
         if self.restrict_to_paths:
+            # if --restrict-to-path P is given, we make sure that we only operate in/below path P.
+            # for the prefix check, it is important that the compared pathes both have trailing slashes,
+            # so that a path /foobar will NOT be accepted with --restrict-to-path /foo option.
+            path_with_sep = os.path.join(path, '')  # make sure there is a trailing slash (os.sep)
             for restrict_to_path in self.restrict_to_paths:
-                if path.startswith(os.path.realpath(restrict_to_path)):
+                restrict_to_path_with_sep = os.path.join(os.path.realpath(restrict_to_path), '')  # trailing slash
+                if path_with_sep.startswith(restrict_to_path_with_sep):
                     break
             else:
                 raise PathNotAllowed(path)
@@ -207,6 +212,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             raise
 
     def __del__(self):
+        if len(self.responses):
+            logging.debug("still %d cached responses left in RemoteRepository" % (len(self.responses),))
         if self.p:
             self.close()
             assert False, "cleanup happened in Repository.__del__"

+ 1 - 0
src/borg/repository.py

@@ -239,6 +239,7 @@ class Repository:
         self.config = ConfigParser(interpolation=None)
         self.config.read(os.path.join(self.path, 'config'))
         if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
+            self.close()
             raise self.InvalidRepository(path)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')

+ 65 - 2
src/borg/testsuite/archiver.py

@@ -28,9 +28,11 @@ from ..archiver import Archiver
 from ..cache import Cache
 from ..constants import *  # NOQA
 from ..crypto import bytes_to_long, num_aes_blocks
+from ..helpers import PatternMatcher, parse_pattern
 from ..helpers import Chunk, Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import bin_to_hex
+from ..item import Item
 from ..key import KeyfileKeyBase
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
@@ -403,6 +405,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('extract', self.repository_location + '::test')
             assert os.readlink('input/link1') == 'somewhere'
 
+    # Search for O_NOATIME there: https://www.gnu.org/software/hurd/contributing.html - we just
+    # skip the test on Hurd, it is not critical anyway, just testing a performance optimization.
+    @pytest.mark.skipif(sys.platform == 'gnu0', reason="O_NOATIME is strangely broken on GNU Hurd")
     @pytest.mark.skipif(not is_utime_fully_supported(), reason='cannot properly setup and execute test without utime')
     def test_atime(self):
         def has_noatime(some_file):
@@ -1928,12 +1933,21 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     prefix = '__testsuite__:'
 
     def test_remote_repo_restrict_to_path(self):
-        self.cmd('init', self.repository_location)
-        path_prefix = os.path.dirname(self.repository_path)
+        # restricted to repo directory itself:
+        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]):
+            self.cmd('init', self.repository_location)
+        # restricted to repo directory itself, fail for other directories with same prefix:
+        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', self.repository_path]):
+            self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_0'))
+
+        # restricted to a completely different path:
         with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']):
             self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_1'))
+        path_prefix = os.path.dirname(self.repository_path)
+        # restrict to repo directory's parent directory:
         with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]):
             self.cmd('init', self.repository_location + '_2')
+        # restrict to repo directory's parent directory and another directory:
         with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]):
             self.cmd('init', self.repository_location + '_3')
 
@@ -1950,6 +1964,28 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     def test_debug_put_get_delete_obj(self):
         pass
 
+    def test_strip_components_doesnt_leak(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('dir/file', contents=b"test file contents 1")
+        self.create_regular_file('dir/file2', contents=b"test file contents 2")
+        self.create_regular_file('skipped-file1', contents=b"test file contents 3")
+        self.create_regular_file('skipped-file2', contents=b"test file contents 4")
+        self.create_regular_file('skipped-file3', contents=b"test file contents 5")
+        self.cmd('create', self.repository_location + '::test', 'input')
+        marker = 'cached responses left in RemoteRepository'
+        with changedir('output'):
+            res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '3')
+            self.assert_true(marker not in res)
+            with self.assert_creates_file('file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '2')
+                self.assert_true(marker not in res)
+            with self.assert_creates_file('dir/file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '1')
+                self.assert_true(marker not in res)
+            with self.assert_creates_file('input/dir/file'):
+                res = self.cmd('extract', "--debug", self.repository_location + '::test', '--strip-components', '0')
+                self.assert_true(marker not in res)
+
 
 class DiffArchiverTestCase(ArchiverTestCaseBase):
     def test_basic_functionality(self):
@@ -2160,3 +2196,30 @@ def test_compare_chunk_contents():
     ], [
         b'1234', b'565'
     ])
+
+
+class TestBuildFilter:
+    @staticmethod
+    def item_is_hardlink_master(item):
+        return False
+
+    def test_basic(self):
+        matcher = PatternMatcher()
+        matcher.add([parse_pattern('included')], True)
+        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+        assert filter(Item(path='included'))
+        assert filter(Item(path='included/file'))
+        assert not filter(Item(path='something else'))
+
+    def test_empty(self):
+        matcher = PatternMatcher(fallback=True)
+        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
+        assert filter(Item(path='anything'))
+
+    def test_strip_components(self):
+        matcher = PatternMatcher(fallback=True)
+        filter = Archiver.build_filter(matcher, self.item_is_hardlink_master, strip_components=1)
+        assert not filter(Item(path='shallow'))
+        assert not filter(Item(path='shallow/'))  # can this even happen? paths are normalized...
+        assert filter(Item(path='deep enough/file'))
+        assert filter(Item(path='something/dir/file'))

+ 4 - 0
src/borg/testsuite/locking.py

@@ -64,6 +64,8 @@ class TestLock:
         lock2 = Lock(lockpath, exclusive=False, id=ID2).acquire()
         assert len(lock1._roster.get(SHARED)) == 2
         assert len(lock1._roster.get(EXCLUSIVE)) == 0
+        assert not lock1._roster.empty(SHARED, EXCLUSIVE)
+        assert lock1._roster.empty(EXCLUSIVE)
         lock1.release()
         lock2.release()
 
@@ -71,6 +73,7 @@ class TestLock:
         with Lock(lockpath, exclusive=True, id=ID1) as lock:
             assert len(lock._roster.get(SHARED)) == 0
             assert len(lock._roster.get(EXCLUSIVE)) == 1
+            assert not lock._roster.empty(SHARED, EXCLUSIVE)
 
     def test_upgrade(self, lockpath):
         with Lock(lockpath, exclusive=False) as lock:
@@ -78,6 +81,7 @@ class TestLock:
             lock.upgrade()  # NOP
             assert len(lock._roster.get(SHARED)) == 0
             assert len(lock._roster.get(EXCLUSIVE)) == 1
+            assert not lock._roster.empty(SHARED, EXCLUSIVE)
 
     def test_downgrade(self, lockpath):
         with Lock(lockpath, exclusive=True) as lock: