浏览代码

Merge pull request #2745 from ThomasWaldmann/backports6

Backports (6)
TW 8 年之前
父节点
当前提交
cc7c7521f6
共有 9 个文件被更改,包括 122 次插入43 次删除
  1. 9 0
      borg/archiver.py
  2. 53 0
      borg/helpers.py
  3. 33 6
      borg/key.py
  4. 3 25
      borg/remote.py
  5. 2 4
      borg/repository.py
  6. 1 1
      borg/testsuite/key.py
  7. 6 5
      docs/faq.rst
  8. 7 2
      docs/quickstart.rst
  9. 8 0
      docs/usage.rst

+ 9 - 0
borg/archiver.py

@@ -989,6 +989,15 @@ class Archiver:
             This pattern style is useful to match whole sub-directories. The pattern
             This pattern style is useful to match whole sub-directories. The pattern
             `pp:/data/bar` matches `/data/bar` and everything therein.
             `pp:/data/bar` matches `/data/bar` and everything therein.
 
 
+        .. note::
+
+            `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE
+            engine. It is very easy to formulate patterns for each of these types which
+            require an inordinate amount of time to match paths. If untrusted users
+            are able to supply patterns, ensure they cannot supply `re:` patterns.
+            Further, ensure that `sh:` and `fm:` patterns only contain a handful of
+            wildcards at most.
+
         Exclusions can be passed via the command line option `--exclude`. When used
         Exclusions can be passed via the command line option `--exclude`. When used
         from within a shell the patterns should be quoted to protect them from
         from within a shell the patterns should be quoted to protect them from
         expansion.
         expansion.

+ 53 - 0
borg/helpers.py

@@ -39,6 +39,26 @@ import msgpack.fallback
 
 
 import socket
 import socket
 
 
+# 20 MiB minus 41 bytes for a Repository header (because the "size" field in the Repository includes
+# the header, and the total size was set to 20 MiB).
+MAX_DATA_SIZE = 20971479
+
+# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
+# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
+MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41  # see LoggedIO.put_header_fmt.size assertion in repository module
+assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
+
+# borg.remote read() buffer size
+BUFSIZE = 10 * 1024 * 1024
+
+# to use a safe, limited unpacker, we need to set an upper limit on the archive count in the manifest.
+# this does not mean that you can always really reach that number, because it also needs to be less than
+# MAX_DATA_SIZE or it will trigger the check for that.
+MAX_ARCHIVES = 400000
+
+# repo.list() / .scan() result count limit the borg client uses
+LIST_SCAN_LIMIT = 10000
+
 # return codes returned by borg command
 # return codes returned by borg command
 # when borg is killed by signal N, rc = 128 + N
 # when borg is killed by signal N, rc = 128 + N
 EXIT_SUCCESS = 0  # everything done, no problems
 EXIT_SUCCESS = 0  # everything done, no problems
@@ -139,6 +159,35 @@ def check_extension_modules():
         raise ExtensionModuleError
         raise ExtensionModuleError
 
 
 
 
+def get_limited_unpacker(kind):
+    """return a limited Unpacker because we should not trust msgpack data received from remote"""
+    args = dict(use_list=False,  # return tuples, not lists
+                max_bin_len=0,  # not used
+                max_ext_len=0,  # not used
+                max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
+                max_str_len=MAX_OBJECT_SIZE,  # a chunk or other repo object
+                )
+    if kind == 'server':
+        args.update(dict(max_array_len=100,  # misc. cmd tuples
+                         max_map_len=100,  # misc. cmd dicts
+                         ))
+    elif kind == 'client':
+        args.update(dict(max_array_len=LIST_SCAN_LIMIT,  # result list from repo.list() / .scan()
+                         max_map_len=100,  # misc. result dicts
+                         ))
+    elif kind == 'manifest':
+        args.update(dict(use_list=True,  # default value
+                         max_array_len=100,  # ITEM_KEYS ~= 22
+                         max_map_len=MAX_ARCHIVES,  # list of archives
+                         max_str_len=255,  # archive name
+                         object_hook=StableDict,
+                         unicode_errors='surrogateescape',
+                         ))
+    else:
+        raise ValueError('kind must be "server", "client" or "manifest"')
+    return msgpack.Unpacker(**args)
+
+
 class Manifest:
 class Manifest:
 
 
     @enum.unique
     @enum.unique
@@ -254,6 +303,10 @@ class Manifest:
             prev_ts = datetime.strptime(self.timestamp, "%Y-%m-%dT%H:%M:%S.%f")
             prev_ts = datetime.strptime(self.timestamp, "%Y-%m-%dT%H:%M:%S.%f")
             incremented = (prev_ts + timedelta(microseconds=1)).isoformat()
             incremented = (prev_ts + timedelta(microseconds=1)).isoformat()
             self.timestamp = max(incremented, datetime.utcnow().isoformat())
             self.timestamp = max(incremented, datetime.utcnow().isoformat())
+        # include checks for limits as enforced by limited unpacker (used by load())
+        assert len(self.archives) <= MAX_ARCHIVES
+        assert all(len(name) <= 255 for name in self.archives)
+        assert len(self.item_keys) <= 100
         m = {
         m = {
             'version': 1,
             'version': 1,
             'archives': StableDict((name, StableDict(archive)) for name, archive in self.archives.items()),
             'archives': StableDict((name, StableDict(archive)) for name, archive in self.archives.items()),

+ 33 - 6
borg/key.py

@@ -2,6 +2,8 @@ from binascii import hexlify, a2b_base64, b2a_base64
 import configparser
 import configparser
 import getpass
 import getpass
 import os
 import os
+import shlex
+import subprocess
 import sys
 import sys
 import textwrap
 import textwrap
 from hmac import HMAC, compare_digest
 from hmac import HMAC, compare_digest
@@ -16,12 +18,18 @@ logger = create_logger()
 from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
 from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
 from .crypto import hkdf_hmac_sha512
 from .crypto import hkdf_hmac_sha512
 from .compress import Compressor, CNONE
 from .compress import Compressor, CNONE
+from .helpers import get_limited_unpacker
+
 
 
 PREFIX = b'\0' * 8
 PREFIX = b'\0' * 8
 
 
 
 
 class PassphraseWrong(Error):
 class PassphraseWrong(Error):
-    """passphrase supplied in BORG_PASSPHRASE is incorrect"""
+    """passphrase supplied in BORG_PASSPHRASE or by BORG_PASSCOMMAND is incorrect."""
+
+
+class PasscommandFailure(Error):
+    """passcommand supplied in BORG_PASSCOMMAND failed: {}"""
 
 
 
 
 class PasswordRetriesExceeded(Error):
 class PasswordRetriesExceeded(Error):
@@ -155,9 +163,9 @@ class KeyBase:
             logger.warning('Manifest authentication DISABLED.')
             logger.warning('Manifest authentication DISABLED.')
             tam_required = False
             tam_required = False
         data = bytearray(data)
         data = bytearray(data)
-        # Since we don't trust these bytes we use the slower Python unpacker,
-        # which is assumed to have a lower probability of security issues.
-        unpacked = msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
+        unpacker = get_limited_unpacker('manifest')
+        unpacker.feed(data)
+        unpacked = unpacker.unpack()
         if b'tam' not in unpacked:
         if b'tam' not in unpacked:
             if tam_required:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())
                 raise TAMRequiredError(self.repository._location.canonical_path())
@@ -301,11 +309,30 @@ class AESKeyBase(KeyBase):
 
 
 class Passphrase(str):
 class Passphrase(str):
     @classmethod
     @classmethod
-    def env_passphrase(cls, default=None):
-        passphrase = os.environ.get('BORG_PASSPHRASE', default)
+    def _env_passphrase(cls, env_var, default=None):
+        passphrase = os.environ.get(env_var, default)
         if passphrase is not None:
         if passphrase is not None:
             return cls(passphrase)
             return cls(passphrase)
 
 
+    @classmethod
+    def env_passphrase(cls, default=None):
+        passphrase = cls._env_passphrase('BORG_PASSPHRASE', default)
+        if passphrase is not None:
+            return passphrase
+        passphrase = cls.env_passcommand()
+        if passphrase is not None:
+            return passphrase
+
+    @classmethod
+    def env_passcommand(cls, default=None):
+        passcommand = os.environ.get('BORG_PASSCOMMAND', None)
+        if passcommand is not None:
+            try:
+                passphrase = subprocess.check_output(shlex.split(passcommand), universal_newlines=True)
+            except (subprocess.CalledProcessError, FileNotFoundError) as e:
+                raise PasscommandFailure(e)
+            return cls(passphrase.rstrip('\n'))
+
     @classmethod
     @classmethod
     def getpass(cls, prompt):
     def getpass(cls, prompt):
         return cls(getpass.getpass(prompt))
         return cls(getpass.getpass(prompt))

+ 3 - 25
borg/remote.py

@@ -14,8 +14,9 @@ from . import __version__
 
 
 from .helpers import Error, IntegrityError, sysinfo
 from .helpers import Error, IntegrityError, sysinfo
 from .helpers import replace_placeholders
 from .helpers import replace_placeholders
-from .helpers import bin_to_hex
-from .repository import Repository, LIST_SCAN_LIMIT, MAX_OBJECT_SIZE
+from .helpers import BUFSIZE
+from .helpers import get_limited_unpacker
+from .repository import Repository
 from .logger import create_logger
 from .logger import create_logger
 
 
 import msgpack
 import msgpack
@@ -24,8 +25,6 @@ logger = create_logger(__name__)
 
 
 RPC_PROTOCOL_VERSION = 2
 RPC_PROTOCOL_VERSION = 2
 
 
-BUFSIZE = 10 * 1024 * 1024
-
 MAX_INFLIGHT = 100
 MAX_INFLIGHT = 100
 
 
 
 
@@ -48,27 +47,6 @@ def os_write(fd, data):
     return amount
     return amount
 
 
 
 
-def get_limited_unpacker(kind):
-    """return a limited Unpacker because we should not trust msgpack data received from remote"""
-    args = dict(use_list=False,  # return tuples, not lists
-                max_bin_len=0,  # not used
-                max_ext_len=0,  # not used
-                max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
-                max_str_len=MAX_OBJECT_SIZE,  # a chunk or other repo object
-                )
-    if kind == 'server':
-        args.update(dict(max_array_len=100,  # misc. cmd tuples
-                         max_map_len=100,  # misc. cmd dicts
-                         ))
-    elif kind == 'client':
-        args.update(dict(max_array_len=LIST_SCAN_LIMIT,  # result list from repo.list() / .scan()
-                         max_map_len=100,  # misc. result dicts
-                         ))
-    else:
-        raise ValueError('kind must be "server" or "client"')
-    return msgpack.Unpacker(**args)
-
-
 class ConnectionClosed(Error):
 class ConnectionClosed(Error):
     """Connection closed by remote host"""
     """Connection closed by remote host"""
 
 

+ 2 - 4
borg/repository.py

@@ -14,20 +14,18 @@ from .logger import create_logger
 logger = create_logger()
 logger = create_logger()
 
 
 from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
 from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
+from .helpers import LIST_SCAN_LIMIT, MAX_OBJECT_SIZE, MAX_DATA_SIZE
 from .hashindex import NSIndex
 from .hashindex import NSIndex
 from .locking import Lock, LockError, LockErrorT
 from .locking import Lock, LockError, LockErrorT
 from .lrucache import LRUCache
 from .lrucache import LRUCache
 from .platform import sync_dir
 from .platform import sync_dir
 
 
-MAX_OBJECT_SIZE = 20 * 1024 * 1024
 MAGIC = b'BORG_SEG'
 MAGIC = b'BORG_SEG'
 MAGIC_LEN = len(MAGIC)
 MAGIC_LEN = len(MAGIC)
 TAG_PUT = 0
 TAG_PUT = 0
 TAG_DELETE = 1
 TAG_DELETE = 1
 TAG_COMMIT = 2
 TAG_COMMIT = 2
 
 
-LIST_SCAN_LIMIT = 10000  # repo.list() / .scan() result count limit the borg client uses
-
 
 
 class Repository:
 class Repository:
     """Filesystem based transactional key value store
     """Filesystem based transactional key value store
@@ -860,4 +858,4 @@ class LoggedIO:
                     sync_dir(dirname)
                     sync_dir(dirname)
 
 
 
 
-MAX_DATA_SIZE = MAX_OBJECT_SIZE - LoggedIO.put_header_fmt.size
+assert LoggedIO.put_header_fmt.size == 41  # see helpers.MAX_OBJECT_SIZE

+ 1 - 1
borg/testsuite/key.py

@@ -123,7 +123,7 @@ class TestTAM:
             key.unpack_and_verify_manifest(blob)
             key.unpack_and_verify_manifest(blob)
 
 
         blob = b'\xc1\xc1\xc1'
         blob = b'\xc1\xc1\xc1'
-        with pytest.raises(msgpack.UnpackException):
+        with pytest.raises((ValueError, msgpack.UnpackException)):
             key.unpack_and_verify_manifest(blob)
             key.unpack_and_verify_manifest(blob)
 
 
     def test_missing_when_required(self, key):
     def test_missing_when_required(self, key):

+ 6 - 5
docs/faq.rst

@@ -123,15 +123,16 @@ none.
 How can I specify the encryption passphrase programmatically?
 How can I specify the encryption passphrase programmatically?
 -------------------------------------------------------------
 -------------------------------------------------------------
 
 
-The encryption passphrase can be specified programmatically using the
-`BORG_PASSPHRASE` environment variable. This is convenient when setting up
-automated encrypted backups. Another option is to use
-key file based encryption with a blank passphrase. See
-:ref:`encrypted_repos` for more details.
+The encryption passphrase or a command to retrieve the passphrase can be
+specified programmatically using the `BORG_PASSPHRASE` or `BORG_PASSCOMMAND`
+environment variables. This is convenient when setting up automated encrypted
+backups. Another option is to use key file based encryption with a blank passphrase.
+See :ref:`encrypted_repos` for more details.
 
 
 .. _password_env:
 .. _password_env:
 .. note:: Be careful how you set the environment; using the ``env``
 .. note:: Be careful how you set the environment; using the ``env``
           command, a ``system()`` call or using inline shell scripts
           command, a ``system()`` call or using inline shell scripts
+          (e.g. ``BORG_PASSPHRASE=hunter12 borg ...``)
           might expose the credentials in the process list directly
           might expose the credentials in the process list directly
           and they will be readable to all users on a system. Using
           and they will be readable to all users on a system. Using
           ``export`` in a shell script file should be safe, however, as
           ``export`` in a shell script file should be safe, however, as

+ 7 - 2
docs/quickstart.rst

@@ -112,6 +112,11 @@ certain number of old archives::
     #!/bin/sh
     #!/bin/sh
     REPOSITORY=username@remoteserver.com:backup
     REPOSITORY=username@remoteserver.com:backup
 
 
+    # Setting this, so you won't be asked for your repository passphrase:
+    export BORG_PASSPHRASE='XYZl0ngandsecurepa_55_phrasea&&123'
+    # or this to ask an external program to supply the passphrase:
+    export BORG_PASSCOMMAND='pass show backup'
+
     # Backup all of /home and /var/www except a few
     # Backup all of /home and /var/www except a few
     # excluded directories
     # excluded directories
     borg create -v --stats                          \
     borg create -v --stats                          \
@@ -149,8 +154,8 @@ may be surprised that the following ``export`` has no effect on your command::
    export BORG_PASSPHRASE='complicated & long'
    export BORG_PASSPHRASE='complicated & long'
    sudo ./yourborgwrapper.sh  # still prompts for password
    sudo ./yourborgwrapper.sh  # still prompts for password
 
 
-For more information, see sudo(8) man page. Hint: see ``env_keep`` in
-sudoers(5), or try ``sudo BORG_PASSPHRASE='yourphrase' borg`` syntax.
+For more information, refer to the sudo(8) man page and ``env_keep`` in
+the sudoers(5) man page.
 
 
 .. Tip::
 .. Tip::
     To debug what your borg process is actually seeing, find its PID
     To debug what your borg process is actually seeing, find its PID

+ 8 - 0
docs/usage.rst

@@ -147,6 +147,14 @@ General:
         can either leave it away or abbreviate as `::`, if a positional parameter is required.
         can either leave it away or abbreviate as `::`, if a positional parameter is required.
     BORG_PASSPHRASE
     BORG_PASSPHRASE
         When set, use the value to answer the passphrase question for encrypted repositories.
         When set, use the value to answer the passphrase question for encrypted repositories.
+        It is used when a passphrase is needed to access an encrypted repo as well as when a new
+        passphrase should be initially set when initializing an encrypted repo.
+    BORG_PASSCOMMAND
+        When set, use the standard output of the command (trailing newlines are stripped) to answer the
+        passphrase question for encrypted repositories.
+        It is used when a passphrase is needed to access an encrypted repo as well as when a new
+        passphrase should be initially set when initializing an encrypted repo.
+        If BORG_PASSPHRASE is also set, BORG_PASSPHRASE takes precedence over BORG_PASSCOMMAND.
     BORG_DISPLAY_PASSPHRASE
     BORG_DISPLAY_PASSPHRASE
         When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories.
         When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories.
     BORG_LOGGING_CONF
     BORG_LOGGING_CONF