
Merge pull request #1036 from Anakonda/windows

Update windows branch
enkore 9 years ago
parent commit 7116888780

+ 1 - 0
.travis/install.sh

@@ -15,6 +15,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
     fi
 
     brew install lz4
+    brew install xz  # required for python lzma module
     brew outdated pyenv || brew upgrade pyenv
 
     case "${TOXENV}" in
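Aside on why xz matters here: CPython's stdlib lzma module links against liblzma, which Homebrew's xz package provides; without it, pyenv-built Pythons end up without lzma support. A minimal check (not part of this commit, just a hedged illustration) to confirm the module is usable:

    import lzma

    data = b"hello " * 1000
    assert lzma.decompress(lzma.compress(data)) == data
    print("lzma module available and working")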

+ 1 - 1
README.rst

@@ -107,7 +107,7 @@ Now doing another backup, just to show off the great deduplication::
     -----------------------------------------------------------------------------
 
 
-For a graphical frontend refer to our complementary project `BorgWeb <https://borgbackup.github.io/borgweb/>`_.
+For a graphical frontend refer to our complementary project `BorgWeb <https://borgweb.readthedocs.io/>`_.
 
 Links
 =====

+ 3 - 2
Vagrantfile

@@ -54,14 +54,15 @@ def packages_darwin
     # install all the (security and other) updates
     sudo softwareupdate --install --all
     # get osxfuse 3.0.x pre-release code from github:
-    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.9/osxfuse-3.0.9.dmg >osxfuse.dmg
+    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg
     MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
-    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.9.pkg" -target /
+    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target /
     sudo chown -R vagrant /usr/local  # brew must be able to create stuff here
     ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     brew update
     brew install openssl
     brew install lz4
+    brew install xz  # required for python lzma module
     brew install fakeroot
     brew install git
     brew install pkgconfig

+ 4 - 4
borg/_chunker.c

@@ -184,9 +184,9 @@ chunker_fill(Chunker *c)
         length = c->bytes_read - offset;
         #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) )
 
-	// Only do it once per run.
-	if (pagemask == 0)
-		pagemask = getpagesize() - 1;
+        // Only do it once per run.
+        if (pagemask == 0)
+            pagemask = getpagesize() - 1;
 
         // We tell the OS that we do not need the data that we just have read any
         // more (that it maybe has in the cache). This avoids that we spoil the
@@ -207,7 +207,7 @@ chunker_fill(Chunker *c)
             // fadvise. This will cancel the final page and is not part
             // of the above workaround.
             overshoot = 0;
-	}
+        }
 
         posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
         #endif
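For readers unfamiliar with the C pattern above: POSIX_FADV_DONTNEED only drops whole cached pages, so the offset is rounded down to a page boundary via the pagemask before calling posix_fadvise. A rough Python sketch of the same idea (an illustration, not borg code; assumes a Unix platform):

    import os

    def drop_page_cache(fd, offset, length):
        # Round offset down to a page boundary, since the kernel evicts whole pages.
        pagemask = os.sysconf('SC_PAGESIZE') - 1
        aligned_offset = offset & ~pagemask
        if hasattr(os, 'posix_fadvise'):  # Python >= 3.3, Unix only
            os.posix_fadvise(fd, aligned_offset, length, os.POSIX_FADV_DONTNEED)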

+ 20 - 11
borg/archive.py

@@ -15,13 +15,14 @@ import sys
 import time
 from io import BytesIO
 from . import xattr
-from .compress import Compressor, COMPR_BUFFER
+from .compress import COMPR_BUFFER
 from .constants import *  # NOQA
 from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
     ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
-    PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
+    PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
+    CompressionDecider1, CompressionDecider2, CompressionSpec
 from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
@@ -125,7 +126,7 @@ class Archive:
 
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, end=None):
+                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None):
         self.cwd = os.getcwd()
         self.key = key
         self.repository = repository
@@ -148,6 +149,9 @@ class Archive:
         if create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
             self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
+            self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+                                                            compression_files or [])
+            key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.time()
@@ -601,11 +605,15 @@ Number of files: {0.stats.nfiles}'''.format(
         }
         # Only chunkify the file if needed
         if chunks is None:
+            compress = self.compression_decider1.decide(path)
+            logger.debug('%s -> compression %s', path, compress['name'])
             fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
                 chunks = []
                 for data in self.chunker.chunkify(fd, fh):
-                    chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
+                    chunks.append(cache.add_chunk(self.key.id_hash(data),
+                                                  Chunk(data, compress=compress),
+                                                  self.stats))
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
             cache.memorize_file(path_hash, st, [c.id for c in chunks])
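To make the flow above explicit: decide(path) returns a compression spec (a dict such as {'name': 'lz4'}), and attaching it as Chunk metadata lets the key's CompressionDecider2 pick the actual compressor later, at encrypt time. A tiny self-contained model of that hand-off (the Chunk shape is inferred from this diff's ``meta, data = chunk`` unpacking and is an assumption, not borg's exact definition):

    from collections import namedtuple

    _Chunk = namedtuple('_Chunk', 'meta data')

    def Chunk(data, **meta):
        # data first, arbitrary metadata (e.g. compress=...) as keywords
        return _Chunk(meta, data)

    chunk = Chunk(b'file contents', compress={'name': 'lz4'})
    meta, data = chunk
    print(meta.get('compress', {'name': 'none'}))   # -> {'name': 'lz4'}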
@@ -948,7 +956,7 @@ class ArchiveRecreater:
 
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None,
+                 chunker_params=None, compression=None, compression_files=None,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -961,12 +969,12 @@ class ArchiveRecreater:
         self.keep_tag_files = keep_tag_files
 
         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.compression = compression or dict(name='none')
-        self.seen_chunks = set()
         self.recompress = bool(compression)
-        compr_args = dict(buffer=COMPR_BUFFER)
-        compr_args.update(self.compression)
-        key.compressor = Compressor(**compr_args)
+        self.compression = compression or CompressionSpec('none')
+        self.seen_chunks = set()
+        self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+                                                            compression_files or [])
+        key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
 
         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
         logger.debug("Autocommit threshold: %s", format_file_size(self.autocommit_threshold))
@@ -1054,6 +1062,7 @@ class ArchiveRecreater:
 
     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
+        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item[b'chunks']:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1248,7 +1257,7 @@ class ArchiveRecreater:
     def create_target_archive(self, name):
         target = Archive(self.repository, self.key, self.manifest, name, create=True,
                           progress=self.progress, chunker_params=self.chunker_params, cache=self.cache,
-                          checkpoint_interval=0)
+                          checkpoint_interval=0, compression=self.compression)
         target.recreate_partial_chunks = None
         target.recreate_uncomitted_bytes = 0
         return target

+ 97 - 12
borg/archiver.py

@@ -9,9 +9,11 @@ import hashlib
 import inspect
 import io
 import os
+import re
 import shlex
 import signal
 import stat
+import subprocess
 import sys
 import textwrap
 import traceback
@@ -34,6 +36,7 @@ from .constants import *  # NOQA
 from .key import key_creator, RepoKey, PassphraseKey
 from .archive import Archive, ArchiveChecker, ArchiveRecreater
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
+from .selftest import selftest
 from .hashindex import ChunkIndexEntry
 
 has_lchflags = hasattr(os, 'lchflags')
@@ -285,14 +288,12 @@ class Archiver:
         dry_run = args.dry_run
         t0 = datetime.utcnow()
         if not dry_run:
-            compr_args = dict(buffer=COMPR_BUFFER)
-            compr_args.update(args.compression)
-            key.compressor = Compressor(**compr_args)
             with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
                                   numeric_owner=args.numeric_owner, progress=args.progress,
-                                  chunker_params=args.chunker_params, start=t0)
+                                  chunker_params=args.chunker_params, start=t0,
+                                  compression=args.compression, compression_files=args.compression_files)
                 create_inner(archive, cache)
         else:
             create_inner(None, None)
@@ -788,9 +789,20 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
-        archives = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
+        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.prefix:
-            archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
+            archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
+        is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
+        checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
+        # keep the latest checkpoint, if there is no later non-checkpoint archive
+        if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]:
+            keep_checkpoints = checkpoints[:1]
+        else:
+            keep_checkpoints = []
+        checkpoints = set(checkpoints)
+        # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
+        # that is newer than a successfully completed backup - and killing the successful backup.
+        archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         keep = []
         if args.within:
             keep += prune_within(archives, args.within)
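The new prune logic above separates checkpoint archives from completed ones before applying the --keep-* rules, so an interrupted (checkpoint) backup can never displace a completed one; only the newest checkpoint survives, and only while no newer completed archive exists. A standalone sketch of that selection (archive objects reduced to plain names, newest first - an assumption for brevity):

    import re

    is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search

    def split_archives(names_newest_first):
        checkpoints = [n for n in names_newest_first if is_checkpoint(n)]
        # keep the newest checkpoint only if there is no newer completed archive
        if checkpoints and names_newest_first and names_newest_first[0] == checkpoints[0]:
            keep_checkpoints = checkpoints[:1]
        else:
            keep_checkpoints = []
        completed = [n for n in names_newest_first if n not in set(checkpoints)]
        return completed, keep_checkpoints

    print(split_archives(['b.checkpoint', 'a', 'a.checkpoint.1']))
    # -> (['a'], ['b.checkpoint'])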
@@ -808,11 +820,10 @@ class Archiver:
             keep += prune_split(archives, '%Y-%m', args.monthly, keep)
         if args.yearly:
             keep += prune_split(archives, '%Y', args.yearly, keep)
-
-        to_delete = set(archives) - set(keep)
+        to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
-            for archive in archives:
+            for archive in archives_checkpoints:
                 if archive in to_delete:
                     if args.dry_run:
                         if args.output_list:
@@ -874,8 +885,8 @@ class Archiver:
 
         recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
-                                     keep_tag_files=args.keep_tag_files,
-                                     compression=args.compression, chunker_params=args.chunker_params,
+                                     keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
+                                     compression=args.compression, compression_files=args.compression_files,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -902,6 +913,21 @@ class Archiver:
         cache.commit()
         return self.exit_code
 
+    @with_repository(manifest=False)
+    def do_with_lock(self, args, repository):
+        """run a user specified command with the repository lock held"""
+        # re-write manifest to start a repository transaction - this causes a
+        # lock upgrade to exclusive for remote (and also for local) repositories.
+        # by using manifest=False in the decorator, we avoid having to require
+        # the encryption key (and can operate just with encrypted data).
+        data = repository.get(Manifest.MANIFEST_ID)
+        repository.put(Manifest.MANIFEST_ID, data)
+        try:
+            # we exit with the return code we get from the subprocess
+            return subprocess.call([args.command] + args.args)
+        finally:
+            repository.rollback()
+
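do_with_lock() above boils down to: force the repository lock to exclusive (by rewriting the manifest), run the user's command, hand its exit code back as borg's, and always roll back the dummy transaction. A stripped-down, self-contained sketch of that shape (the lock handling is stubbed out here - an assumption, since the real locking lives in the repository object):

    import subprocess
    import sys

    def run_with_lock(acquire, release, command):
        acquire()   # in borg: get + put of the manifest forces an exclusive lock
        try:
            # the subprocess exit code becomes the caller's exit code
            return subprocess.call(command)
        finally:
            release()   # in borg: repository.rollback()

    rc = run_with_lock(lambda: None, lambda: None,
                       [sys.executable, '-c', 'raise SystemExit(0)'])
    print(rc)  # 0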
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -1265,6 +1291,12 @@ class Archiver:
         traversing all paths specified. The archive will consume almost no disk space for
         files or parts of files that have already been stored in other archives.
 
+        The archive name needs to be unique. It must not end in '.checkpoint' or
+        '.checkpoint.N' (with N being a number), because these names are used for
+        checkpoints and treated in special ways.
+
+        In the archive name, you may use the following format tags:
+        {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
 
         To speed up pulling backups over sshfs and similar network file systems which do
         not provide correct inode information the --ignore-inode flag can be used. This
@@ -1350,11 +1382,16 @@ class Archiver:
                                    type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
                                    help='select compression algorithm (and level):\n'
                                         'none == no compression (default),\n'
+                                        'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+                                        '              being any valid compression algorithm (and optional level),\n'
                                         'lz4 == lz4,\n'
                                         'zlib == zlib (default level 6),\n'
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--compression-from', dest='compression_files',
+                                   type=argparse.FileType('r'), action='append',
+                                   metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
 
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
@@ -1369,6 +1406,10 @@ class Archiver:
         be restricted by using the ``--exclude`` option.
 
         See the output of the "borg help patterns" command for more help on exclude patterns.
+
+        By using ``--dry-run``, you can do all extraction steps except actually writing the
+        output data: reading metadata and data chunks from the repo, checking the hash/hmac,
+        decrypting, decompressing.
         """)
         subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False,
                                           description=self.do_extract.__doc__,
@@ -1603,11 +1644,20 @@ class Archiver:
         any of the specified retention options. This command is normally used by
         automated backup scripts wanting to keep a certain number of historic backups.
 
+        Also, prune automatically removes checkpoint archives (incomplete archives left
+        behind by interrupted backup runs) except if the checkpoint is the latest
+        archive (and thus still needed). Checkpoint archives are not considered when
+        comparing archive counts against the retention limits (--keep-*).
+
         If a prefix is set with -P, then only archives that start with the prefix are
         considered for deletion and only those archives count towards the totals
         specified by the rules.
         Otherwise, *all* archives in the repository are candidates for deletion!
 
+        If you have multiple sequences of archives with different data sets (e.g.
+        from different machines) in one shared repository, use one prune call per
+        data set that matches only the respective archives using the -P option.
+
         The "--keep-within" option takes an argument of the form "<int><char>",
         where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
         to keep all archives that were created within the past 48 hours.
@@ -1816,11 +1866,16 @@ class Archiver:
                                    type=CompressionSpec, default=None, metavar='COMPRESSION',
                                    help='select compression algorithm (and level):\n'
                                         'none == no compression (default),\n'
+                                        'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+                                        '              being any valid compression algorithm (and optional level),\n'
                                         'lz4 == lz4,\n'
                                         'zlib == zlib (default level 6),\n'
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--compression-from', dest='compression_files',
+                                   type=argparse.FileType('r'), action='append',
+                                   metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
         archive_group.add_argument('--chunker-params', dest='chunker_params',
                                    type=ChunkerParams, default=None,
                                    metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
@@ -1832,6 +1887,32 @@ class Archiver:
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                                help='paths to recreate; patterns are supported')
 
+        with_lock_epilog = textwrap.dedent("""
+        This command runs a user-specified command while the repository lock is held.
+
+        It will first try to acquire the lock (make sure that no other operation is
+        running in the repo), then execute the given command as a subprocess and wait
+        for its termination, release the lock and return the user command's return
+        code as borg's return code.
+
+        Note: if you copy a repository with the lock held, the lock will be present in
+              the copy, obviously. Thus, before using borg on the copy, you need to
+              use "borg break-lock" on it.
+        """)
+        subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False,
+                                          description=self.do_with_lock.__doc__,
+                                          epilog=with_lock_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='run user command with lock held')
+        subparser.set_defaults(func=self.do_with_lock)
+        subparser.add_argument('location', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to lock')
+        subparser.add_argument('command', metavar='COMMAND',
+                               help='command to run')
+        subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER,
+                               help='command arguments')
+
         subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False,
                                           description='Extra help')
         subparser.add_argument('--epilog-only', dest='epilog_only',
@@ -1926,13 +2007,17 @@ class Archiver:
         update_excludes(args)
         return args
 
+    def prerun_checks(self, logger):
+        check_extension_modules()
+        selftest(logger)
+
     def run(self, args):
         os.umask(args.umask)  # early, before opening files
         self.lock_wait = args.lock_wait
         setup_logging(level=args.log_level, is_serve=args.func == self.do_serve)  # do not use loggers before this!
         if args.show_version:
             logger.info('borgbackup version %s' % __version__)
-        check_extension_modules()
+        self.prerun_checks(logger)
         if is_slow_msgpack():
             logger.warning("Using a pure-python msgpack! This will result in lower performance.")
         return args.func(args)

+ 2 - 8
borg/crypto.pyx

@@ -1,15 +1,9 @@
-"""A thin OpenSSL wrapper
+"""A thin OpenSSL wrapper"""
 
-This could be replaced by PyCrypto maybe?
-"""
 from libc.stdlib cimport malloc, free
 from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
 
-API_VERSION = 2
-
-
-cdef extern from "openssl/rand.h":
-    int  RAND_bytes(unsigned char *buf, int num)
+API_VERSION = 3
 
 
 cdef extern from "openssl/evp.h":

+ 102 - 3
borg/helpers.py

@@ -36,6 +36,7 @@ from . import hashindex
 from . import chunker
 from .constants import *  # NOQA
 from . import crypto
+from .compress import COMPR_BUFFER, get_compressor
 from . import shellpattern
 import msgpack
 import msgpack.fallback
@@ -83,7 +84,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
-    if crypto.API_VERSION != 2:
+    if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 2:
         raise ExtensionModuleError
@@ -285,8 +286,7 @@ def load_excludes(fh):
     """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
     both line ends are ignored.
     """
-    patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
-    return [parse_pattern(pattern) for pattern in patterns if pattern]
+    return [parse_pattern(pattern) for pattern in clean_lines(fh)]
 
 
 def update_excludes(args):
@@ -539,6 +539,12 @@ def CompressionSpec(s):
         else:
             raise ValueError
         return dict(name=name, level=level)
+    if name == 'auto':
+        if 2 <= count <= 3:
+            compression = ','.join(values[1:])
+        else:
+            raise ValueError
+        return dict(name=name, spec=CompressionSpec(compression))
     raise ValueError
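Read together with the existing branches of CompressionSpec(), the new 'auto' case nests a full spec for the second-stage algorithm inside the returned dict. A simplified stand-in (only 'none', 'zlib' and 'auto' are modeled - an assumption for brevity) showing the resulting shapes:

    def compression_spec(s):
        values = s.split(',')
        count = len(values)
        name = values[0]
        if name == 'none':
            return dict(name=name)
        if name == 'zlib':
            level = int(values[1]) if count == 2 else 6
            return dict(name=name, level=level)
        if name == 'auto' and 2 <= count <= 3:
            # nest the spec of the algorithm the heuristic may later choose
            return dict(name=name, spec=compression_spec(','.join(values[1:])))
        raise ValueError(s)

    print(compression_spec('auto,zlib,9'))
    # -> {'name': 'auto', 'spec': {'name': 'zlib', 'level': 9}}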
 
 
@@ -1484,3 +1490,96 @@ except ImportError:
 
 def scandir_inorder(path='.'):
     return sorted(scandir(path), key=lambda dirent: dirent.inode())
+
+
+def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
+    """
+    clean lines (usually read from a config file):
+
+    1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments.
+
+    note: only "pure comment lines" are supported, no support for "trailing comments".
+
+    :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines
+    :param lstrip: lstrip call arguments or False, if lstripping is not desired
+    :param rstrip: rstrip call arguments or False, if rstripping is not desired
+    :param remove_comments: remove comment lines (lines starting with "#")
+    :param remove_empty: remove empty lines
+    :return: yields processed lines
+    """
+    for line in lines:
+        if lstrip is not False:
+            line = line.lstrip(lstrip)
+        if rstrip is not False:
+            line = line.rstrip(rstrip)
+        if remove_empty and not line:
+            continue
+        if remove_comments and line.startswith('#'):
+            continue
+        yield line
+
+
+class CompressionDecider1:
+    def __init__(self, compression, compression_files):
+        """
+        Initialize a CompressionDecider instance (and read config files, if needed).
+
+        :param compression: default CompressionSpec (e.g. from --compression option)
+        :param compression_files: list of compression config files (e.g. from --compression-from) or
+                                  a list of other line iterators
+        """
+        self.compression = compression
+        if not compression_files:
+            self.matcher = None
+        else:
+            self.matcher = PatternMatcher(fallback=compression)
+            for file in compression_files:
+                try:
+                    for line in clean_lines(file):
+                        try:
+                            compr_spec, fn_pattern = line.split(':', 1)
+                        except:
+                            continue
+                        self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec))
+                finally:
+                    if hasattr(file, 'close'):
+                        file.close()
+
+    def decide(self, path):
+        if self.matcher is not None:
+            return self.matcher.match(path)
+        return self.compression
+
+
+class CompressionDecider2:
+    def __init__(self, compression):
+        self.compression = compression
+
+    def decide(self, chunk):
+        # nothing fancy here yet: we either use what the metadata says or the default
+        # later, we can decide based on the chunk data also.
+        # if we compress the data here to decide, we can even update the chunk data
+        # and modify the metadata as desired.
+        compr_spec = chunk.meta.get('compress', self.compression)
+        compr_args = dict(buffer=COMPR_BUFFER)
+        compr_args.update(compr_spec)
+        if compr_args['name'] == 'auto':
+            # we did not decide yet, use heuristic:
+            compr_args, chunk = self.heuristic_lz4(compr_args, chunk)
+        return compr_args, chunk
+
+    def heuristic_lz4(self, compr_args, chunk):
+        meta, data = chunk
+        lz4 = get_compressor('lz4', buffer=compr_args['buffer'])
+        cdata = lz4.compress(data)
+        data_len = len(data)
+        cdata_len = len(cdata)
+        if cdata_len < data_len:
+            compr_spec = compr_args['spec']
+        else:
+            # uncompressible - we could have a special "uncompressible compressor"
+            # that marks such data as uncompressible via compression-type metadata.
+            compr_spec = CompressionSpec('none')
+        compr_args.update(compr_spec)
+        logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec)
+        return compr_args, Chunk(data, **meta)
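The heuristic above is a trial compression: compress the chunk once with cheap lz4, and only if that shrinks the data use the configured (possibly expensive) algorithm, otherwise store the chunk uncompressed. A standalone illustration using stdlib zlib level 1 as a stand-in for lz4 (an assumption - borg's real code goes through get_compressor('lz4')):

    import os
    import zlib

    def heuristic(data, wanted_spec):
        trial = zlib.compress(data, 1)          # cheap trial compression
        if len(trial) < len(data):
            return wanted_spec                  # compressible: honor the requested spec
        return {'name': 'none'}                 # incompressible: skip compression entirely

    print(heuristic(b'x' * 10000, {'name': 'lzma', 'level': 6}))     # -> the lzma spec
    print(heuristic(os.urandom(10000), {'name': 'lzma', 'level': 6}))  # -> {'name': 'none'}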

+ 43 - 11
borg/key.py

@@ -7,13 +7,13 @@ import textwrap
 from hmac import compare_digest
 from hashlib import sha256, pbkdf2_hmac
 
-from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex
+from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex, CompressionDecider2, CompressionSpec
 from .logger import create_logger
 logger = create_logger()
 
 from .constants import *  # NOQA
 from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256
-from .compress import Compressor, COMPR_BUFFER
+from .compress import Compressor, COMPR_BUFFER, get_compressor
 import msgpack
 
 PREFIX = b'\0' * 8
@@ -35,6 +35,14 @@ class KeyfileNotFoundError(Error):
     """No key file for repository {} found in {}."""
 
 
+class KeyfileInvalidError(Error):
+    """Invalid key file for repository {} found in {}."""
+
+
+class KeyfileMismatchError(Error):
+    """Mismatch between repository {} and key file {}."""
+
+
 class RepoKeyNotFoundError(Error):
     """No key entry found in the config of repository {}."""
 
@@ -71,12 +79,20 @@ class KeyBase:
         self.TYPE_STR = bytes([self.TYPE])
         self.repository = repository
         self.target = None  # key location file path / repo obj
-        self.compressor = Compressor('none', buffer=COMPR_BUFFER)
+        self.compression_decider2 = CompressionDecider2(CompressionSpec('none'))
+        self.compressor = Compressor('none', buffer=COMPR_BUFFER)  # for decompression
 
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
         """
 
+    def compress(self, chunk):
+        compr_args, chunk = self.compression_decider2.decide(chunk)
+        compressor = Compressor(**compr_args)
+        meta, data = chunk
+        data = compressor.compress(data)
+        return Chunk(data, **meta)
+
     def encrypt(self, chunk):
         pass
 
@@ -102,8 +118,8 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()
 
     def encrypt(self, chunk):
-        meta, data = chunk
-        return b''.join([self.TYPE_STR, self.compressor.compress(data)])
+        chunk = self.compress(chunk)
+        return b''.join([self.TYPE_STR, chunk.data])
 
     def decrypt(self, id, data):
         if data[0] != self.TYPE:
@@ -135,9 +151,9 @@ class AESKeyBase(KeyBase):
         return hmac_sha256(self.id_key, data)
 
     def encrypt(self, chunk):
-        data = self.compressor.compress(chunk.data)
+        chunk = self.compress(chunk)
         self.enc_cipher.reset()
-        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
+        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(chunk.data)))
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))
 
@@ -396,17 +412,33 @@ class KeyfileKey(KeyfileKeyBase):
     TYPE = 0x00
     FILE_ID = 'BORG_KEY'
 
+    def sanity_check(self, filename, id):
+        with open(filename, 'r') as fd:
+            line = fd.readline().strip()
+            if not line.startswith(self.FILE_ID):
+                raise KeyfileInvalidError(self.repository._location.canonical_path(), filename)
+            if line[len(self.FILE_ID) + 1:] != id:
+                raise KeyfileMismatchError(self.repository._location.canonical_path(), filename)
+            return filename
+
     def find_key(self):
+        id = self.repository.id_str
+        keyfile = os.environ.get('BORG_KEY_FILE')
+        if keyfile:
+            return self.sanity_check(keyfile, id)
         keys_dir = get_keys_dir()
         for name in os.listdir(keys_dir):
             filename = os.path.join(keys_dir, name)
-            with open(filename, 'r') as fd:
-                line = fd.readline().strip()
-                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str:
-                    return filename
+            try:
+                return self.sanity_check(filename, id)
+            except (KeyfileInvalidError, KeyfileMismatchError):
+                pass
         raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
 
     def get_new_target(self, args):
+        keyfile = os.environ.get('BORG_KEY_FILE')
+        if keyfile:
+            return keyfile
         filename = args.location.to_key_filename()
         path = filename
         i = 1
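The new sanity_check()/BORG_KEY_FILE handling above identifies a key file by its first line, which starts with the FILE_ID ('BORG_KEY') followed by the repository id. A toy version of that header check (the on-disk format beyond the first line is not shown in this diff and is not modeled here):

    def check_keyfile_header(first_line, repo_id_hex):
        FILE_ID = 'BORG_KEY'
        if not first_line.startswith(FILE_ID):
            raise ValueError('not a borg key file')
        if first_line[len(FILE_ID) + 1:] != repo_id_hex:
            raise ValueError('key file belongs to a different repository')

    check_keyfile_header('BORG_KEY ' + 'ab' * 32, 'ab' * 32)    # passes
    # check_keyfile_header('BORG_KEY ' + '00' * 32, 'ab' * 32)  # would raise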

+ 79 - 0
borg/selftest.py

@@ -0,0 +1,79 @@
+"""
+Self testing module
+===================
+
+The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues
+with the way Borg was compiled or packaged and also bugs in Borg itself.
+
+These tests are a subset of the borg/testsuite and are run with Python's built-in unittest, hence none of
+the tests used for this can or should be ported to py.test currently.
+
+To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT
+variable. SELFTEST_COUNT must be updated if new tests are added or removed to or from any of the tests
+used here.
+"""
+
+
+import sys
+import time
+from unittest import TestResult, TestSuite, defaultTestLoader
+
+from .testsuite.hashindex import HashIndexDataTestCase, HashIndexRefcountingTestCase, HashIndexTestCase
+from .testsuite.crypto import CryptoTestCase
+from .testsuite.chunker import ChunkerTestCase
+
+SELFTEST_CASES = [
+    HashIndexDataTestCase,
+    HashIndexRefcountingTestCase,
+    HashIndexTestCase,
+    CryptoTestCase,
+    ChunkerTestCase,
+]
+
+SELFTEST_COUNT = 27
+
+
+class SelfTestResult(TestResult):
+    def __init__(self):
+        super().__init__()
+        self.successes = []
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self.successes.append(test)
+
+    def test_name(self, test):
+        return test.shortDescription() or str(test)
+
+    def log_results(self, logger):
+        for test, failure in self.errors + self.failures + self.unexpectedSuccesses:
+            logger.error('self test %s FAILED:\n%s', self.test_name(test), failure)
+        for test, reason in self.skipped:
+            logger.warning('self test %s skipped: %s', self.test_name(test), reason)
+
+    def successful_test_count(self):
+        return len(self.successes)
+
+
+def selftest(logger):
+    selftest_started = time.perf_counter()
+    result = SelfTestResult()
+    test_suite = TestSuite()
+    for test_case in SELFTEST_CASES:
+        test_suite.addTest(defaultTestLoader.loadTestsFromTestCase(test_case))
+    test_suite.run(result)
+    result.log_results(logger)
+    successful_tests = result.successful_test_count()
+    count_mismatch = successful_tests != SELFTEST_COUNT
+    if result.wasSuccessful() and count_mismatch:
+        # only print this if all tests succeeded
+        logger.error("self test count (%d != %d) mismatch, either test discovery is broken or a test was added "
+                     "without updating borg.selftest",
+                     successful_tests, SELFTEST_COUNT)
+    if not result.wasSuccessful() or count_mismatch:
+        logger.error("self test failed\n"
+                     "This is a bug either in Borg or in the package / distribution you use.")
+        sys.exit(2)
+        assert False, "sanity assertion failed: ran beyond sys.exit()"
+    selftest_elapsed = time.perf_counter() - selftest_started
+    logger.debug("%d self tests completed in %.2f seconds", successful_tests, selftest_elapsed)

+ 12 - 5
borg/testsuite/__init__.py

@@ -9,7 +9,8 @@ import sysconfig
 import time
 import unittest
 from ..xattr import get_all
-from ..logger import setup_logging
+
+# Note: this is used by borg.selftest, do not use or import py.test functionality here.
 
 try:
     import llfuse
@@ -18,6 +19,11 @@ try:
 except ImportError:
     have_fuse_mtime_ns = False
 
+try:
+    from pytest import raises
+except ImportError:
+    raises = None
+
 has_lchflags = hasattr(os, 'lchflags')
 
 
@@ -32,9 +38,6 @@ else:
 if sys.platform.startswith('netbsd'):
     st_mtime_ns_round = -4  # only >1 microsecond resolution here?
 
-# Ensure that the loggers exist for all tests
-setup_logging()
-
 
 class BaseTestCase(unittest.TestCase):
     """
@@ -43,9 +46,13 @@ class BaseTestCase(unittest.TestCase):
     assert_not_in = unittest.TestCase.assertNotIn
     assert_equal = unittest.TestCase.assertEqual
     assert_not_equal = unittest.TestCase.assertNotEqual
-    assert_raises = unittest.TestCase.assertRaises
     assert_true = unittest.TestCase.assertTrue
 
+    if raises:
+        assert_raises = staticmethod(raises)
+    else:
+        assert_raises = unittest.TestCase.assertRaises
+
     @contextmanager
     def assert_creates_file(self, path):
         self.assert_true(not os.path.exists(path), '{} should not exist'.format(path))

+ 89 - 1
borg/testsuite/archiver.py

@@ -61,6 +61,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw):
             sys.stdout = sys.stderr = output = StringIO()
             if archiver is None:
                 archiver = Archiver()
+            archiver.prerun_checks = lambda *args: None
             archiver.exit_code = EXIT_SUCCESS
             args = archiver.parse_args(list(args))
             ret = archiver.run(args)
@@ -987,16 +988,39 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', self.repository_location)
         self.cmd('create', self.repository_location + '::test1', src_dir)
         self.cmd('create', self.repository_location + '::test2', src_dir)
+        # these are not really checkpoints, but they look like some:
+        self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
+        self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
+        self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
         self.assert_in('Would prune:     test1', output)
+        # must keep the latest non-checkpoint archive:
+        self.assert_in('Keeping archive: test2', output)
+        # must keep the latest checkpoint archive:
+        self.assert_in('Keeping archive: test4.checkpoint', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test2', output)
+        self.assert_in('test3.checkpoint', output)
+        self.assert_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
         self.cmd('prune', self.repository_location, '--keep-daily=2')
         output = self.cmd('list', self.repository_location)
         self.assert_not_in('test1', output)
+        # the latest non-checkpoint archive must be still there:
         self.assert_in('test2', output)
+        # only the latest checkpoint archive must still be there:
+        self.assert_not_in('test3.checkpoint', output)
+        self.assert_not_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
+        # now we supersede the latest checkpoint by a successful backup:
+        self.cmd('create', self.repository_location + '::test5', src_dir)
+        self.cmd('prune', self.repository_location, '--keep-daily=2')
+        output = self.cmd('list', self.repository_location)
+        # all checkpoints should be gone now:
+        self.assert_not_in('checkpoint', output)
+        # the latest archive must be still there
+        self.assert_in('test5', output)
 
     def test_prune_repository_save_space(self):
         self.cmd('init', self.repository_location)
@@ -1088,6 +1112,64 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         size, csize, path = output.split("\n")[1].split(" ")
         assert int(csize) < int(size)
 
+    def _get_sizes(self, compression, compressible, size=10000):
+        if compressible:
+            contents = b'X' * size
+        else:
+            contents = os.urandom(size)
+        self.create_regular_file('file', contents=contents)
+        self.cmd('init', '--encryption=none', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '-C', compression, archive, 'input')
+        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', archive)
+        size, csize, path = output.split("\n")[1].split(" ")
+        return int(size), int(csize)
+
+    def test_compression_none_compressible(self):
+        size, csize = self._get_sizes('none', compressible=True)
+        assert csize >= size
+        assert csize == size + 3
+
+    def test_compression_none_uncompressible(self):
+        size, csize = self._get_sizes('none', compressible=False)
+        assert csize >= size
+        assert csize == size + 3
+
+    def test_compression_zlib_compressible(self):
+        size, csize = self._get_sizes('zlib', compressible=True)
+        assert csize < size * 0.1
+        assert csize == 35
+
+    def test_compression_zlib_uncompressible(self):
+        size, csize = self._get_sizes('zlib', compressible=False)
+        assert csize >= size
+
+    def test_compression_auto_compressible(self):
+        size, csize = self._get_sizes('auto,zlib', compressible=True)
+        assert csize < size * 0.1
+        assert csize == 35  # same as compression 'zlib'
+
+    def test_compression_auto_uncompressible(self):
+        size, csize = self._get_sizes('auto,zlib', compressible=False)
+        assert csize >= size
+        assert csize == size + 3  # same as compression 'none'
+
+    def test_compression_lz4_compressible(self):
+        size, csize = self._get_sizes('lz4', compressible=True)
+        assert csize < size * 0.1
+
+    def test_compression_lz4_uncompressible(self):
+        size, csize = self._get_sizes('lz4', compressible=False)
+        assert csize >= size
+
+    def test_compression_lzma_compressible(self):
+        size, csize = self._get_sizes('lzma', compressible=True)
+        assert csize < size * 0.1
+
+    def test_compression_lzma_uncompressible(self):
+        size, csize = self._get_sizes('lzma', compressible=False)
+        assert csize >= size
+
     def test_break_lock(self):
         self.cmd('init', self.repository_location)
         self.cmd('break-lock', self.repository_location)
@@ -1398,6 +1480,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         info_after = self.cmd('info', self.repository_location + '::test')
         assert info_before == info_after  # includes archive ID
 
+    def test_with_lock(self):
+        self.cmd('init', self.repository_location)
+        lock_path = os.path.join(self.repository_path, 'lock.exclusive')
+        cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path
+        self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42)
+
 
 @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
 class ArchiverTestCaseBinary(ArchiverTestCase):

+ 3 - 0
borg/testsuite/chunker.py

@@ -4,6 +4,9 @@ from ..chunker import Chunker, buzhash, buzhash_update
 from ..constants import *  # NOQA
 from . import BaseTestCase
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 class ChunkerTestCase(BaseTestCase):
 

+ 4 - 0
borg/testsuite/conftest.py

@@ -0,0 +1,4 @@
+from ..logger import setup_logging
+
+# Ensure that the loggers exist for all tests
+setup_logging()

+ 3 - 0
borg/testsuite/crypto.py

@@ -3,6 +3,9 @@ from binascii import hexlify, unhexlify
 from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
 from . import BaseTestCase
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 class CryptoTestCase(BaseTestCase):
 

+ 17 - 15
borg/testsuite/hashindex.py

@@ -1,15 +1,16 @@
 import base64
 import hashlib
 import os
-import struct
 import tempfile
 import zlib
 
-import pytest
 from ..hashindex import NSIndex, ChunkIndex
 from .. import hashindex
 from . import BaseTestCase
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 def H(x):
     # make some 32byte long thing that depends on x
@@ -194,7 +195,7 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_decref_zero(self):
         idx1 = ChunkIndex()
         idx1[H(1)] = 0, 0, 0
-        with pytest.raises(AssertionError):
+        with self.assert_raises(AssertionError):
             idx1.decref(H(1))
 
     def test_incref_decref(self):
@@ -208,18 +209,18 @@ class HashIndexRefcountingTestCase(BaseTestCase):
 
     def test_setitem_raises(self):
         idx1 = ChunkIndex()
-        with pytest.raises(AssertionError):
+        with self.assert_raises(AssertionError):
             idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
 
     def test_keyerror(self):
         idx = ChunkIndex()
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx.incref(H(1))
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx.decref(H(1))
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx[H(1)]
-        with pytest.raises(OverflowError):
+        with self.assert_raises(OverflowError):
             idx.add(H(1), -1, 0, 0)
 
 
@@ -269,10 +270,11 @@ class HashIndexDataTestCase(BaseTestCase):
         assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
 
 
-def test_nsindex_segment_limit():
-    idx = NSIndex()
-    with pytest.raises(AssertionError):
-        idx[H(1)] = hashindex.MAX_VALUE + 1, 0
-    assert H(1) not in idx
-    idx[H(2)] = hashindex.MAX_VALUE, 0
-    assert H(2) in idx
+class NSIndexTestCase(BaseTestCase):
+    def test_nsindex_segment_limit(self):
+        idx = NSIndex()
+        with self.assert_raises(AssertionError):
+            idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+        assert H(1) not in idx
+        idx[H(2)] = hashindex.MAX_VALUE, 0
+        assert H(2) in idx

+ 50 - 2
borg/testsuite/helpers.py

@@ -10,11 +10,12 @@ import msgpack
 import msgpack.fallback
 import time
 
-from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \
+from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
-    StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \
+    StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
+    CompressionSpec, CompressionDecider1, CompressionDecider2, \
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper
 from . import BaseTestCase, environment_variable, FakeInputs
 
@@ -915,3 +916,50 @@ def test_chunk_file_wrapper():
     cfw = ChunkIteratorFileWrapper(iter([]))
     assert cfw.read(2) == b''
     assert cfw.exhausted
+
+
+def test_clean_lines():
+    conf = """\
+#comment
+data1 #data1
+data2
+
+ data3
+""".splitlines(keepends=True)
+    assert list(clean_lines(conf)) == ['data1 #data1', 'data2', 'data3', ]
+    assert list(clean_lines(conf, lstrip=False)) == ['data1 #data1', 'data2', ' data3', ]
+    assert list(clean_lines(conf, rstrip=False)) == ['data1 #data1\n', 'data2\n', 'data3\n', ]
+    assert list(clean_lines(conf, remove_empty=False)) == ['data1 #data1', 'data2', '', 'data3', ]
+    assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ]
+
+
+def test_compression_decider1():
+    default = CompressionSpec('zlib')
+    conf = """
+# use super-fast lz4 compression on huge VM files in this path:
+lz4:/srv/vm_disks
+
+# jpeg or zip files do not compress:
+none:*.jpeg
+none:*.zip
+""".splitlines()
+
+    cd = CompressionDecider1(default, [])  # no conf, always use default
+    assert cd.decide('/srv/vm_disks/linux')['name'] == 'zlib'
+    assert cd.decide('test.zip')['name'] == 'zlib'
+    assert cd.decide('test')['name'] == 'zlib'
+
+    cd = CompressionDecider1(default, [conf, ])
+    assert cd.decide('/srv/vm_disks/linux')['name'] == 'lz4'
+    assert cd.decide('test.zip')['name'] == 'none'
+    assert cd.decide('test')['name'] == 'zlib'  # no match in conf, use default
+
+
+def test_compression_decider2():
+    default = CompressionSpec('zlib')
+
+    cd = CompressionDecider2(default)
+    compr_spec, chunk = cd.decide(Chunk(None))
+    assert compr_spec['name'] == 'zlib'
+    compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma')))
+    assert compr_spec['name'] == 'lzma'

+ 25 - 1
borg/testsuite/key.py

@@ -7,7 +7,7 @@ from binascii import hexlify, unhexlify
 from ..crypto import bytes_to_long, num_aes_blocks
 from ..key import PlaintextKey, PassphraseKey, KeyfileKey
 from ..helpers import Location, Chunk, bin_to_hex
-from . import BaseTestCase
+from . import BaseTestCase, environment_variable
 
 
 class KeyTestCase(BaseTestCase):
@@ -34,9 +34,11 @@ class KeyTestCase(BaseTestCase):
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
         os.environ['BORG_KEYS_DIR'] = self.tmppath
+        self.tmppath2 = tempfile.mkdtemp()
 
     def tearDown(self):
         shutil.rmtree(self.tmppath)
+        shutil.rmtree(self.tmppath2)
 
     class MockRepository:
         class _Location:
@@ -71,6 +73,20 @@ class KeyTestCase(BaseTestCase):
         chunk = Chunk(b'foo')
         self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)))
 
+    def test_keyfile_kfenv(self):
+        keyfile = os.path.join(self.tmppath2, 'keyfile')
+        with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='testkf'):
+            assert not os.path.exists(keyfile)
+            key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
+            assert os.path.exists(keyfile)
+            chunk = Chunk(b'XXX')
+            chunk_id = key.id_hash(chunk.data)
+            chunk_cdata = key.encrypt(chunk)
+            key = KeyfileKey.detect(self.MockRepository(), chunk_cdata)
+            self.assert_equal(chunk, key.decrypt(chunk_id, chunk_cdata))
+            os.unlink(keyfile)
+            self.assert_raises(FileNotFoundError, KeyfileKey.detect, self.MockRepository(), chunk_cdata)
+
     def test_keyfile2(self):
         with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd:
             fd.write(self.keyfile2_key_file)
@@ -78,6 +94,14 @@ class KeyTestCase(BaseTestCase):
         key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
         self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
 
+    def test_keyfile2_kfenv(self):
+        keyfile = os.path.join(self.tmppath2, 'keyfile')
+        with open(keyfile, 'w') as fd:
+            fd.write(self.keyfile2_key_file)
+        with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'):
+            key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
+            self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
+
     def test_passphrase(self):
         os.environ['BORG_PASSPHRASE'] = 'test'
         key = PassphraseKey.create(self.MockRepository(), None)

+ 18 - 0
docs/changes.rst

@@ -70,6 +70,24 @@ Other changes:
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
 
 
+Version 1.0.3 (not released yet)
+--------------------------------
+
+Bug fixes:
+
+- prune: ignore checkpoints, #997
+- prune: fix bad validator, #942
+- fix capabilities extraction on Linux (set xattrs last, after chown())
+
+Other changes:
+
+- update readthedocs URLs, #991
+- add missing docs for "borg break-lock", #992
+- borg create help: add some words about the archive name
+- borg create help: document format tags, #894
+- Vagrantfile: OS X: update osxfuse / install lzma package, #933
+
+
 Version 1.0.2
 -------------
 

+ 3 - 1
docs/development.rst

@@ -139,7 +139,9 @@ Usage::
    # To create and provision the VM:
    vagrant up OS
    # To create an ssh session to the VM:
-   vagrant ssh OS command
+   vagrant ssh OS
+   # To execute a command via ssh in the VM:
+   vagrant ssh OS -c "command args"
    # To shut down the VM:
    vagrant halt OS
    # To shut down and destroy the VM:

+ 62 - 6
docs/faq.rst

@@ -133,6 +133,50 @@ into the repository.
 Yes, as an attacker with access to the remote server could delete (or
 otherwise make unavailable) all your backups.
 
+How can I protect against a hacked backup client?
+-------------------------------------------------
+
+Assume you backup your backup client machine C to the backup server S and
+C gets hacked. In a simple push setup, the attacker could then use borg on
+C to delete all backups residing on S.
+
+These are your options to protect against that:
+
+- Do not allow to permanently delete data from the repo, see :ref:`append-only-mode`.
+- Use a pull-mode setup using ``ssh -R``, see :issue:`900`.
+- Mount C's filesystem on another machine and then create a backup of it.
+- Do not give C filesystem-level access to S.
+
+How can I protect against a hacked backup server?
+-------------------------------------------------
+
+Just in case you got the impression that pull-mode backups are way more safe
+than push-mode, you also need to consider the case that your backup server S
+gets hacked. In case S has access to a lot of clients C, that might bring you
+into even bigger trouble than a hacked backup client in the previous FAQ entry.
+
+These are your options to protect against that:
+
+- Use the standard push-mode setup (see also previous FAQ entry).
+- Mount (the repo part of) S's filesystem on C.
+- Do not give S file-system level access to C.
+- Have your backup server at a well protected place (maybe not reachable from
+  the internet), configure it safely, apply security updates, monitor it, ...
+
+How can I protect against theft, sabotage, lightning, fire, ...?
+----------------------------------------------------------------
+
+In general: if your only backup medium is near the backed-up machine and
+always connected, you can easily get into trouble: they likely share the same
+fate if something goes really wrong.
+
+Thus:
+
+- have multiple backup media
+- have media disconnected from network, power, computer
+- have media at another place
+- have a relatively recent backup on your media
+
 Why do I get "connection closed by remote" after a while?
 ---------------------------------------------------------
 
@@ -140,8 +184,7 @@ When doing a backup to a remote server (using a ssh: repo URL), it sometimes
 stops after a while (some minutes, hours, ... - not immediately) with
 "connection closed by remote" error message. Why?
 
-That's a good question and we are trying to find a good answer in
-`ticket 636 <https://github.com/borgbackup/borg/issues/636>`_.
+That's a good question and we are trying to find a good answer in :issue:`636`.
 
 The borg cache eats way too much disk space, what can I do?
 -----------------------------------------------------------
@@ -180,12 +223,25 @@ Yes, |project_name| supports resuming backups.
 
 During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
 is saved every checkpoint interval (the default value for this is 5
-minutes) containing all the data backed-up until that point. This means
+minutes) containing all the data backed-up until that point. This checkpoint
+archive is a valid archive, but it is only a partial backup. Having it
+in the repo until a successful, full backup is completed is useful because it
+references all the transmitted chunks up to the checkpoint time. This means
 that at most <checkpoint interval> worth of data needs to be retransmitted
-if a backup needs to be restarted.
+if you restart the backup.
+
+If a backup was interrupted, there are no special steps to take;
+just invoke ``borg create`` as you always do. You may use the same archive name
+as in the previous attempt or a different one (e.g. if you always include the current
+datetime); it does not matter.
+|project_name| always does full single-pass backups, so it will start again
+from the beginning - but it will be much faster, because some of the data was
+already stored in the repo (and is still referenced by the checkpoint
+archive), so it does not need to be transmitted and stored again.
 
 Once your backup has finished successfully, you can delete all
-``<archive-name>.checkpoint`` archives.
+``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
+also take care of deleting unneeded checkpoints.
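+
+A minimal sketch of an interrupted and restarted backup (repository path and
+archive name are just placeholders)::
+
+    # first attempt, interrupted after a while - a my-files.checkpoint archive
+    # remains in the repo:
+    $ borg create /path/to/repo::my-files ~/Documents
+
+    # simply run the same command again; chunks that were already stored (and
+    # are still referenced by the checkpoint archive) are not transmitted again:
+    $ borg create /path/to/repo::my-files ~/Documents
+
+    # after the backup succeeded, delete the checkpoint (or let borg prune do it):
+    $ borg delete /path/to/repo::my-files.checkpoint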
 
 If it crashes with a UnicodeError, what can I do?
 -------------------------------------------------
@@ -217,7 +273,7 @@ control which we do not have (and also can't get, even if we wanted).
 So, if you need that, consider RAID or a filesystem that offers redundant
 storage or just make backups to different locations / different hardware.
 
-See also `ticket 225 <https://github.com/borgbackup/borg/issues/225>`_.
+See also :issue:`225`.
 
 Can |project_name| verify data integrity of a backup archive?
 -------------------------------------------------------------

+ 1 - 1
docs/installation.rst

@@ -49,7 +49,7 @@ Ubuntu       `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac
 .. _[community]: https://www.archlinux.org/packages/?name=borg
 .. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
 .. _stretch: https://packages.debian.org/stretch/borgbackup
-.. _unstable/sid: https://packages.debian.org/sid/borgbackup
+.. _sid: https://packages.debian.org/sid/borgbackup
 .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
 .. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
 .. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup

+ 56 - 0
docs/misc/compression.conf

@@ -0,0 +1,56 @@
+# example config file for --compression-from option
+#
+# Format of non-comment / non-empty lines:
+# <compression-spec>:<path/filename pattern>
+# compression-spec is same format as for --compression option
+# path/filename pattern is same format as for --exclude option
+
+# archives / files:
+none:*.gz
+none:*.tgz
+none:*.bz2
+none:*.tbz2
+none:*.xz
+none:*.txz
+none:*.lzma
+none:*.lzo
+none:*.zip
+none:*.rar
+none:*.7z
+
+# audio:
+none:*.mp3
+none:*.ogg
+none:*.oga
+none:*.flac
+none:*.aac
+none:*.m4a
+
+# video:
+none:*.mp4
+none:*.mkv
+none:*.m4v
+none:*.avi
+none:*.mpg
+none:*.mpeg
+none:*.webm
+none:*.vob
+none:*.ts
+none:*.ogv
+none:*.mov
+none:*.flv
+none:*.ogm
+
+# pictures/images
+none:*.jpg
+none:*.jpeg
+none:*.png
+none:*.gif
+
+# disk images
+none:*.dmg
+
+# software archives
+none:*.rpm
+none:*.deb
+none:*.msi

+ 16 - 12
docs/quickstart.rst

@@ -105,23 +105,27 @@ server. The script also uses the :ref:`borg_prune` subcommand to maintain a
 certain number of old archives::
 
     #!/bin/sh
-    REPOSITORY=username@remoteserver.com:backup
-
-    # Backup all of /home and /var/www except a few
-    # excluded directories
-    borg create -v --stats                          \
-        $REPOSITORY::`hostname`-`date +%Y-%m-%d`    \
-        /home                                       \
-        /var/www                                    \
-        --exclude '/home/*/.cache'                  \
-        --exclude /home/Ben/Music/Justin\ Bieber    \
+
+    # setting this, so the repo does not need to be given on the command line:
+    export BORG_REPO=username@remoteserver.com:backup
+
+    # setting this, so you won't be asked for your passphrase - make sure the
+    # script has appropriate owner/group and mode, e.g. root.root 600:
+    export BORG_PASSPHRASE=mysecret
+
+    # Back up the most important stuff:
+    borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
+        /etc                                                    \
+        /home                                                   \
+        /var                                                    \
+        --exclude '/home/*/.cache'                              \
         --exclude '*.pyc'
 
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
-    # archives of THIS machine. --prefix `hostname`- is very important to
+    # archives of THIS machine. Using --prefix is very important to
     # limit prune's operation to this machine's archives and not apply to
     # other machine's archives also.
-    borg prune -v $REPOSITORY --prefix `hostname`- \
+    borg prune -v --prefix `hostname`- \
         --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
 .. backup_compression:

+ 1 - 1
docs/resources.rst

@@ -36,6 +36,6 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
 Software
 --------
 
-- `BorgWeb - a very simple web UI for BorgBackup <https://borgbackup.github.io/borgweb/>`_
+- `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
 - some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
 - `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)

+ 22 - 9
docs/usage.rst

@@ -101,9 +101,11 @@ Some automatic "answerers" (if set, they automatically answer confirmation quest
     answer or ask you interactively, depending on whether retries are allowed (they by default are
     allowed). So please test your scripts interactively before making them a non-interactive script.
 
-Directories:
+Directories and files:
     BORG_KEYS_DIR
         Default to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
+    BORG_KEY_FILE
+        When set, use the given filename as repository key file.
     BORG_CACHE_DIR
         Default to '~/.cache/borg'. This directory contains the local cache and might need a lot
         of space for dealing with big repositories).
@@ -309,10 +311,9 @@ Examples
     # Even slower, even higher compression (N = 0..9)
     $ borg create --compression lzma,N /path/to/repo::arch ~
 
-    # Format tags available for archive name:
-    # {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
-    # add short hostname, backup username and current unixtime (seconds from epoch)
-    $ borg create  /path/to/repo::{hostname}-{user}-{now:%s} ~
+    # Use short hostname, user name and current time in archive name
+    $ borg create /path/to/repo::{hostname}-{user}-{now} ~
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~
 
 .. include:: usage/extract.rst.inc
 
@@ -326,6 +327,9 @@ Examples
     # Extract entire archive and list files while processing
     $ borg extract -v --list /path/to/repo::my-files
 
+    # Verify whether an archive could be successfully extracted, but do not write files to disk
+    $ borg extract --dry-run /path/to/repo::my-files
+
     # Extract the "src" directory
     $ borg extract /path/to/repo::my-files home/USERNAME/src
 
@@ -645,6 +649,12 @@ Examples
     ...
 
 
+.. include:: usage/with-lock.rst.inc
+
+
+.. include:: usage/break-lock.rst.inc
+
+
 Miscellaneous Help
 ------------------
 
@@ -814,13 +824,16 @@ Now, let's see how to restore some LVs from such a backup. ::
     $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
 
 
+.. _append-only-mode:
+
 Append-only mode
 ~~~~~~~~~~~~~~~~
 
 A repository can be made "append-only", which means that Borg will never overwrite or
-delete committed data. This is useful for scenarios where multiple machines back up to
-a central backup server using ``borg serve``, since a hacked machine cannot delete
-backups permanently.
+delete committed data (append-only refers to the segment files, but borg will also
+refuse to delete the repository completely). This is useful for scenarios where a
+backup client machine backs up remotely to a backup server using ``borg serve``, since
+a hacked client machine cannot permanently delete backups on the server.
 
 To activate append-only mode, edit the repository ``config`` file and add a line
 ``append_only=1`` to the ``[repository]`` section (or edit the line if it exists).
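+
+After that edit, the repository ``config`` file could look like this (a minimal
+sketch, all pre-existing settings are kept unchanged and omitted here)::
+
+    [repository]
+    ...
+    append_only=1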
@@ -881,6 +894,6 @@ repository. Make sure that backup client machines only get to access the reposit
 Ensure that no remote access is possible if the repository is temporarily set to normal mode
 for e.g. regular pruning.
 
-Further protections can be implemented, but are outside of Borgs scope. For example,
+Further protections can be implemented, but are outside of Borg's scope. For example,
 file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on
 new data files.

+ 32 - 0
docs/usage/with-lock.rst.inc

@@ -0,0 +1,32 @@
+.. _borg_with-lock:
+
+borg with-lock
+--------------
+::
+
+    borg with-lock <options> REPOSITORY COMMAND ARGS
+
+positional arguments
+    REPOSITORY
+        repository to lock
+    COMMAND
+        command to run
+    ARGS
+        command arguments
+
+`Common options`_
+    |
+
+Description
+~~~~~~~~~~~
+
+This command runs a user-specified command while the repository lock is held.
+
+It will first try to acquire the lock (making sure that no other operation is
+running in the repo), then execute the given command as a subprocess, wait
+for its termination, release the lock and return the user command's return
+code as borg's return code.
+
+Note: if you copy a repository while the lock is held, the lock will also be
+      present in the copy. Thus, before using borg on the copy, you need to
+      use "borg break-lock" on it.

+ 7 - 4
setup.py

@@ -117,12 +117,13 @@ if sys.platform == 'win32':
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..")))
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..", "..")))
 
-
 possible_openssl_prefixes = None
 if sys.platform == 'win32':
     possible_openssl_prefixes = windowsIncludeDirs
 else:
-    possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local']
+    possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl',
+                                 '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
 if os.environ.get('BORG_OPENSSL_PREFIX'):
     possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
 ssl_prefix = detect_openssl(possible_openssl_prefixes)
@@ -135,7 +136,9 @@ possible_lz4_prefixes = None
 if sys.platform == 'win32':
     possible_lz4_prefixes = windowsIncludeDirs
 else:
-    possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local']
+    possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4',
+                             '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
 if os.environ.get('BORG_LZ4_PREFIX'):
     possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX'))
 lz4_prefix = detect_lz4(possible_lz4_prefixes)
@@ -327,7 +330,7 @@ setup(
     },
     author='The Borg Collective (see AUTHORS file)',
     author_email='borgbackup@python.org',
-    url='https://borgbackup.readthedocs.org/',
+    url='https://borgbackup.readthedocs.io/',
     description='Deduplicated, encrypted, authenticated and compressed backups',
     long_description=long_description,
     license='BSD',