
Merge pull request #1036 from Anakonda/windows

Update windows branch
enkore 9 years ago
commit 7116888780

+ 1 - 0
.travis/install.sh

@@ -15,6 +15,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
     fi

     brew install lz4
+    brew install xz  # required for python lzma module
     brew outdated pyenv || brew upgrade pyenv

     case "${TOXENV}" in

+ 1 - 1
README.rst

@@ -107,7 +107,7 @@ Now doing another backup, just to show off the great deduplication::
     -----------------------------------------------------------------------------


-For a graphical frontend refer to our complementary project `BorgWeb <https://borgbackup.github.io/borgweb/>`_.
+For a graphical frontend refer to our complementary project `BorgWeb <https://borgweb.readthedocs.io/>`_.

 Links
 =====

+ 3 - 2
Vagrantfile

@@ -54,14 +54,15 @@ def packages_darwin
     # install all the (security and other) updates
     sudo softwareupdate --install --all
     # get osxfuse 3.0.x pre-release code from github:
-    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.9/osxfuse-3.0.9.dmg >osxfuse.dmg
+    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg
     MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
-    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.9.pkg" -target /
+    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target /
     sudo chown -R vagrant /usr/local  # brew must be able to create stuff here
     ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     brew update
     brew install openssl
     brew install lz4
+    brew install xz  # required for python lzma module
     brew install fakeroot
     brew install git
     brew install pkgconfig

+ 4 - 4
borg/_chunker.c

@@ -184,9 +184,9 @@ chunker_fill(Chunker *c)
         length = c->bytes_read - offset;
         #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) )

-	// Only do it once per run.
-	if (pagemask == 0)
-		pagemask = getpagesize() - 1;
+        // Only do it once per run.
+        if (pagemask == 0)
+            pagemask = getpagesize() - 1;

         // We tell the OS that we do not need the data that we just have read any
         // more (that it maybe has in the cache). This avoids that we spoil the
@@ -207,7 +207,7 @@ chunker_fill(Chunker *c)
             // fadvise. This will cancel the final page and is not part
             // of the above workaround.
             overshoot = 0;
-	}
+        }

         posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
         #endif

+ 20 - 11
borg/archive.py

@@ -15,13 +15,14 @@ import sys
 import time
 from io import BytesIO
 from . import xattr
-from .compress import Compressor, COMPR_BUFFER
+from .compress import COMPR_BUFFER
 from .constants import *  # NOQA
 from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
     ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
-    PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
+    PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
+    CompressionDecider1, CompressionDecider2, CompressionSpec
 from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
@@ -125,7 +126,7 @@ class Archive:

     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, end=None):
+                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None):
         self.cwd = os.getcwd()
         self.key = key
         self.repository = repository
@@ -148,6 +149,9 @@ class Archive:
         if create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
             self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
+            self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+                                                            compression_files or [])
+            key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.time()
@@ -601,11 +605,15 @@ Number of files: {0.stats.nfiles}'''.format(
         }
         # Only chunkify the file if needed
         if chunks is None:
+            compress = self.compression_decider1.decide(path)
+            logger.debug('%s -> compression %s', path, compress['name'])
             fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
                 chunks = []
                 for data in self.chunker.chunkify(fd, fh):
-                    chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
+                    chunks.append(cache.add_chunk(self.key.id_hash(data),
+                                                  Chunk(data, compress=compress),
+                                                  self.stats))
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
             cache.memorize_file(path_hash, st, [c.id for c in chunks])
@@ -948,7 +956,7 @@ class ArchiveRecreater:

     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None,
+                 chunker_params=None, compression=None, compression_files=None,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -961,12 +969,12 @@ class ArchiveRecreater:
         self.keep_tag_files = keep_tag_files

         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.compression = compression or dict(name='none')
-        self.seen_chunks = set()
         self.recompress = bool(compression)
-        compr_args = dict(buffer=COMPR_BUFFER)
-        compr_args.update(self.compression)
-        key.compressor = Compressor(**compr_args)
+        self.compression = compression or CompressionSpec('none')
+        self.seen_chunks = set()
+        self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+                                                            compression_files or [])
+        key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))

         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
         logger.debug("Autocommit threshold: %s", format_file_size(self.autocommit_threshold))
@@ -1054,6 +1062,7 @@ class ArchiveRecreater:

     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
+        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item[b'chunks']:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1248,7 +1257,7 @@ class ArchiveRecreater:
     def create_target_archive(self, name):
         target = Archive(self.repository, self.key, self.manifest, name, create=True,
                           progress=self.progress, chunker_params=self.chunker_params, cache=self.cache,
-                          checkpoint_interval=0)
+                          checkpoint_interval=0, compression=self.compression)
         target.recreate_partial_chunks = None
         target.recreate_uncomitted_bytes = 0
         return target

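The archive.py changes above split the compression decision into two stages: at backup time, CompressionDecider1 picks a compression spec per file path and attaches it to every chunk as compress= metadata; later, the key's CompressionDecider2 makes the final per-chunk choice (see borg/helpers.py and borg/key.py below). A rough standalone sketch of that handoff, using a simplified stand-in for borg's Chunk type rather than the real classes:

    from collections import namedtuple

    # simplified stand-in for borg.helpers.Chunk: payload plus free-form metadata
    Chunk = namedtuple('Chunk', ['data', 'meta'])

    DEFAULT = {'name': 'none'}          # what --compression would default to

    def decide_per_file(path):
        # stage 1: per-path decision, e.g. skip compression for already-compressed files
        if path.endswith(('.zip', '.gz', '.mp4')):
            return {'name': 'none'}
        return {'name': 'zlib', 'level': 6}

    def decide_per_chunk(chunk):
        # stage 2: use the per-file decision carried in the chunk metadata, else the default
        return chunk.meta.get('compress', DEFAULT)

    chunk = Chunk(b'some file data', {'compress': decide_per_file('/home/user/notes.txt')})
    print(decide_per_chunk(chunk))      # -> {'name': 'zlib', 'level': 6}
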
+ 97 - 12
borg/archiver.py

@@ -9,9 +9,11 @@ import hashlib
 import inspect
 import io
 import os
+import re
 import shlex
 import signal
 import stat
+import subprocess
 import sys
 import textwrap
 import traceback
@@ -34,6 +36,7 @@ from .constants import *  # NOQA
 from .key import key_creator, RepoKey, PassphraseKey
 from .archive import Archive, ArchiveChecker, ArchiveRecreater
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
+from .selftest import selftest
 from .hashindex import ChunkIndexEntry

 has_lchflags = hasattr(os, 'lchflags')
@@ -285,14 +288,12 @@ class Archiver:
         dry_run = args.dry_run
         t0 = datetime.utcnow()
         if not dry_run:
-            compr_args = dict(buffer=COMPR_BUFFER)
-            compr_args.update(args.compression)
-            key.compressor = Compressor(**compr_args)
             with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
                                   numeric_owner=args.numeric_owner, progress=args.progress,
-                                  chunker_params=args.chunker_params, start=t0)
+                                  chunker_params=args.chunker_params, start=t0,
+                                  compression=args.compression, compression_files=args.compression_files)
                 create_inner(archive, cache)
         else:
             create_inner(None, None)
@@ -788,9 +789,20 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
             return self.exit_code
-        archives = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
+        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.prefix:
         if args.prefix:
-            archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
+            archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
+        is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
+        checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
+        # keep the latest checkpoint, if there is no later non-checkpoint archive
+        if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]:
+            keep_checkpoints = checkpoints[:1]
+        else:
+            keep_checkpoints = []
+        checkpoints = set(checkpoints)
+        # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
+        # that is newer than a successfully completed backup - and killing the successful backup.
+        archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         keep = []
         keep = []
         if args.within:
         if args.within:
             keep += prune_within(archives, args.within)
             keep += prune_within(archives, args.within)
@@ -808,11 +820,10 @@ class Archiver:
             keep += prune_split(archives, '%Y-%m', args.monthly, keep)
         if args.yearly:
             keep += prune_split(archives, '%Y', args.yearly, keep)
-
-        to_delete = set(archives) - set(keep)
+        to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
-            for archive in archives:
+            for archive in archives_checkpoints:
                 if archive in to_delete:
                     if args.dry_run:
                         if args.output_list:
@@ -874,8 +885,8 @@ class Archiver:

         recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
-                                     keep_tag_files=args.keep_tag_files,
-                                     compression=args.compression, chunker_params=args.chunker_params,
+                                     keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
+                                     compression=args.compression, compression_files=args.compression_files,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -902,6 +913,21 @@ class Archiver:
         cache.commit()
         return self.exit_code

+    @with_repository(manifest=False)
+    def do_with_lock(self, args, repository):
+        """run a user specified command with the repository lock held"""
+        # re-write manifest to start a repository transaction - this causes a
+        # lock upgrade to exclusive for remote (and also for local) repositories.
+        # by using manifest=False in the decorator, we avoid having to require
+        # the encryption key (and can operate just with encrypted data).
+        data = repository.get(Manifest.MANIFEST_ID)
+        repository.put(Manifest.MANIFEST_ID, data)
+        try:
+            # we exit with the return code we get from the subprocess
+            return subprocess.call([args.command] + args.args)
+        finally:
+            repository.rollback()
+
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -1265,6 +1291,12 @@ class Archiver:
         traversing all paths specified. The archive will consume almost no disk space for
         files or parts of files that have already been stored in other archives.

+        The archive name needs to be unique. It must not end in '.checkpoint' or
+        '.checkpoint.N' (with N being a number), because these names are used for
+        checkpoints and treated in special ways.
+
+        In the archive name, you may use the following format tags:
+        {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}

         To speed up pulling backups over sshfs and similar network file systems which do
         not provide correct inode information the --ignore-inode flag can be used. This
@@ -1350,11 +1382,16 @@ class Archiver:
                                    type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
                                    help='select compression algorithm (and level):\n'
                                         'none == no compression (default),\n'
+                                        'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+                                        '              being any valid compression algorithm (and optional level),\n'
                                         'lz4 == lz4,\n'
                                         'zlib == zlib (default level 6),\n'
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--compression-from', dest='compression_files',
+                                   type=argparse.FileType('r'), action='append',
+                                   metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')

         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
@@ -1369,6 +1406,10 @@ class Archiver:
         be restricted by using the ``--exclude`` option.

         See the output of the "borg help patterns" command for more help on exclude patterns.
+
+        By using ``--dry-run``, you can do all extraction steps except actually writing the
+        output data: reading metadata and data chunks from the repo, checking the hash/hmac,
+        decrypting, decompressing.
         """)
         subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False,
                                           description=self.do_extract.__doc__,
@@ -1603,11 +1644,20 @@ class Archiver:
         any of the specified retention options. This command is normally used by
         automated backup scripts wanting to keep a certain number of historic backups.

+        Also, prune automatically removes checkpoint archives (incomplete archives left
+        behind by interrupted backup runs) except if the checkpoint is the latest
+        archive (and thus still needed). Checkpoint archives are not considered when
+        comparing archive counts against the retention limits (--keep-*).
+
         If a prefix is set with -P, then only archives that start with the prefix are
         considered for deletion and only those archives count towards the totals
         specified by the rules.
         Otherwise, *all* archives in the repository are candidates for deletion!

+        If you have multiple sequences of archives with different data sets (e.g.
+        from different machines) in one shared repository, use one prune call per
+        data set that matches only the respective archives using the -P option.
+
         The "--keep-within" option takes an argument of the form "<int><char>",
         where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
         to keep all archives that were created within the past 48 hours.
@@ -1816,11 +1866,16 @@ class Archiver:
                                    type=CompressionSpec, default=None, metavar='COMPRESSION',
                                    help='select compression algorithm (and level):\n'
                                         'none == no compression (default),\n'
+                                        'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+                                        '              being any valid compression algorithm (and optional level),\n'
                                         'lz4 == lz4,\n'
                                         'zlib == zlib (default level 6),\n'
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--compression-from', dest='compression_files',
+                                   type=argparse.FileType('r'), action='append',
+                                   metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
         archive_group.add_argument('--chunker-params', dest='chunker_params',
                                    type=ChunkerParams, default=None,
                                    metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
@@ -1832,6 +1887,32 @@ class Archiver:
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                                help='paths to recreate; patterns are supported')

+        with_lock_epilog = textwrap.dedent("""
+        This command runs a user-specified command while the repository lock is held.
+
+        It will first try to acquire the lock (make sure that no other operation is
+        running in the repo), then execute the given command as a subprocess and wait
+        for its termination, release the lock and return the user command's return
+        code as borg's return code.
+
+        Note: if you copy a repository with the lock held, the lock will be present in
+              the copy, obviously. Thus, before using borg on the copy, you need to
+              use "borg break-lock" on it.
+        """)
+        subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False,
+                                          description=self.do_with_lock.__doc__,
+                                          epilog=with_lock_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='run user command with lock held')
+        subparser.set_defaults(func=self.do_with_lock)
+        subparser.add_argument('location', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to lock')
+        subparser.add_argument('command', metavar='COMMAND',
+                               help='command to run')
+        subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER,
+                               help='command arguments')
+
         subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False,
                                           description='Extra help')
         subparser.add_argument('--epilog-only', dest='epilog_only',
@@ -1926,13 +2007,17 @@ class Archiver:
         update_excludes(args)
         return args

+    def prerun_checks(self, logger):
+        check_extension_modules()
+        selftest(logger)
+
     def run(self, args):
         os.umask(args.umask)  # early, before opening files
         self.lock_wait = args.lock_wait
         setup_logging(level=args.log_level, is_serve=args.func == self.do_serve)  # do not use loggers before this!
         if args.show_version:
             logger.info('borgbackup version %s' % __version__)
-        check_extension_modules()
+        self.prerun_checks(logger)
         if is_slow_msgpack():
             logger.warning("Using a pure-python msgpack! This will result in lower performance.")
         return args.func(args)

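The prune changes above give checkpoint archives (partial backups left behind by interrupted runs) special treatment: they never count against the --keep-* rules, and only the newest checkpoint is kept, and only as long as no newer completed archive exists. A rough standalone sketch of that selection; the archive names are invented for illustration, and borg itself operates on ArchiveInfo objects rather than strings:

    import re

    is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search

    def split_archives(archives_checkpoints):
        """archives_checkpoints: names sorted newest first (as list_archive_infos(reverse=True) returns them)"""
        checkpoints = [a for a in archives_checkpoints if is_checkpoint(a)]
        # keep the newest checkpoint only if no newer regular archive exists
        if checkpoints and archives_checkpoints[0] == checkpoints[0]:
            keep_checkpoints = checkpoints[:1]
        else:
            keep_checkpoints = []
        # only regular archives are fed into the --keep-* retention rules
        archives = [a for a in archives_checkpoints if a not in set(checkpoints)]
        return archives, checkpoints, keep_checkpoints

    names = ['backup.checkpoint', 'backup-2016-03-01', 'backup-2016-02-29.checkpoint']
    print(split_archives(names))
    # -> (['backup-2016-03-01'],
    #     ['backup.checkpoint', 'backup-2016-02-29.checkpoint'],
    #     ['backup.checkpoint'])
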
+ 2 - 8
borg/crypto.pyx

@@ -1,15 +1,9 @@
-"""A thin OpenSSL wrapper
+"""A thin OpenSSL wrapper"""
 
 
-This could be replaced by PyCrypto maybe?
-"""
 from libc.stdlib cimport malloc, free
 from libc.stdlib cimport malloc, free
 from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
 from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
 
 
-API_VERSION = 2
-
-
-cdef extern from "openssl/rand.h":
-    int  RAND_bytes(unsigned char *buf, int num)
+API_VERSION = 3
 
 
 
 
 cdef extern from "openssl/evp.h":
 cdef extern from "openssl/evp.h":

+ 102 - 3
borg/helpers.py

@@ -36,6 +36,7 @@ from . import hashindex
 from . import chunker
 from .constants import *  # NOQA
 from . import crypto
+from .compress import COMPR_BUFFER, get_compressor
 from . import shellpattern
 import msgpack
 import msgpack.fallback
@@ -83,7 +84,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
-    if crypto.API_VERSION != 2:
+    if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 2:
         raise ExtensionModuleError
@@ -285,8 +286,7 @@ def load_excludes(fh):
     """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
     """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
     both line ends are ignored.
     both line ends are ignored.
     """
     """
-    patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
-    return [parse_pattern(pattern) for pattern in patterns if pattern]
+    return [parse_pattern(pattern) for pattern in clean_lines(fh)]
 
 
 
 
 def update_excludes(args):
 def update_excludes(args):
@@ -539,6 +539,12 @@ def CompressionSpec(s):
         else:
             raise ValueError
         return dict(name=name, level=level)
+    if name == 'auto':
+        if 2 <= count <= 3:
+            compression = ','.join(values[1:])
+        else:
+            raise ValueError
+        return dict(name=name, spec=CompressionSpec(compression))
     raise ValueError

 
@@ -1484,3 +1490,96 @@ except ImportError:

 def scandir_inorder(path='.'):
     return sorted(scandir(path), key=lambda dirent: dirent.inode())
+
+
+def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
+    """
+    clean lines (usually read from a config file):
+
+    1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments.
+
+    note: only "pure comment lines" are supported, no support for "trailing comments".
+
+    :param lines: input line iterator (e.g. list or open text file) that gives unclean input lines
+    :param lstrip: lstrip call arguments or False, if lstripping is not desired
+    :param rstrip: rstrip call arguments or False, if rstripping is not desired
+    :param remove_comments: remove comment lines (lines starting with "#")
+    :param remove_empty: remove empty lines
+    :return: yields processed lines
+    """
+    for line in lines:
+        if lstrip is not False:
+            line = line.lstrip(lstrip)
+        if rstrip is not False:
+            line = line.rstrip(rstrip)
+        if remove_empty and not line:
+            continue
+        if remove_comments and line.startswith('#'):
+            continue
+        yield line
+
+
+class CompressionDecider1:
+    def __init__(self, compression, compression_files):
+        """
+        Initialize a CompressionDecider instance (and read config files, if needed).
+
+        :param compression: default CompressionSpec (e.g. from --compression option)
+        :param compression_files: list of compression config files (e.g. from --compression-from) or
+                                  a list of other line iterators
+        """
+        self.compression = compression
+        if not compression_files:
+            self.matcher = None
+        else:
+            self.matcher = PatternMatcher(fallback=compression)
+            for file in compression_files:
+                try:
+                    for line in clean_lines(file):
+                        try:
+                            compr_spec, fn_pattern = line.split(':', 1)
+                        except:
+                            continue
+                        self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec))
+                finally:
+                    if hasattr(file, 'close'):
+                        file.close()
+
+    def decide(self, path):
+        if self.matcher is not None:
+            return self.matcher.match(path)
+        return self.compression
+
+
+class CompressionDecider2:
+    def __init__(self, compression):
+        self.compression = compression
+
+    def decide(self, chunk):
+        # nothing fancy here yet: we either use what the metadata says or the default
+        # later, we can decide based on the chunk data also.
+        # if we compress the data here to decide, we can even update the chunk data
+        # and modify the metadata as desired.
+        compr_spec = chunk.meta.get('compress', self.compression)
+        compr_args = dict(buffer=COMPR_BUFFER)
+        compr_args.update(compr_spec)
+        if compr_args['name'] == 'auto':
+            # we did not decide yet, use heuristic:
+            compr_args, chunk = self.heuristic_lz4(compr_args, chunk)
+        return compr_args, chunk
+
+    def heuristic_lz4(self, compr_args, chunk):
+        meta, data = chunk
+        lz4 = get_compressor('lz4', buffer=compr_args['buffer'])
+        cdata = lz4.compress(data)
+        data_len = len(data)
+        cdata_len = len(cdata)
+        if cdata_len < data_len:
+            compr_spec = compr_args['spec']
+        else:
+            # uncompressible - we could have a special "uncompressible compressor"
+            # that marks such data as uncompressible via compression-type metadata.
+            compr_spec = CompressionSpec('none')
+        compr_args.update(compr_spec)
+        logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec)
+        return compr_args, Chunk(data, **meta)

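For reference, the CompressionDecider1 added above reads each non-comment line of a --compression-from file as a compression spec and a path pattern separated by the first ':' (clean_lines strips blank lines and '#' comments first). A minimal standalone illustration of that parsing and matching, with fnmatch standing in for borg's pattern matcher and invented rules:

    from fnmatch import fnmatch

    rules_text = '''
    # 'compression-spec:path-pattern', one rule per line
    none:*.gz
    none:*.mp4
    lzma,6:*/texts/*
    '''

    def parse_rules(lines):
        rules = []
        for line in lines:
            line = line.strip()
            if not line or line.startswith('#'):
                continue                       # roughly what clean_lines() does in borg
            spec, pattern = line.split(':', 1)
            rules.append((pattern, spec))
        return rules

    def decide(path, rules, default='zlib,9'):
        # first matching pattern wins, otherwise fall back to the --compression default
        for pattern, spec in rules:
            if fnmatch(path, pattern):
                return spec
        return default

    rules = parse_rules(rules_text.splitlines())
    print(decide('/data/videos/movie.mp4', rules))   # -> none
    print(decide('/data/texts/novel.txt', rules))    # -> lzma,6
    print(decide('/data/misc/other.bin', rules))     # -> zlib,9
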
+ 43 - 11
borg/key.py

@@ -7,13 +7,13 @@ import textwrap
 from hmac import compare_digest
 from hashlib import sha256, pbkdf2_hmac

-from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex
+from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex, CompressionDecider2, CompressionSpec
 from .logger import create_logger
 logger = create_logger()

 from .constants import *  # NOQA
 from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256
-from .compress import Compressor, COMPR_BUFFER
+from .compress import Compressor, COMPR_BUFFER, get_compressor
 import msgpack

 PREFIX = b'\0' * 8
@@ -35,6 +35,14 @@ class KeyfileNotFoundError(Error):
     """No key file for repository {} found in {}."""
     """No key file for repository {} found in {}."""
 
 
 
 
+class KeyfileInvalidError(Error):
+    """Invalid key file for repository {} found in {}."""
+
+
+class KeyfileMismatchError(Error):
+    """Mismatch between repository {} and key file {}."""
+
+
 class RepoKeyNotFoundError(Error):
 class RepoKeyNotFoundError(Error):
     """No key entry found in the config of repository {}."""
     """No key entry found in the config of repository {}."""
 
 
@@ -71,12 +79,20 @@ class KeyBase:
         self.TYPE_STR = bytes([self.TYPE])
         self.repository = repository
         self.target = None  # key location file path / repo obj
-        self.compressor = Compressor('none', buffer=COMPR_BUFFER)
+        self.compression_decider2 = CompressionDecider2(CompressionSpec('none'))
+        self.compressor = Compressor('none', buffer=COMPR_BUFFER)  # for decompression

     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
         """

+    def compress(self, chunk):
+        compr_args, chunk = self.compression_decider2.decide(chunk)
+        compressor = Compressor(**compr_args)
+        meta, data = chunk
+        data = compressor.compress(data)
+        return Chunk(data, **meta)
+
     def encrypt(self, chunk):
         pass
 
@@ -102,8 +118,8 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()

     def encrypt(self, chunk):
-        meta, data = chunk
-        return b''.join([self.TYPE_STR, self.compressor.compress(data)])
+        chunk = self.compress(chunk)
+        return b''.join([self.TYPE_STR, chunk.data])

     def decrypt(self, id, data):
         if data[0] != self.TYPE:
@@ -135,9 +151,9 @@ class AESKeyBase(KeyBase):
         return hmac_sha256(self.id_key, data)

     def encrypt(self, chunk):
-        data = self.compressor.compress(chunk.data)
+        chunk = self.compress(chunk)
         self.enc_cipher.reset()
-        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
+        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(chunk.data)))
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))
 
@@ -396,17 +412,33 @@ class KeyfileKey(KeyfileKeyBase):
     TYPE = 0x00
     FILE_ID = 'BORG_KEY'

+    def sanity_check(self, filename, id):
+        with open(filename, 'r') as fd:
+            line = fd.readline().strip()
+            if not line.startswith(self.FILE_ID):
+                raise KeyfileInvalidError(self.repository._location.canonical_path(), filename)
+            if line[len(self.FILE_ID) + 1:] != id:
+                raise KeyfileMismatchError(self.repository._location.canonical_path(), filename)
+            return filename
+
     def find_key(self):
+        id = self.repository.id_str
+        keyfile = os.environ.get('BORG_KEY_FILE')
+        if keyfile:
+            return self.sanity_check(keyfile, id)
         keys_dir = get_keys_dir()
         for name in os.listdir(keys_dir):
             filename = os.path.join(keys_dir, name)
-            with open(filename, 'r') as fd:
-                line = fd.readline().strip()
-                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str:
-                    return filename
+            try:
+                return self.sanity_check(filename, id)
+            except (KeyfileInvalidError, KeyfileMismatchError):
+                pass
         raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())

     def get_new_target(self, args):
+        keyfile = os.environ.get('BORG_KEY_FILE')
+        if keyfile:
+            return keyfile
         filename = args.location.to_key_filename()
         path = filename
         i = 1

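The key classes above now route every chunk through compression_decider2 before encryption; in 'auto' mode (see CompressionDecider2.heuristic_lz4 in borg/helpers.py above) a cheap lz4 trial compression decides whether the configured algorithm is worth applying or the chunk should be stored uncompressed. A standalone sketch of that idea, using zlib level 1 from the standard library as a stand-in for the lz4 probe:

    import os
    import zlib

    def decide_auto(data, configured='lzma,6'):
        # cheap probe: if even a fast compressor cannot shrink the chunk,
        # treat it as incompressible and store it uncompressed
        probe = zlib.compress(data, 1)
        return configured if len(probe) < len(data) else 'none'

    print(decide_auto(b'x' * 4096))        # highly compressible -> lzma,6
    print(decide_auto(os.urandom(4096)))   # random data -> none (almost always)
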
+ 79 - 0
borg/selftest.py

@@ -0,0 +1,79 @@
+"""
+Self testing module
+===================
+
+The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues
+with the way Borg was compiled or packaged and also bugs in Borg itself.
+
+These tests are a subset of the borg/testsuite and are run with Python's built-in unittest, hence none of
+the tests used for this can or should be ported to py.test currently.
+
+To assert that self test discovery works correctly, the number of tests is kept in the SELFTEST_COUNT
+variable. SELFTEST_COUNT must be updated whenever tests are added to or removed from any of the test
+cases used here.
+"""
+
+
+import sys
+import time
+from unittest import TestResult, TestSuite, defaultTestLoader
+
+from .testsuite.hashindex import HashIndexDataTestCase, HashIndexRefcountingTestCase, HashIndexTestCase
+from .testsuite.crypto import CryptoTestCase
+from .testsuite.chunker import ChunkerTestCase
+
+SELFTEST_CASES = [
+    HashIndexDataTestCase,
+    HashIndexRefcountingTestCase,
+    HashIndexTestCase,
+    CryptoTestCase,
+    ChunkerTestCase,
+]
+
+SELFTEST_COUNT = 27
+
+
+class SelfTestResult(TestResult):
+    def __init__(self):
+        super().__init__()
+        self.successes = []
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self.successes.append(test)
+
+    def test_name(self, test):
+        return test.shortDescription() or str(test)
+
+    def log_results(self, logger):
+        for test, failure in self.errors + self.failures + self.unexpectedSuccesses:
+            logger.error('self test %s FAILED:\n%s', self.test_name(test), failure)
+        for test, reason in self.skipped:
+            logger.warning('self test %s skipped: %s', self.test_name(test), reason)
+
+    def successful_test_count(self):
+        return len(self.successes)
+
+
+def selftest(logger):
+    selftest_started = time.perf_counter()
+    result = SelfTestResult()
+    test_suite = TestSuite()
+    for test_case in SELFTEST_CASES:
+        test_suite.addTest(defaultTestLoader.loadTestsFromTestCase(test_case))
+    test_suite.run(result)
+    result.log_results(logger)
+    successful_tests = result.successful_test_count()
+    count_mismatch = successful_tests != SELFTEST_COUNT
+    if result.wasSuccessful() and count_mismatch:
+        # only print this if all tests succeeded
+        logger.error("self test count (%d != %d) mismatch, either test discovery is broken or a test was added "
+                     "without updating borg.selftest",
+                     successful_tests, SELFTEST_COUNT)
+    if not result.wasSuccessful() or count_mismatch:
+        logger.error("self test failed\n"
+                     "This is a bug either in Borg or in the package / distribution you use.")
+        sys.exit(2)
+        assert False, "sanity assertion failed: ran beyond sys.exit()"
+    selftest_elapsed = time.perf_counter() - selftest_started
+    logger.debug("%d self tests completed in %.2f seconds", successful_tests, selftest_elapsed)

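The self test is normally triggered from Archiver.prerun_checks (see borg/archiver.py above), but it can also be run by hand; something like the following should work, assuming an installed borg package and a configured logger:

    import logging

    from borg.selftest import selftest

    logging.basicConfig(level=logging.DEBUG)
    # runs the bundled unittest cases and exits with status 2 if any of them fail
    selftest(logging.getLogger('borg.selftest'))
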
+ 12 - 5
borg/testsuite/__init__.py

@@ -9,7 +9,8 @@ import sysconfig
 import time
 import unittest
 from ..xattr import get_all
-from ..logger import setup_logging
+
+# Note: this is used by borg.selftest, do not use or import py.test functionality here.

 try:
     import llfuse
@@ -18,6 +19,11 @@ try:
 except ImportError:
     have_fuse_mtime_ns = False

+try:
+    from pytest import raises
+except ImportError:
+    raises = None
+
 has_lchflags = hasattr(os, 'lchflags')

 
@@ -32,9 +38,6 @@ else:
 if sys.platform.startswith('netbsd'):
     st_mtime_ns_round = -4  # only >1 microsecond resolution here?

-# Ensure that the loggers exist for all tests
-setup_logging()
-

 class BaseTestCase(unittest.TestCase):
     """
@@ -43,9 +46,13 @@ class BaseTestCase(unittest.TestCase):
     assert_not_in = unittest.TestCase.assertNotIn
     assert_equal = unittest.TestCase.assertEqual
     assert_not_equal = unittest.TestCase.assertNotEqual
-    assert_raises = unittest.TestCase.assertRaises
     assert_true = unittest.TestCase.assertTrue

+    if raises:
+        assert_raises = staticmethod(raises)
+    else:
+        assert_raises = unittest.TestCase.assertRaises
+
     @contextmanager
     def assert_creates_file(self, path):
         self.assert_true(not os.path.exists(path), '{} should not exist'.format(path))

+ 89 - 1
borg/testsuite/archiver.py

@@ -61,6 +61,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw):
             sys.stdout = sys.stderr = output = StringIO()
             if archiver is None:
                 archiver = Archiver()
+            archiver.prerun_checks = lambda *args: None
             archiver.exit_code = EXIT_SUCCESS
             args = archiver.parse_args(list(args))
             ret = archiver.run(args)
@@ -987,16 +988,39 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', self.repository_location)
         self.cmd('create', self.repository_location + '::test1', src_dir)
         self.cmd('create', self.repository_location + '::test2', src_dir)
+        # these are not really checkpoints, but they look like some:
+        self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
+        self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
+        self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
         self.assert_in('Would prune:     test1', output)
+        # must keep the latest non-checkpoint archive:
+        self.assert_in('Keeping archive: test2', output)
+        # must keep the latest checkpoint archive:
+        self.assert_in('Keeping archive: test4.checkpoint', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test2', output)
+        self.assert_in('test3.checkpoint', output)
+        self.assert_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
         self.cmd('prune', self.repository_location, '--keep-daily=2')
         output = self.cmd('list', self.repository_location)
         self.assert_not_in('test1', output)
+        # the latest non-checkpoint archive must still be there:
         self.assert_in('test2', output)
+        # only the latest checkpoint archive must still be there:
+        self.assert_not_in('test3.checkpoint', output)
+        self.assert_not_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
+        # now we supersede the latest checkpoint by a successful backup:
+        self.cmd('create', self.repository_location + '::test5', src_dir)
+        self.cmd('prune', self.repository_location, '--keep-daily=2')
+        output = self.cmd('list', self.repository_location)
+        # all checkpoints should be gone now:
+        self.assert_not_in('checkpoint', output)
+        # the latest archive must still be there
+        self.assert_in('test5', output)

     def test_prune_repository_save_space(self):
         self.cmd('init', self.repository_location)
@@ -1088,6 +1112,64 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         size, csize, path = output.split("\n")[1].split(" ")
         assert int(csize) < int(size)
 
+    def _get_sizes(self, compression, compressible, size=10000):
+        if compressible:
+            contents = b'X' * size
+        else:
+            contents = os.urandom(size)
+        self.create_regular_file('file', contents=contents)
+        self.cmd('init', '--encryption=none', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '-C', compression, archive, 'input')
+        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', archive)
+        size, csize, path = output.split("\n")[1].split(" ")
+        return int(size), int(csize)
+
+    def test_compression_none_compressible(self):
+        size, csize = self._get_sizes('none', compressible=True)
+        assert csize >= size
+        assert csize == size + 3
+
+    def test_compression_none_uncompressible(self):
+        size, csize = self._get_sizes('none', compressible=False)
+        assert csize >= size
+        assert csize == size + 3
+
+    def test_compression_zlib_compressible(self):
+        size, csize = self._get_sizes('zlib', compressible=True)
+        assert csize < size * 0.1
+        assert csize == 35
+
+    def test_compression_zlib_uncompressible(self):
+        size, csize = self._get_sizes('zlib', compressible=False)
+        assert csize >= size
+
+    def test_compression_auto_compressible(self):
+        size, csize = self._get_sizes('auto,zlib', compressible=True)
+        assert csize < size * 0.1
+        assert csize == 35  # same as compression 'zlib'
+
+    def test_compression_auto_uncompressible(self):
+        size, csize = self._get_sizes('auto,zlib', compressible=False)
+        assert csize >= size
+        assert csize == size + 3  # same as compression 'none'
+
+    def test_compression_lz4_compressible(self):
+        size, csize = self._get_sizes('lz4', compressible=True)
+        assert csize < size * 0.1
+
+    def test_compression_lz4_uncompressible(self):
+        size, csize = self._get_sizes('lz4', compressible=False)
+        assert csize >= size
+
+    def test_compression_lzma_compressible(self):
+        size, csize = self._get_sizes('lzma', compressible=True)
+        assert csize < size * 0.1
+
+    def test_compression_lzma_uncompressible(self):
+        size, csize = self._get_sizes('lzma', compressible=False)
+        assert csize >= size
+
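Judging by the assertions above, the new ``auto,zlib`` spec behaves like ``none`` for incompressible data and like plain ``zlib`` otherwise. On the command line this mode would presumably be selected like so (repo path and archive name are only examples)::

    $ borg create --compression auto,zlib /path/to/repo::archive ~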
     def test_break_lock(self):
     def test_break_lock(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)
         self.cmd('break-lock', self.repository_location)
         self.cmd('break-lock', self.repository_location)
@@ -1398,6 +1480,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         info_after = self.cmd('info', self.repository_location + '::test')
         info_after = self.cmd('info', self.repository_location + '::test')
         assert info_before == info_after  # includes archive ID
         assert info_before == info_after  # includes archive ID
 
 
+    def test_with_lock(self):
+        self.cmd('init', self.repository_location)
+        lock_path = os.path.join(self.repository_path, 'lock.exclusive')
+        cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path
+        self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42)
+
 
 
 @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
 @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
 class ArchiverTestCaseBinary(ArchiverTestCase):
 class ArchiverTestCaseBinary(ArchiverTestCase):

+ 3 - 0
borg/testsuite/chunker.py

@@ -4,6 +4,9 @@ from ..chunker import Chunker, buzhash, buzhash_update
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
 from . import BaseTestCase
 from . import BaseTestCase
 
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 
 class ChunkerTestCase(BaseTestCase):
 class ChunkerTestCase(BaseTestCase):
 
 

+ 4 - 0
borg/testsuite/conftest.py

@@ -0,0 +1,4 @@
+from ..logger import setup_logging
+
+# Ensure that the loggers exist for all tests
+setup_logging()

+ 3 - 0
borg/testsuite/crypto.py

@@ -3,6 +3,9 @@ from binascii import hexlify, unhexlify
 from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
 from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
 from . import BaseTestCase
 from . import BaseTestCase
 
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 
 class CryptoTestCase(BaseTestCase):
 class CryptoTestCase(BaseTestCase):
 
 

+ 17 - 15
borg/testsuite/hashindex.py

@@ -1,15 +1,16 @@
 import base64
 import base64
 import hashlib
 import hashlib
 import os
 import os
-import struct
 import tempfile
 import tempfile
 import zlib
 import zlib
 
 
-import pytest
 from ..hashindex import NSIndex, ChunkIndex
 from ..hashindex import NSIndex, ChunkIndex
 from .. import hashindex
 from .. import hashindex
 from . import BaseTestCase
 from . import BaseTestCase
 
 
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+#       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
 
 
 def H(x):
 def H(x):
     # make some 32byte long thing that depends on x
     # make some 32byte long thing that depends on x
@@ -194,7 +195,7 @@ class HashIndexRefcountingTestCase(BaseTestCase):
     def test_decref_zero(self):
     def test_decref_zero(self):
         idx1 = ChunkIndex()
         idx1 = ChunkIndex()
         idx1[H(1)] = 0, 0, 0
         idx1[H(1)] = 0, 0, 0
-        with pytest.raises(AssertionError):
+        with self.assert_raises(AssertionError):
             idx1.decref(H(1))
             idx1.decref(H(1))
 
 
     def test_incref_decref(self):
     def test_incref_decref(self):
@@ -208,18 +209,18 @@ class HashIndexRefcountingTestCase(BaseTestCase):
 
 
     def test_setitem_raises(self):
     def test_setitem_raises(self):
         idx1 = ChunkIndex()
         idx1 = ChunkIndex()
-        with pytest.raises(AssertionError):
+        with self.assert_raises(AssertionError):
             idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
             idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
 
 
     def test_keyerror(self):
     def test_keyerror(self):
         idx = ChunkIndex()
         idx = ChunkIndex()
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx.incref(H(1))
             idx.incref(H(1))
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx.decref(H(1))
             idx.decref(H(1))
-        with pytest.raises(KeyError):
+        with self.assert_raises(KeyError):
             idx[H(1)]
             idx[H(1)]
-        with pytest.raises(OverflowError):
+        with self.assert_raises(OverflowError):
             idx.add(H(1), -1, 0, 0)
             idx.add(H(1), -1, 0, 0)
 
 
 
 
@@ -269,10 +270,11 @@ class HashIndexDataTestCase(BaseTestCase):
         assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
         assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
 
 
 
 
-def test_nsindex_segment_limit():
-    idx = NSIndex()
-    with pytest.raises(AssertionError):
-        idx[H(1)] = hashindex.MAX_VALUE + 1, 0
-    assert H(1) not in idx
-    idx[H(2)] = hashindex.MAX_VALUE, 0
-    assert H(2) in idx
+class NSIndexTestCase(BaseTestCase):
+    def test_nsindex_segment_limit(self):
+        idx = NSIndex()
+        with self.assert_raises(AssertionError):
+            idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+        assert H(1) not in idx
+        idx[H(2)] = hashindex.MAX_VALUE, 0
+        assert H(2) in idx

+ 50 - 2
borg/testsuite/helpers.py

@@ -10,11 +10,12 @@ import msgpack
 import msgpack.fallback
 import msgpack.fallback
 import time
 import time
 
 
-from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \
+from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
-    StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \
+    StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
+    CompressionSpec, CompressionDecider1, CompressionDecider2, \
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper
 from . import BaseTestCase, environment_variable, FakeInputs
 from . import BaseTestCase, environment_variable, FakeInputs
 
 
@@ -915,3 +916,50 @@ def test_chunk_file_wrapper():
     cfw = ChunkIteratorFileWrapper(iter([]))
     cfw = ChunkIteratorFileWrapper(iter([]))
     assert cfw.read(2) == b''
     assert cfw.read(2) == b''
     assert cfw.exhausted
     assert cfw.exhausted
+
+
+def test_clean_lines():
+    conf = """\
+#comment
+data1 #data1
+data2
+
+ data3
+""".splitlines(keepends=True)
+    assert list(clean_lines(conf)) == ['data1 #data1', 'data2', 'data3', ]
+    assert list(clean_lines(conf, lstrip=False)) == ['data1 #data1', 'data2', ' data3', ]
+    assert list(clean_lines(conf, rstrip=False)) == ['data1 #data1\n', 'data2\n', 'data3\n', ]
+    assert list(clean_lines(conf, remove_empty=False)) == ['data1 #data1', 'data2', '', 'data3', ]
+    assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ]
+
+
+def test_compression_decider1():
+    default = CompressionSpec('zlib')
+    conf = """
+# use super-fast lz4 compression on huge VM files in this path:
+lz4:/srv/vm_disks
+
+# jpeg or zip files do not compress:
+none:*.jpeg
+none:*.zip
+""".splitlines()
+
+    cd = CompressionDecider1(default, [])  # no conf, always use default
+    assert cd.decide('/srv/vm_disks/linux')['name'] == 'zlib'
+    assert cd.decide('test.zip')['name'] == 'zlib'
+    assert cd.decide('test')['name'] == 'zlib'
+
+    cd = CompressionDecider1(default, [conf, ])
+    assert cd.decide('/srv/vm_disks/linux')['name'] == 'lz4'
+    assert cd.decide('test.zip')['name'] == 'none'
+    assert cd.decide('test')['name'] == 'zlib'  # no match in conf, use default
+
+
+def test_compression_decider2():
+    default = CompressionSpec('zlib')
+
+    cd = CompressionDecider2(default)
+    compr_spec, chunk = cd.decide(Chunk(None))
+    assert compr_spec['name'] == 'zlib'
+    compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma')))
+    assert compr_spec['name'] == 'lzma'

+ 25 - 1
borg/testsuite/key.py

@@ -7,7 +7,7 @@ from binascii import hexlify, unhexlify
 from ..crypto import bytes_to_long, num_aes_blocks
 from ..crypto import bytes_to_long, num_aes_blocks
 from ..key import PlaintextKey, PassphraseKey, KeyfileKey
 from ..key import PlaintextKey, PassphraseKey, KeyfileKey
 from ..helpers import Location, Chunk, bin_to_hex
 from ..helpers import Location, Chunk, bin_to_hex
-from . import BaseTestCase
+from . import BaseTestCase, environment_variable
 
 
 
 
 class KeyTestCase(BaseTestCase):
 class KeyTestCase(BaseTestCase):
@@ -34,9 +34,11 @@ class KeyTestCase(BaseTestCase):
     def setUp(self):
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
         self.tmppath = tempfile.mkdtemp()
         os.environ['BORG_KEYS_DIR'] = self.tmppath
         os.environ['BORG_KEYS_DIR'] = self.tmppath
+        self.tmppath2 = tempfile.mkdtemp()
 
 
     def tearDown(self):
     def tearDown(self):
         shutil.rmtree(self.tmppath)
         shutil.rmtree(self.tmppath)
+        shutil.rmtree(self.tmppath2)
 
 
     class MockRepository:
     class MockRepository:
         class _Location:
         class _Location:
@@ -71,6 +73,20 @@ class KeyTestCase(BaseTestCase):
         chunk = Chunk(b'foo')
         chunk = Chunk(b'foo')
         self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)))
         self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)))
 
 
+    def test_keyfile_kfenv(self):
+        keyfile = os.path.join(self.tmppath2, 'keyfile')
+        with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='testkf'):
+            assert not os.path.exists(keyfile)
+            key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
+            assert os.path.exists(keyfile)
+            chunk = Chunk(b'XXX')
+            chunk_id = key.id_hash(chunk.data)
+            chunk_cdata = key.encrypt(chunk)
+            key = KeyfileKey.detect(self.MockRepository(), chunk_cdata)
+            self.assert_equal(chunk, key.decrypt(chunk_id, chunk_cdata))
+            os.unlink(keyfile)
+            self.assert_raises(FileNotFoundError, KeyfileKey.detect, self.MockRepository(), chunk_cdata)
+
     def test_keyfile2(self):
     def test_keyfile2(self):
         with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd:
         with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd:
             fd.write(self.keyfile2_key_file)
             fd.write(self.keyfile2_key_file)
@@ -78,6 +94,14 @@ class KeyTestCase(BaseTestCase):
         key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
         key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
         self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
         self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
 
 
+    def test_keyfile2_kfenv(self):
+        keyfile = os.path.join(self.tmppath2, 'keyfile')
+        with open(keyfile, 'w') as fd:
+            fd.write(self.keyfile2_key_file)
+        with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'):
+            key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
+            self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
+
     def test_passphrase(self):
     def test_passphrase(self):
         os.environ['BORG_PASSPHRASE'] = 'test'
         os.environ['BORG_PASSPHRASE'] = 'test'
         key = PassphraseKey.create(self.MockRepository(), None)
         key = PassphraseKey.create(self.MockRepository(), None)

+ 18 - 0
docs/changes.rst

@@ -70,6 +70,24 @@ Other changes:
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
 
 
 
 
+Version 1.0.3 (not released yet)
+--------------------------------
+
+Bug fixes:
+
+- prune: ignore checkpoints, #997
+- prune: fix bad validator, #942
+- fix capabilities extraction on Linux (set xattrs last, after chown())
+
+Other changes:
+
+- update readthedocs URLs, #991
+- add missing docs for "borg break-lock", #992
+- borg create help: add some words about the archive name
+- borg create help: document format tags, #894
+- Vagrantfile: OS X: update osxfuse / install lzma package, #933
+
+
 Version 1.0.2
 Version 1.0.2
 -------------
 -------------
 
 

+ 3 - 1
docs/development.rst

@@ -139,7 +139,9 @@ Usage::
    # To create and provision the VM:
    # To create and provision the VM:
    vagrant up OS
    vagrant up OS
    # To create an ssh session to the VM:
    # To create an ssh session to the VM:
-   vagrant ssh OS command
+   vagrant ssh OS
+   # To execute a command via ssh in the VM:
+   vagrant ssh OS -c "command args"
    # To shut down the VM:
    # To shut down the VM:
    vagrant halt OS
    vagrant halt OS
    # To shut down and destroy the VM:
    # To shut down and destroy the VM:

+ 62 - 6
docs/faq.rst

@@ -133,6 +133,50 @@ into the repository.
 Yes, as an attacker with access to the remote server could delete (or
 Yes, as an attacker with access to the remote server could delete (or
 otherwise make unavailable) all your backups.
 otherwise make unavailable) all your backups.
 
 
+How can I protect against a hacked backup client?
+-------------------------------------------------
+
+Assume you back up your backup client machine C to the backup server S and
+C gets hacked. In a simple push setup, the attacker could then use borg on
+C to delete all backups residing on S.
+
+These are your options to protect against that:
+
+- Do not allow data to be permanently deleted from the repo, see :ref:`append-only-mode`.
+- Use a pull-mode setup using ``ssh -R``, see :issue:`900`.
+- Mount C's filesystem on another machine and then create a backup of it.
+- Do not give C filesystem-level access to S.
+
+How can I protect against a hacked backup server?
+-------------------------------------------------
+
+In case you got the impression that pull-mode backups are much safer than
+push-mode, you also need to consider the case that your backup server S
+gets hacked. If S has access to a lot of clients C, that might get you
+into even bigger trouble than a hacked backup client in the previous FAQ entry.
+
+These are your options to protect against that:
+
+- Use the standard push-mode setup (see also previous FAQ entry).
+- Mount (the repo part of) S's filesystem on C.
+- Do not give S filesystem-level access to C.
+- Have your backup server in a well-protected place (maybe not reachable from
+  the internet), configure it safely, apply security updates, monitor it, ...
+
+How can I protect against theft, sabotage, lightning, fire, ...?
+----------------------------------------------------------------
+
+In general: if your only backup medium is near the backed-up machine and
+always connected, you can easily get into trouble: they likely share the same
+fate if something goes really wrong.
+
+Thus:
+
+- have multiple backup media
+- have media disconnected from network, power, computer
+- have media at another place
+- have a relatively recent backup on your media
+
 Why do I get "connection closed by remote" after a while?
 Why do I get "connection closed by remote" after a while?
 ---------------------------------------------------------
 ---------------------------------------------------------
 
 
@@ -140,8 +184,7 @@ When doing a backup to a remote server (using a ssh: repo URL), it sometimes
 stops after a while (some minutes, hours, ... - not immediately) with
 stops after a while (some minutes, hours, ... - not immediately) with
 "connection closed by remote" error message. Why?
 "connection closed by remote" error message. Why?
 
 
-That's a good question and we are trying to find a good answer in
-`ticket 636 <https://github.com/borgbackup/borg/issues/636>`_.
+That's a good question and we are trying to find a good answer in :issue:`636`.
 
 
 The borg cache eats way too much disk space, what can I do?
 The borg cache eats way too much disk space, what can I do?
 -----------------------------------------------------------
 -----------------------------------------------------------
@@ -180,12 +223,25 @@ Yes, |project_name| supports resuming backups.
 
 
 During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
 During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
 is saved every checkpoint interval (the default value for this is 5
 is saved every checkpoint interval (the default value for this is 5
-minutes) containing all the data backed-up until that point. This means
+minutes) containing all the data backed up until that point. This checkpoint
+archive is a valid archive, but it is only a partial backup. Having it
+in the repo until a successful, full backup is completed is useful because it
+references all the transmitted chunks up to the checkpoint time. This means
 that at most <checkpoint interval> worth of data needs to be retransmitted
 that at most <checkpoint interval> worth of data needs to be retransmitted
-if a backup needs to be restarted.
+if you restart the backup.
+
+If a backup was interrupted, you do not need to take any special precautions,
+just invoke ``borg create`` as you always do. You may use the same archive name
+as in the previous attempt or a different one (e.g. if you always include the
+current datetime); it does not matter.
+|project_name| always does full single-pass backups, so it will start again
+from the beginning - but it will be much faster, because some of the data was
+already stored in the repo (and is still referenced by the checkpoint
+archive), so it does not need to be transmitted and stored again.
 
 
 Once your backup has finished successfully, you can delete all
 Once your backup has finished successfully, you can delete all
-``<archive-name>.checkpoint`` archives.
+``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
+also take care of deleting unneeded checkpoint archives.
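A minimal illustration of the workflow described above (repo URL, archive names and prune policy are only examples)::

    # an interrupted run left e.g. home-2016-04-30.checkpoint in the repo
    $ borg create /path/to/repo::home-2016-05-01 ~    # just run the backup again
    $ borg prune --keep-daily=7 /path/to/repo          # also removes unneeded checkpoint archives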
 
 
 If it crashes with a UnicodeError, what can I do?
 If it crashes with a UnicodeError, what can I do?
 -------------------------------------------------
 -------------------------------------------------
@@ -217,7 +273,7 @@ control which we do not have (and also can't get, even if we wanted).
 So, if you need that, consider RAID or a filesystem that offers redundant
 So, if you need that, consider RAID or a filesystem that offers redundant
 storage or just make backups to different locations / different hardware.
 storage or just make backups to different locations / different hardware.
 
 
-See also `ticket 225 <https://github.com/borgbackup/borg/issues/225>`_.
+See also :issue:`225`.
 
 
 Can |project_name| verify data integrity of a backup archive?
 Can |project_name| verify data integrity of a backup archive?
 -------------------------------------------------------------
 -------------------------------------------------------------

+ 1 - 1
docs/installation.rst

@@ -49,7 +49,7 @@ Ubuntu       `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac
 .. _[community]: https://www.archlinux.org/packages/?name=borg
 .. _[community]: https://www.archlinux.org/packages/?name=borg
 .. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
 .. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
 .. _stretch: https://packages.debian.org/stretch/borgbackup
 .. _stretch: https://packages.debian.org/stretch/borgbackup
-.. _unstable/sid: https://packages.debian.org/sid/borgbackup
+.. _sid: https://packages.debian.org/sid/borgbackup
 .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
 .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
 .. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
 .. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
 .. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup
 .. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup

+ 56 - 0
docs/misc/compression.conf

@@ -0,0 +1,56 @@
+# example config file for --compression-from option
+#
+# Format of non-comment / non-empty lines:
+# <compression-spec>:<path/filename pattern>
+# compression-spec uses the same format as the --compression option
+# path/filename pattern uses the same format as the --exclude option
+
+# archives / files:
+none:*.gz
+none:*.tgz
+none:*.bz2
+none:*.tbz2
+none:*.xz
+none:*.txz
+none:*.lzma
+none:*.lzo
+none:*.zip
+none:*.rar
+none:*.7z
+
+# audio:
+none:*.mp3
+none:*.ogg
+none:*.oga
+none:*.flac
+none:*.aac
+none:*.m4a
+
+# video:
+none:*.mp4
+none:*.mkv
+none:*.m4v
+none:*.avi
+none:*.mpg
+none:*.mpeg
+none:*.webm
+none:*.vob
+none:*.ts
+none:*.ogv
+none:*.mov
+none:*.flv
+none:*.ogm
+
+# pictures/images
+none:*.jpg
+none:*.jpeg
+none:*.png
+none:*.gif
+
+# disk images
+none:*.dmg
+
+# software archives
+none:*.rpm
+none:*.deb
+none:*.msi
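As the header above says, this file is meant for the ``--compression-from`` option; a hedged usage sketch (paths are placeholders, the default spec still comes from ``--compression``)::

    $ borg create --compression zlib,6 --compression-from compression.conf /path/to/repo::archive ~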

+ 16 - 12
docs/quickstart.rst

@@ -105,23 +105,27 @@ server. The script also uses the :ref:`borg_prune` subcommand to maintain a
 certain number of old archives::
 certain number of old archives::
 
 
     #!/bin/sh
     #!/bin/sh
-    REPOSITORY=username@remoteserver.com:backup
-
-    # Backup all of /home and /var/www except a few
-    # excluded directories
-    borg create -v --stats                          \
-        $REPOSITORY::`hostname`-`date +%Y-%m-%d`    \
-        /home                                       \
-        /var/www                                    \
-        --exclude '/home/*/.cache'                  \
-        --exclude /home/Ben/Music/Justin\ Bieber    \
+
+    # setting this, so the repo does not need to be given on the command line:
+    export BORG_REPO=username@remoteserver.com:backup
+
+    # setting this, so you won't be asked for your passphrase - make sure the
+    # script has appropriate owner/group and mode, e.g. root.root 600:
+    export BORG_PASSPHRASE=mysecret
+
+    # Backup most important stuff:
+    borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
+        /etc                                                    \
+        /home                                                   \
+        /var                                                    \
+        --exclude '/home/*/.cache'                              \
         --exclude '*.pyc'
         --exclude '*.pyc'
 
 
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
-    # archives of THIS machine. --prefix `hostname`- is very important to
+    # archives of THIS machine. Using --prefix is very important to
     # limit prune's operation to this machine's archives and not apply to
     # limit prune's operation to this machine's archives and not apply to
     # other machines' archives also.
     # other machines' archives also.
-    borg prune -v $REPOSITORY --prefix `hostname`- \
+    borg prune -v --prefix `hostname`- \
         --keep-daily=7 --keep-weekly=4 --keep-monthly=6
         --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
 
 .. backup_compression:
 .. backup_compression:

+ 1 - 1
docs/resources.rst

@@ -36,6 +36,6 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
 Software
 Software
 --------
 --------
 
 
-- `BorgWeb - a very simple web UI for BorgBackup <https://borgbackup.github.io/borgweb/>`_
+- `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
 - some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
 - some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
 - `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)
 - `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)

+ 22 - 9
docs/usage.rst

@@ -101,9 +101,11 @@ Some automatic "answerers" (if set, they automatically answer confirmation quest
     answer or ask you interactively, depending on whether retries are allowed (they by default are
     answer or ask you interactively, depending on whether retries are allowed (they by default are
     allowed). So please test your scripts interactively before making them a non-interactive script.
     allowed). So please test your scripts interactively before making them a non-interactive script.
 
 
-Directories:
+Directories and files:
     BORG_KEYS_DIR
     BORG_KEYS_DIR
         Defaults to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
         Defaults to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
+    BORG_KEY_FILE
+        When set, use the given filename as the repository key file.
     BORG_CACHE_DIR
     BORG_CACHE_DIR
         Defaults to '~/.cache/borg'. This directory contains the local cache and might need a lot
         Defaults to '~/.cache/borg'. This directory contains the local cache and might need a lot
         of space for dealing with big repositories.
         of space for dealing with big repositories.
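For the new ``BORG_KEY_FILE`` variable above, a minimal usage sketch (the path is only an example)::

    $ export BORG_KEY_FILE=/path/to/borg-keyfile
    $ borg create /path/to/repo::archive ~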
@@ -309,10 +311,9 @@ Examples
     # Even slower, even higher compression (N = 0..9)
     # Even slower, even higher compression (N = 0..9)
     $ borg create --compression lzma,N /path/to/repo::arch ~
     $ borg create --compression lzma,N /path/to/repo::arch ~
 
 
-    # Format tags available for archive name:
-    # {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
-    # add short hostname, backup username and current unixtime (seconds from epoch)
-    $ borg create  /path/to/repo::{hostname}-{user}-{now:%s} ~
+    # Use short hostname, user name and current time in archive name
+    $ borg create /path/to/repo::{hostname}-{user}-{now} ~
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~
 
 
 .. include:: usage/extract.rst.inc
 .. include:: usage/extract.rst.inc
 
 
@@ -326,6 +327,9 @@ Examples
     # Extract entire archive and list files while processing
     # Extract entire archive and list files while processing
     $ borg extract -v --list /path/to/repo::my-files
     $ borg extract -v --list /path/to/repo::my-files
 
 
+    # Verify whether an archive could be successfully extracted, but do not write files to disk
+    $ borg extract --dry-run /path/to/repo::my-files
+
     # Extract the "src" directory
     # Extract the "src" directory
     $ borg extract /path/to/repo::my-files home/USERNAME/src
     $ borg extract /path/to/repo::my-files home/USERNAME/src
 
 
@@ -645,6 +649,12 @@ Examples
     ...
     ...
 
 
 
 
+.. include:: usage/with-lock.rst.inc
+
+
+.. include:: usage/break-lock.rst.inc
+
+
 Miscellaneous Help
 Miscellaneous Help
 ------------------
 ------------------
 
 
@@ -814,13 +824,16 @@ Now, let's see how to restore some LVs from such a backup. ::
     $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
     $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
 
 
 
 
+.. _append-only-mode:
+
 Append-only mode
 Append-only mode
 ~~~~~~~~~~~~~~~~
 ~~~~~~~~~~~~~~~~
 
 
 A repository can be made "append-only", which means that Borg will never overwrite or
 A repository can be made "append-only", which means that Borg will never overwrite or
-delete committed data. This is useful for scenarios where multiple machines back up to
-a central backup server using ``borg serve``, since a hacked machine cannot delete
-backups permanently.
+delete committed data (append-only refers to the segment files, but borg will also
+refuse to delete the repository completely). This is useful for scenarios where a
+backup client machine backs up remotely to a backup server using ``borg serve``, since
+a hacked client machine cannot delete backups on the server permanently.
 
 
 To activate append-only mode, edit the repository ``config`` file and add a line
 To activate append-only mode, edit the repository ``config`` file and add a line
 ``append_only=1`` to the ``[repository]`` section (or edit the line if it exists).
 ``append_only=1`` to the ``[repository]`` section (or edit the line if it exists).
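As a sketch, the relevant part of the repository ``config`` file then looks something like this (all other settings omitted)::

    [repository]
    ...
    append_only=1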
@@ -881,6 +894,6 @@ repository. Make sure that backup client machines only get to access the reposit
 Ensure that no remote access is possible if the repository is temporarily set to normal mode
 Ensure that no remote access is possible if the repository is temporarily set to normal mode
 for e.g. regular pruning.
 for e.g. regular pruning.
 
 
-Further protections can be implemented, but are outside of Borgs scope. For example,
+Further protections can be implemented, but are outside of Borg's scope. For example,
 file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on
 file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on
 new data files.
 new data files.

+ 32 - 0
docs/usage/with-lock.rst.inc

@@ -0,0 +1,32 @@
+.. _borg_with-lock:
+
+borg with-lock
+--------------
+::
+
+    borg with-lock <options> REPOSITORY COMMAND ARGS
+
+positional arguments
+    REPOSITORY
+        repository to lock
+    COMMAND
+        command to run
+    ARGS
+        command arguments
+
+`Common options`_
+    |
+
+Description
+~~~~~~~~~~~
+
+This command runs a user-specified command while the repository lock is held.
+
+It will first try to acquire the lock (making sure that no other operation is
+running in the repo), then execute the given command as a subprocess, wait
+for its termination, release the lock and return the user command's return
+code as borg's return code.
+
+Note: if you copy a repository with the lock held, the lock will be present in
+      the copy, obviously. Thus, before using borg on the copy, you need to
+      use "borg break-lock" on it.
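A hedged usage sketch following the note above (paths are placeholders): hold the lock while making a low-level copy of the repository, then clear the stale lock from the copy::

    $ borg with-lock /path/to/repo rsync -av /path/to/repo/ /path/to/repo-copy/
    $ borg break-lock /path/to/repo-copy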

+ 7 - 4
setup.py

@@ -117,12 +117,13 @@ if sys.platform == 'win32':
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..")))
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..")))
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..", "..")))
     windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..", "..")))
 
 
-
 possible_openssl_prefixes = None
 possible_openssl_prefixes = None
 if sys.platform == 'win32':
 if sys.platform == 'win32':
     possible_openssl_prefixes = windowsIncludeDirs
     possible_openssl_prefixes = windowsIncludeDirs
 else:
 else:
-    possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local']
+    possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl',
+                                 '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
 if os.environ.get('BORG_OPENSSL_PREFIX'):
 if os.environ.get('BORG_OPENSSL_PREFIX'):
     possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
     possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
 ssl_prefix = detect_openssl(possible_openssl_prefixes)
 ssl_prefix = detect_openssl(possible_openssl_prefixes)
@@ -135,7 +136,9 @@ possible_lz4_prefixes = None
 if sys.platform == 'win32':
 if sys.platform == 'win32':
     possible_lz4_prefixes = windowsIncludeDirs
     possible_lz4_prefixes = windowsIncludeDirs
 else:
 else:
-    possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local']
+    possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4',
+                             '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
 if os.environ.get('BORG_LZ4_PREFIX'):
 if os.environ.get('BORG_LZ4_PREFIX'):
     possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX'))
     possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX'))
 lz4_prefix = detect_lz4(possible_lz4_prefixes)
 lz4_prefix = detect_lz4(possible_lz4_prefixes)
@@ -327,7 +330,7 @@ setup(
     },
     },
     author='The Borg Collective (see AUTHORS file)',
     author='The Borg Collective (see AUTHORS file)',
     author_email='borgbackup@python.org',
     author_email='borgbackup@python.org',
-    url='https://borgbackup.readthedocs.org/',
+    url='https://borgbackup.readthedocs.io/',
     description='Deduplicated, encrypted, authenticated and compressed backups',
     description='Deduplicated, encrypted, authenticated and compressed backups',
     long_description=long_description,
     long_description=long_description,
     license='BSD',
     license='BSD',