
Merge branch 'master' into multithreading

Thomas Waldmann, 10 years ago
Parent commit: 91d2cfa671
56 changed files with 2372 additions and 759 deletions
  1. .coveragerc (+17 -0)
  2. .gitignore (+6 -0)
  3. .travis.yml (+45 -10)
  4. .travis/install.sh (+47 -0)
  5. .travis/run.sh (+23 -0)
  6. .travis/upload_coverage.sh (+13 -0)
  7. AUTHORS (+2 -1)
  8. CHANGES.rst (+119 -11)
  9. MANIFEST.in (+1 -1)
  10. README.rst (+104 -51)
  11. borg/__main__.py (+0 -1)
  12. borg/_chunker.c (+2 -1)
  13. borg/_hashindex.c (+29 -5)
  14. borg/archive.py (+43 -33)
  15. borg/archiver.py (+113 -57)
  16. borg/cache.py (+12 -6)
  17. borg/compress.pyx (+199 -0)
  18. borg/fuse.py (+2 -2)
  19. borg/hashindex.pyx (+10 -3)
  20. borg/helpers.py (+49 -50)
  21. borg/key.py (+187 -101)
  22. borg/locking.py (+286 -0)
  23. borg/lrucache.py (+28 -29)
  24. borg/remote.py (+43 -56)
  25. borg/repository.py (+35 -10)
  26. borg/testsuite/__init__.py (+1 -28)
  27. borg/testsuite/archive.py (+3 -3)
  28. borg/testsuite/archiver.py (+120 -22)
  29. borg/testsuite/compress.py (+102 -0)
  30. borg/testsuite/hashindex.py (+22 -0)
  31. borg/testsuite/helpers.py (+39 -33)
  32. borg/testsuite/locking.py (+121 -0)
  33. borg/testsuite/lrucache.py (+43 -31)
  34. borg/testsuite/mock.py (+0 -5)
  35. borg/testsuite/platform.py (+0 -1)
  36. borg/testsuite/repository.py (+12 -6)
  37. borg/testsuite/run.py (+0 -11)
  38. borg/xattr.py (+1 -1)
  39. docs/_themes/local/sidebarusefullinks.html (+10 -1)
  40. docs/changes.rst (+4 -0)
  41. docs/conf.py (+6 -6)
  42. docs/development.rst (+67 -0)
  43. docs/faq.rst (+17 -2)
  44. docs/foreword.rst (+0 -62)
  45. docs/global.rst.inc (+1 -0)
  46. docs/index.rst (+6 -69)
  47. docs/installation.rst (+64 -8)
  48. docs/internals.rst (+75 -17)
  49. docs/intro.rst (+7 -0)
  50. docs/quickstart.rst (+25 -3)
  51. docs/support.rst (+37 -0)
  52. docs/usage.rst (+142 -2)
  53. requirements.d/development.txt (+5 -0)
  54. setup.cfg (+2 -2)
  55. setup.py (+15 -7)
  56. tox.ini (+10 -11)

+ 17 - 0
.coveragerc

@@ -0,0 +1,17 @@
+[run]
+branch = True
+source = borg
+omit =
+    borg/__init__.py
+    borg/__main__.py
+    borg/_version.py
+
+[report]
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
+ignore_errors = True

+ 6 - 0
.gitignore

@@ -6,6 +6,7 @@ env
 .tox
 hashindex.c
 chunker.c
+compress.c
 crypto.c
 platform_darwin.c
 platform_freebsd.c
@@ -16,3 +17,8 @@ platform_linux.c
 *.so
 docs/usage/*.inc
 .idea/
+.cache/
+borg.build/
+borg.dist/
+borg.exe
+.coverage

+ 45 - 10
.travis.yml

@@ -1,12 +1,47 @@
+sudo: required
+
 language: python
-python:
-  - "3.2"
-  - "3.3"
-  - "3.4"
-# command to install dependencies
+
+cache:
+    directories:
+        - $HOME/.cache/pip
+
+matrix:
+    include:
+        - python: 3.2
+          os: linux
+          env: TOXENV=py32
+        - python: 3.3
+          os: linux
+          env: TOXENV=py33
+        - python: 3.4
+          os: linux
+          env: TOXENV=py34
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py32
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py33
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py34
+
 install:
-  - "sudo apt-get install -y libacl1-dev"
-  - "pip install --use-mirrors Cython"
-  - "pip install -e ."
-# command to run tests
-script: fakeroot -u py.test
+    - ./.travis/install.sh
+
+script:
+    - ./.travis/run.sh
+
+after_success:
+    - ./.travis/upload_coverage.sh
+
+notifications:
+    irc:
+        channels:
+            - "irc.freenode.org#borgbackup"
+        use_notice: true
+        skip_join: true

+ 47 - 0
.travis/install.sh

@@ -0,0 +1,47 @@
+#!/bin/bash
+
+set -e
+set -x
+
+if [[ "$(uname -s)" == 'Darwin' ]]; then
+    brew update || brew update
+
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        brew outdated openssl || brew upgrade openssl
+    fi
+
+    if which pyenv > /dev/null; then
+        eval "$(pyenv init -)"
+    fi
+
+    brew install lz4
+    brew outdated pyenv || brew upgrade pyenv
+
+    case "${TOXENV}" in
+        py32)
+            pyenv install 3.2.6
+            pyenv global 3.2.6
+            ;;
+        py33)
+            pyenv install 3.3.6
+            pyenv global 3.3.6
+            ;;
+        py34)
+            pyenv install 3.4.3
+            pyenv global 3.4.3
+            ;;
+    esac
+    pyenv rehash
+    python -m pip install --user virtualenv
+else
+    pip install virtualenv
+    sudo add-apt-repository -y ppa:gezakovacs/lz4
+    sudo apt-get update
+    sudo apt-get install -y liblz4-dev
+    sudo apt-get install -y libacl1-dev
+fi
+
+python -m virtualenv ~/.venv
+source ~/.venv/bin/activate
+pip install tox pytest pytest-cov codecov Cython
+pip install -e .

+ 23 - 0
.travis/run.sh

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+set -x
+
+if [[ "$(uname -s)" == "Darwin" ]]; then
+    eval "$(pyenv init -)"
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        # set our flags to use homebrew openssl
+        export ARCHFLAGS="-arch x86_64"
+        export LDFLAGS="-L/usr/local/opt/openssl/lib"
+        export CFLAGS="-I/usr/local/opt/openssl/include"
+    fi
+fi
+
+source ~/.venv/bin/activate
+
+if [[ "$(uname -s)" == "Darwin" ]]; then
+    # no fakeroot on OS X
+    sudo tox -e $TOXENV
+else
+    fakeroot -u tox
+fi

+ 13 - 0
.travis/upload_coverage.sh

@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+set -x
+
+NO_COVERAGE_TOXENVS=(pep8)
+if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then
+    source ~/.venv/bin/activate
+    ln .tox/.coverage .coverage
+    # on osx, tests run as root, need access to .coverage
+    sudo chmod 666 .coverage
+    codecov -e TRAVIS_OS_NAME TOXENV
+fi

+ 2 - 1
AUTHORS

@@ -2,7 +2,8 @@ Borg Developers / Contributors ("The Borg Collective")
 ``````````````````````````````````````````````````````
 - Thomas Waldmann <tw@waldmann-edv.de>
 - Antoine Beaupré
-
+- Radek Podgorny <radek@podgorny.cz>
+- Yuri D'Elia
 
 Borg is a fork of Attic. Attic is written and maintained
 by Jonas Borgström and various contributors:

+ 119 - 11
CHANGES → CHANGES.rst

@@ -2,36 +2,144 @@ Borg Changelog
 ==============
 
 
+Version 0.25.0 (not released yet)
+---------------------------------
+
+Compatibility notes:
+
+- the new compression code is very compatible: as long as you stay with zlib
+  compression, older borg releases will still be able to read data from a
+  repo/archive made with the new code (note: this is not the case for the
+  default "none" compression, use "zlib,0" if you want a "no compression" mode
+  that can be read by older borg). Also the new code is able to read repos and
+  archives made with older borg versions (for all zlib levels  0..9).
+
+Deprecations:
+
+- --compression N (with N being a number, as in 0.24) is deprecated.
+  We keep the --compression 0..9 for now to not break scripts, but it is
+  deprecated and will be removed later, so better fix your scripts now:
+  --compression 0 (as in 0.24) is the same as --compression zlib,0 (now).
+  BUT: if you do not want compression, you rather want --compression none
+  (which is the default).
+  --compression 1 (in 0.24) is the same as --compression zlib,1 (now)
+  --compression 9 (in 0.24) is the same as --compression zlib,9 (now)
+
+
+New features:
+
+- create --compression none (default, means: do not compress, just pass through
+  data "as is". this is more efficient than zlib level 0 as used in borg 0.24)
+- create --compression lz4 (super-fast, but not very high compression)
+  Please note that borgbackup needs lz4 library as additional requirement.
+- create --compression zlib,N (slower, higher compression, default for N is 6)
+- create --compression lzma,N (slowest, highest compression, default N is 6)
+- honor the nodump flag (UF_NODUMP) and do not backup such items
+
+Bug fixes:
+
+- close fds of segments we delete (during compaction)
+
+Other changes:
+
+- none yet
+
+
 Version 0.24.0
 --------------
 
+Incompatible changes (compared to 0.23):
+
+- borg now always issues --umask NNN option when invoking another borg via ssh
+  on the repository server. By that, it's making sure it uses the same umask
+  for remote repos as for local ones. Because of this, you must upgrade both
+  server and client(s) to 0.24.
+- the default umask is 077 now (if you do not specify via --umask) which might
+  be a different one as you used previously. The default umask avoids that
+  you accidentally give access permissions for group and/or others to files
+  created by borg (e.g. the repository).
+
+Deprecations:
+
+- "--encryption passphrase" mode is deprecated, see #85 and #97.
+  See the new "--encryption repokey" mode for a replacement.
+
 New features:
 
-- borg create --chunker-params ... to configure the chunker.
+- borg create --chunker-params ... to configure the chunker, fixes #16
+  (attic #302, attic #300, and somehow also #41).
+  This can be used to reduce memory usage caused by chunk management overhead,
+  so borg does not create a huge chunks index/repo index and eats all your RAM
+  if you back up lots of data in huge files (like VM disk images).
   See docs/misc/create_chunker-params.txt for more information.
 - borg info now reports chunk counts in the chunk index.
+- borg create --compression 0..9 to select zlib compression level, fixes #66
+  (attic #295).
+- borg init --encryption repokey (to store the encryption key into the repo),
+  fixes #85
+- improve at-end error logging, always log exceptions and set exit_code=1
+- LoggedIO: better error checks / exceptions / exception handling
+- implement --remote-path to allow non-default-path borg locations, #125
+- implement --umask M and use 077 as default umask for better security, #117
+- borg check: give a named single archive to it, fixes #139
+- cache sync: show progress indication
+- cache sync: reimplement the chunk index merging in C
 
 Bug fixes:
 
-- reduce memory usage, see --chunker-params, fixes #16.
-  This can be used to reduce chunk management overhead, so borg does not create
-  a huge chunks index/repo index and eats all your RAM if you back up lots of
-  data in huge files (like VM disk images).
-- better Exception msg if there is no Borg installed on the remote repo server.
+- fix segfault that happened for unreadable files (chunker: n needs to be a
+  signed size_t), #116
+- fix the repair mode, #144
+- repo delete: add destroy to allowed rpc methods, fixes issue #114
+- more compatible repository locking code (based on mkdir), maybe fixes #92
+  (attic #317, attic #201).
+- better Exception msg if no Borg is installed on the remote repo server, #56
+- create a RepositoryCache implementation that can cope with >2GiB,
+  fixes attic #326.
+- fix Traceback when running check --repair, attic #232
+- clarify help text, fixes #73.
+- add help string for --no-files-cache, fixes #140
 
 Other changes:
 
-- Fedora/Fedora-based install instructions added to docs.
-- added docs/misc directory for misc. writeups that won't be included "as is"
-  into the html docs.
-
+- improved docs:
+
+  - added docs/misc directory for misc. writeups that won't be included
+    "as is" into the html docs.
+  - document environment variables and return codes (attic #324, attic #52)
+  - web site: add related projects, fix web site url, IRC #borgbackup
+  - Fedora/Fedora-based install instructions added to docs
+  - Cygwin-based install instructions added to docs
+  - updated AUTHORS
+  - add FAQ entries about redundancy / integrity
+  - clarify that borg extract uses the cwd as extraction target
+  - update internals doc about chunker params, memory usage and compression
+  - added docs about development
+  - add some words about resource usage in general
+  - document how to backup a raw disk
+  - add note about how to run borg from virtual env
+  - add solutions for (ll)fuse installation problems
+  - document what borg check does, fixes #138
+  - reorganize borgbackup.github.io sidebar, prev/next at top
+  - deduplicate and refactor the docs / README.rst
+
+- use borg-tmp as prefix for temporary files / directories
+- short prune options without "keep-" are deprecated, do not suggest them
+- improved tox configuration
+- remove usage of unittest.mock, always use mock from pypi
+- use entrypoints instead of scripts, for better use of the wheel format and
+  modern installs
+- add requirements.d/development.txt and modify tox.ini
+- use travis-ci for testing based on Linux and (new) OS X
+- use coverage.py, pytest-cov and codecov.io for test coverage support
 
 I forgot to list some stuff already implemented in 0.23.0, here they are:
 
 New features:
 
 - efficient archive list from manifest, meaning a big speedup for slow
-  repo connections and "list <repo>", "delete <repo>", "prune"
+  repo connections and "list <repo>", "delete <repo>", "prune" (attic #242,
+  attic #167)
 - big speedup for chunks cache sync (esp. for slow repo connections), fixes #18
 - hashindex: improve error messages
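
For readers of the changelog above: the new --compression value is parsed into a small dict (algorithm name plus optional level) that archiver.py later passes to Compressor(). The real parser, CompressionSpec in borg/helpers.py, is not part of the hunks shown here, so the following is only a hypothetical sketch of the documented behaviour, including the deprecated numeric 0..9 form that still maps to zlib::

    def compression_spec(s):
        # hypothetical illustration - the actual CompressionSpec may differ
        values = s.split(',')
        name = values[0]
        if name in ('none', 'lz4'):
            return dict(name=name)
        if name in ('zlib', 'lzma'):
            # documented default level is 6 for both algorithms
            level = int(values[1]) if len(values) > 1 else 6
            return dict(name=name, level=level)
        if name.isdigit():
            # deprecated 0.24-style "--compression N" == "zlib,N"
            return dict(name='zlib', level=int(name))
        raise ValueError('invalid compression spec: %s' % s)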
 

+ 1 - 1
MANIFEST.in

@@ -1,4 +1,4 @@
-include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py
+include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py
 recursive-include borg *.pyx
 recursive-include docs *
 recursive-exclude docs *.pyc

+ 104 - 51
README.rst

@@ -1,77 +1,130 @@
-|build|
+What is BorgBackup?
+-------------------
+BorgBackup (short: Borg) is a deduplicating backup program.
+Optionally, it supports compression and authenticated encryption.
 
-What is Borg?
--------------
-Borg is a deduplicating backup program. The main goal of Borg is to provide
-an efficient and secure way to backup data. The data deduplication
-technique used makes Borg suitable for daily backups since only changes
-are stored.
+The main goal of Borg is to provide an efficient and secure way to backup data.
+The data deduplication technique used makes Borg suitable for daily backups
+since only changes are stored.
+The authenticated encryption technique makes it suitable for backups to not
+fully trusted targets.
 
-Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file).
+`Borg Installation docs <http://borgbackup.github.io/borgbackup/installation.html>`_
 
-BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC.
-EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER
-CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document.
 
-NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES.
+Main features
+~~~~~~~~~~~~~
+**Space efficient storage**
+  Deduplication based on content-defined chunking is used to reduce the number
+  of bytes stored: each file is split into a number of variable length chunks
+  and only chunks that have never been seen before are added to the repository.
 
-THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
+  To deduplicate, all the chunks in the same repository are considered, no
+  matter whether they come from different machines, from previous backups,
+  from the same backup or even from the same single file.
+
+  Compared to other deduplication approaches, this method does NOT depend on:
+
+  * file/directory names staying the same
+
+    So you can move your stuff around without killing the deduplication,
+    even between machines sharing a repo.
+
+  * complete files or time stamps staying the same
+
+    If a big file changes a little, only a few new chunks will be stored -
+    this is great for VMs or raw disks.
+
+  * the absolute position of a data chunk inside a file
+
+    Stuff may get shifted and will still be found by the deduplication
+    algorithm.
+
+**Speed**
+  * performance critical code (chunking, compression, encryption) is
+    implemented in C/Cython
+  * local caching of files/chunks index data
+  * quick detection of unmodified files
+
+**Data encryption**
+    All data can be protected using 256-bit AES encryption, data integrity and
+    authenticity is verified using HMAC-SHA256.
 
-Read issue #1 on the issue tracker, goals are being defined there.
+**Compression**
+    All data can be compressed by lz4 (super fast, low compression), zlib
+    (medium speed and compression) or lzma (low speed, high compression).
+
+**Off-site backups**
+    Borg can store data on any remote host accessible over SSH.  If Borg is
+    installed on the remote host, big performance gains can be achieved
+    compared to using a network filesystem (sshfs, nfs, ...).
+
+**Backups mountable as filesystems**
+    Backup archives are mountable as userspace filesystems for easy interactive
+    backup examination and restores (e.g. by using a regular file manager).
+
+**Platforms Borg works on**
+  * Linux
+  * FreeBSD
+  * Mac OS X
+  * Cygwin (unsupported)
+
+**Free and Open Source Software**
+  * security and functionality can be audited independently
+  * licensed under the BSD (3-clause) license
 
-Please also see the LICENSE for more informations.
 
 Easy to use
 ~~~~~~~~~~~
-Initialize backup repository and create a backup archive::
+Initialize a new backup repository and create a backup archive::
 
     $ borg init /mnt/backup
-    $ borg create -v /mnt/backup::documents ~/Documents
+    $ borg create /mnt/backup::Monday ~/Documents
 
-Main features
-~~~~~~~~~~~~~
-Space efficient storage
-  Variable block size deduplication is used to reduce the number of bytes 
-  stored by detecting redundant data. Each file is split into a number of
-  variable length chunks and only chunks that have never been seen before are
-  compressed and added to the repository.
+Now doing another backup, just to show off the great deduplication::
 
-Optional data encryption
-    All data can be protected using 256-bit AES encryption and data integrity
-    and authenticity is verified using HMAC-SHA256.
+    $ borg create --stats /mnt/backup::Tuesday ~/Documents
 
-Off-site backups
-    Borg can store data on any remote host accessible over SSH.  This is
-    most efficient if Borg is also installed on the remote host.
+    Archive name: Tuesday
+    Archive fingerprint: 387a5e3f9b0e792e91c...
+    Start time: Tue Mar 25 12:00:10 2014
+    End time:   Tue Mar 25 12:00:10 2014
+    Duration: 0.08 seconds
+    Number of files: 358
+                      Original size    Compressed size    Deduplicated size
+    This archive:          57.16 MB           46.78 MB            151.67 kB  <--- !
+    All archives:         114.02 MB           93.46 MB             44.81 MB
 
-Backups mountable as filesystems
-    Backup archives are mountable as userspace filesystems for easy backup
-    verification and restores.
+For a graphical frontend refer to our complementary project
+`BorgWeb <https://github.com/borgbackup/borgweb>`_.
 
-What do I need?
----------------
-Borg requires Python 3.2 or above to work.
-Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
-In order to mount archives as filesystems, llfuse is required.
 
-How do I install it?
---------------------
-::
+Notes
+-----
 
-  $ pip3 install borgbackup
+Borg is a fork of `Attic <https://github.com/jborg/attic>`_ and maintained by
+"`The Borg Collective <https://github.com/borgbackup/borg/blob/master/AUTHORS>`_".
 
-Where are the docs?
--------------------
-Go to https://borgbackup.github.io/ for a prebuilt version of the documentation.
-You can also build it yourself from the docs folder.
+Read `issue #1 <https://github.com/borgbackup/borg/issues/1>`_ about the initial
+considerations regarding project goals and policy of the Borg project.
+
+BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC.
+EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER
+CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document.
+
+NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES.
+
+THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 
-Where are the tests?
---------------------
-The tests are in the borg/testsuite package. To run the test suite use the
-following command::
+For more information, please also see the
+`LICENSE  <https://github.com/borgbackup/borg/blob/master/LICENSE>`_.
 
-  $ fakeroot -u tox  # you need to have tox and pytest installed
+|build| |coverage|
 
 .. |build| image:: https://travis-ci.org/borgbackup/borg.svg
         :alt: Build Status
         :target: https://travis-ci.org/borgbackup/borg
+
+.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master
+        :alt: Test Coverage
+        :target: http://codecov.io/github/borgbackup/borg?branch=master
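
The "Space efficient storage" feature above describes content-defined chunking only in prose. As a toy illustration of the idea (this is not borg's real chunker, which is the C implementation in borg/_chunker.c; the mask bits and min/max sizes here merely mirror the --chunker-params fields)::

    def toy_chunker(data, mask_bits=21, min_size=2**19, max_size=2**23):
        """Cut chunks where a rolling hash matches a bit mask, so boundaries
        depend on the content itself rather than on absolute file offsets."""
        mask = (1 << mask_bits) - 1
        start = 0
        h = 0
        for i, byte in enumerate(data):
            h = ((h << 1) ^ byte) & 0xFFFFFFFF  # stand-in for the real rolling hash
            size = i - start + 1
            if size >= max_size or (size >= min_size and h & mask == 0):
                yield data[start:i + 1]
                start = i + 1
                h = 0
        if start < len(data):
            yield data[start:]

Because cut points are derived from the data, inserting a few bytes near the start of a big file only changes the chunks around the insertion; later chunks keep their boundaries and deduplicate against earlier backups.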

+ 0 - 1
scripts/borg → borg/__main__.py

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 from borg.archiver import main
 main()
 

+ 2 - 1
borg/_chunker.c

@@ -127,7 +127,8 @@ chunker_free(Chunker *c)
 static int
 chunker_fill(Chunker *c, PyThreadState **tstatep)
 {
-    size_t n;
+    ssize_t n;
+    PyObject *data;
     memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
     c->position -= c->last;
     c->last = 0;

+ 29 - 5
borg/_hashindex.c

@@ -145,10 +145,12 @@ hashindex_read(const char *path)
     bytes_read = fread(&header, 1, sizeof(HashHeader), fd);
     if(bytes_read != sizeof(HashHeader)) {
         if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
+            EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
         }
         else {
-            EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
+            EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)",
+                             (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
         }
         goto fail;
     }
@@ -170,7 +172,8 @@ hashindex_read(const char *path)
     }
     buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size);
     if(length != sizeof(HashHeader) + buckets_length) {
-        EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length);
+        EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length);
         goto fail;
     }
     if(!(index = malloc(sizeof(HashIndex)))) {
@@ -186,10 +189,12 @@ hashindex_read(const char *path)
     bytes_read = fread(index->buckets, 1, buckets_length, fd);
     if(bytes_read != buckets_length) {
         if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
+            EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                         (uintmax_t) buckets_length, (uintmax_t) bytes_read);
         }
         else {
-            EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
+            EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                             (uintmax_t) buckets_length, (uintmax_t) bytes_read);
         }
         free(index->buckets);
         free(index);
@@ -385,3 +390,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_unique_chunks = unique_chunks;
     *total_chunks = chunks;
 }
+
+static void
+hashindex_merge(HashIndex *index, HashIndex *other)
+{
+    int32_t key_size = index->key_size;
+    const int32_t *other_values;
+    int32_t *my_values;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(other, key))) {
+        other_values = key + key_size;
+        my_values = (int32_t *)hashindex_get(index, key);
+        if(my_values == NULL) {
+            hashindex_set(index, key, other_values);
+        } else {
+            *my_values += *other_values;
+        }
+    }
+}
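
The new hashindex_merge() above is what makes the reimplemented chunk cache sync fast: instead of iterating an archive's chunk index from Python and adding entries one by one, the whole index is folded into the target in C (exposed to Python as ChunkIndex.merge(), see borg/hashindex.pyx and borg/cache.py below). In rough Python terms, over a dict-like index mapping key -> (refcount, size, csize), it behaves like this sketch::

    def merge_chunk_indexes(index, other):
        # illustrative only - the real merge works on the C hash table directly
        # and, like the C code, only sums the reference counts
        for key, (count, size, csize) in other.items():
            if key in index:
                my_count, my_size, my_csize = index[key]
                index[key] = (my_count + count, my_size, my_csize)
            else:
                index[key] = (count, size, csize)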

+ 43 - 33
borg/archive.py

@@ -2,8 +2,6 @@ from datetime import datetime
 from getpass import getuser
 from itertools import groupby
 import errno
-import shutil
-import tempfile
 import threading
 from .key import key_factory
 from .remote import cache_if_remote
@@ -110,7 +108,7 @@ class ChunkBuffer:
 class CacheChunkBuffer(ChunkBuffer):
 
     def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
-        super(CacheChunkBuffer, self).__init__(key, chunker_params)
+        super().__init__(key, chunker_params)
         self.cache = cache
         self.stats = stats
 
@@ -317,7 +315,6 @@ class Archive:
     class IncompatibleFilesystemEncodingError(Error):
         """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable."""
 
-
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, progress=False,
                  chunker_params=CHUNKER_PARAMS):
@@ -432,9 +429,11 @@ class Archive:
             count, size, csize = cache.chunks[id]
             stats.update(size, csize, count == 1)
             cache.chunks[id] = count - 1, size, csize
+
         def add_file_chunks(chunks):
             for id, _, _ in chunks:
                 add(id)
+
         # This function is a bit evil since it abuses the cache to calculate
         # the stats. The cache transaction must be rolled back afterwards
         unpacker = msgpack.Unpacker(use_list=False)
@@ -751,13 +750,13 @@ class Archive:
         return Archive._open_rb(path, st)
 
 
-class RobustUnpacker():
+class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
     item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')]
 
     def __init__(self, validator):
-        super(RobustUnpacker, self).__init__()
+        super().__init__()
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -815,13 +814,10 @@ class ArchiveChecker:
     def __init__(self):
         self.error_found = False
         self.possibly_superseded = set()
-        self.tmpdir = tempfile.mkdtemp()
 
-    def __del__(self):
-        shutil.rmtree(self.tmpdir)
-
-    def check(self, repository, repair=False, last=None):
+    def check(self, repository, repair=False, archive=None, last=None):
         self.report_progress('Starting archive consistency check...')
+        self.check_all = archive is None and last is None
         self.repair = repair
         self.repository = repository
         self.init_chunks()
@@ -830,11 +826,9 @@ class ArchiveChecker:
             self.manifest = self.rebuild_manifest()
         else:
             self.manifest, _ = Manifest.load(repository, key=self.key)
-        self.rebuild_refcounts(last=last)
-        if last is None:
-            self.verify_chunks()
-        else:
-            self.report_progress('Orphaned objects check skipped (needs all archives checked)')
+        self.rebuild_refcounts(archive=archive, last=last)
+        self.orphan_chunks_check()
+        self.finish()
         if not self.error_found:
             self.report_progress('Archive consistency check complete, no problems found.')
         return self.repair or not self.error_found
@@ -842,7 +836,7 @@ class ArchiveChecker:
     def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
-        # Explicity set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
         self.chunks = ChunkIndex(capacity)
@@ -891,7 +885,7 @@ class ArchiveChecker:
         self.report_progress('Manifest rebuild complete', error=True)
         return manifest
 
-    def rebuild_refcounts(self, last=None):
+    def rebuild_refcounts(self, archive=None, last=None):
         """Rebuild object reference counts by walking the metadata
 
         Missing and/or incorrect data is repaired when detected
@@ -966,13 +960,24 @@ class ArchiveChecker:
                 for chunk_id, cdata in zip(items, repository.get_many(items)):
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
                     for item in unpacker:
+                        if not isinstance(item, dict):
+                            self.report_progress('Did not get expected metadata dict - archive corrupted!',
+                                                 error=True)
+                            continue
                         yield item
 
         repository = cache_if_remote(self.repository)
-        num_archives = len(self.manifest.archives)
-        archive_items = sorted(self.manifest.archives.items(), reverse=True,
-                               key=lambda name_info: name_info[1][b'time'])
-        end = None if last is None else min(num_archives, last)
+        if archive is None:
+            # we need last N or all archives
+            archive_items = sorted(self.manifest.archives.items(), reverse=True,
+                                   key=lambda name_info: name_info[1][b'time'])
+            num_archives = len(self.manifest.archives)
+            end = None if last is None else min(num_archives, last)
+        else:
+            # we only want one specific archive
+            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
+            num_archives = 1
+            end = 1
         for i, (name, info) in enumerate(archive_items[:end]):
             self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
             archive_id = info[b'id']
@@ -1003,17 +1008,22 @@ class ArchiveChecker:
             add_reference(new_archive_id, len(data), len(cdata), cdata)
             info[b'id'] = new_archive_id
 
-    def verify_chunks(self):
-        unused = set()
-        for id_, (count, size, csize) in self.chunks.iteritems():
-            if count == 0:
-                unused.add(id_)
-        orphaned = unused - self.possibly_superseded
-        if orphaned:
-            self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+    def orphan_chunks_check(self):
+        if self.check_all:
+            unused = set()
+            for id_, (count, size, csize) in self.chunks.iteritems():
+                if count == 0:
+                    unused.add(id_)
+            orphaned = unused - self.possibly_superseded
+            if orphaned:
+                self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+            if self.repair:
+                for id_ in unused:
+                    self.repository.delete(id_)
+        else:
+            self.report_progress('Orphaned objects check skipped (needs all archives checked)')
+
+    def finish(self):
         if self.repair:
-            for id_ in unused:
-                self.repository.delete(id_)
             self.manifest.write()
             self.repository.commit()
-

+ 113 - 57
borg/archiver.py

@@ -14,6 +14,7 @@ import traceback
 
 from . import __version__
 from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
+from .compress import Compressor, COMPR_BUFFER
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator
@@ -21,9 +22,11 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int, ChunkerParams
+    is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec
 from .remote import RepositoryServer, RemoteRepository
 
+has_lchflags = hasattr(os, 'lchflags')
+
 
 class Archiver:
 
@@ -85,8 +88,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 print('Repository check complete, no problems found.')
             else:
                 return 1
-        if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last):
-                return 1
+        if not args.repo_only and not ArchiveChecker().check(
+                repository, repair=args.repair, archive=args.repository.archive, last=args.last):
+            return 1
         return 0
 
     def do_change_passphrase(self, args):
@@ -101,7 +105,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         t0 = datetime.now()
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
-        key.compression_level = args.compression
+        compr_args = dict(buffer=COMPR_BUFFER)
+        compr_args.update(args.compression)
+        key.compressor = Compressor(**compr_args)
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
@@ -174,6 +180,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         if restrict_dev and st.st_dev != restrict_dev:
             return
         status = None
+        # Ignore if nodump flag is set
+        if has_lchflags and (st.st_flags & stat.UF_NODUMP):
+            return
         if stat.S_ISREG(st.st_mode):
             try:
                 status = archive.process_file(path, st, cache)
@@ -223,7 +232,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # be restrictive when restoring files, restore permissions later
         if sys.getfilesystemencoding() == 'ascii':
             print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
-        os.umask(0o077)
         repository = self.open_repository(args.archive)
         manifest, key = Manifest.load(repository)
         archive = Archive(repository, key, manifest, args.archive.archive,
@@ -291,11 +299,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             print("You requested to completely DELETE the repository *including* all archives it contains:")
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 print(format_archive(archive_info))
-            print("""Type "YES" if you understand this and want to continue.\n""")
-            if input('Do you want to continue? ') == 'YES':
-                repository.destroy()
-                cache.destroy()
-                print("Repository and corresponding cache were deleted.")
+            while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+                print("""Type "YES" if you understand this and want to continue.\n""")
+                if input('Do you want to continue? ') == 'YES':
+                    break
+            repository.destroy()
+            cache.destroy()
+            print("Repository and corresponding cache were deleted.")
         return self.exit_code
 
     def do_mount(self, args):
@@ -330,34 +340,38 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         repository = self.open_repository(args.src)
         manifest, key = Manifest.load(repository)
         if args.src.archive:
-            tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
             archive = Archive(repository, key, manifest, args.src.archive)
-            for item in archive.iter_items():
-                type = tmap.get(item[b'mode'] // 4096, '?')
-                mode = format_file_mode(item[b'mode'])
-                size = 0
-                if type == '-':
+            if args.short:
+                for item in archive.iter_items():
+                    print(remove_surrogates(item[b'path']))
+            else:
+                tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
+                for item in archive.iter_items():
+                    type = tmap.get(item[b'mode'] // 4096, '?')
+                    mode = format_file_mode(item[b'mode'])
+                    size = 0
+                    if type == '-':
+                        try:
+                            size = sum(size for _, size, _ in item[b'chunks'])
+                        except KeyError:
+                            pass
                     try:
-                        size = sum(size for _, size, _ in item[b'chunks'])
-                    except KeyError:
-                        pass
-                try:
-                    mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
-                except ValueError:
-                    # likely a broken mtime and datetime did not want to go beyond year 9999
-                    mtime = datetime(9999, 12, 31, 23, 59, 59)
-                if b'source' in item:
-                    if type == 'l':
-                        extra = ' -> %s' % item[b'source']
+                        mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
+                    except ValueError:
+                        # likely a broken mtime and datetime did not want to go beyond year 9999
+                        mtime = datetime(9999, 12, 31, 23, 59, 59)
+                    if b'source' in item:
+                        if type == 'l':
+                            extra = ' -> %s' % item[b'source']
+                        else:
+                            type = 'h'
+                            extra = ' link to %s' % item[b'source']
                     else:
-                        type = 'h'
-                        extra = ' link to %s' % item[b'source']
-                else:
-                    extra = ''
-                print('%s%s %-6s %-6s %8d %s %s%s' % (
-                    type, mode, item[b'user'] or item[b'uid'],
-                    item[b'group'] or item[b'gid'], size, format_time(mtime),
-                    remove_surrogates(item[b'path']), extra))
+                        extra = ''
+                    print('%s%s %-6s %-6s %8d %s %s%s' % (
+                        type, mode, item[b'user'] or item[b'uid'],
+                        item[b'group'] or item[b'gid'], size, format_time(mtime),
+                        remove_surrogates(item[b'path']), extra))
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 print(format_archive(archive_info))
@@ -511,7 +525,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                                    default=False,
                                    help='verbose output')
-        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false')
+        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
+                                   help='do not load/update the file metadata cache used to detect unchanged files')
+        common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M',
+                                   help='set umask to M (local and remote, default: 0o077)')
+        common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
+                                   help='set remote path to executable (default: "borg")')
 
         # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
         if args:
@@ -533,6 +552,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         This command initializes an empty repository. A repository is a filesystem
         directory containing the deduplicated data from zero or more archives.
         Encryption can be enabled at repository init time.
+        Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it,
+        consider using 'repokey').
         """)
         subparser = subparsers.add_parser('init', parents=[common_parser],
                                           description=self.do_init.__doc__, epilog=init_epilog,
@@ -542,27 +563,51 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                type=location_validator(archive=False),
                                help='repository to create')
         subparser.add_argument('-e', '--encryption', dest='encryption',
-                               choices=('none', 'passphrase', 'keyfile'), default='none',
-                               help='select encryption method')
+                               choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none',
+                               help='select encryption key mode')
 
         check_epilog = textwrap.dedent("""
-        The check command verifies the consistency of a repository and the corresponding
-        archives. The underlying repository data files are first checked to detect bit rot
-        and other types of damage. After that the consistency and correctness of the archive
-        metadata is verified.
-
-        The archive metadata checks can be time consuming and requires access to the key
-        file and/or passphrase if encryption is enabled. These checks can be skipped using
-        the --repository-only option.
+        The check command verifies the consistency of a repository and the corresponding archives.
+
+        First, the underlying repository data files are checked:
+        - For all segments the segment magic (header) is checked
+        - For all objects stored in the segments, all metadata (e.g. crc and size) and
+          all data is read. The read data is checked by size and CRC. Bit rot and other
+          types of accidental damage can be detected this way.
+        - If we are in repair mode and a integrity error is detected for a segment,
+          we try to recover as many objects from the segment as possible.
+        - In repair mode, it makes sure that the index is consistent with the data
+          stored in the segments.
+        - If you use a remote repo server via ssh:, the repo check is executed on the
+          repo server without causing significant network traffic.
+        - The repository check can be skipped using the --archives-only option.
+
+        Second, the consistency and correctness of the archive metadata is verified:
+        - Is the repo manifest present? If not, it is rebuilt from archive metadata
+          chunks (this requires reading and decrypting of all metadata and data).
+        - Check if archive metadata chunk is present. if not, remove archive from
+          manifest.
+        - For all files (items) in the archive, for all chunks referenced by these
+          files, check if chunk is present (if not and we are in repair mode, replace
+          it with a same-size chunk of zeros). This requires reading of archive and
+          file metadata, but not data.
+        - If we are in repair mode and we checked all the archives: delete orphaned
+          chunks from the repo.
+        - if you use a remote repo server via ssh:, the archive check is executed on
+          the client machine (because if encryption is enabled, the checks will require
+          decryption and this is always done client-side, because key access will be
+          required).
+        - The archive checks can be time consuming, they can be skipped using the
+          --repository-only option.
         """)
         subparser = subparsers.add_parser('check', parents=[common_parser],
                                           description=self.do_check.__doc__,
                                           epilog=check_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_check)
-        subparser.add_argument('repository', metavar='REPOSITORY',
-                               type=location_validator(archive=False),
-                               help='repository to check consistency of')
+        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
+                               type=location_validator(),
+                               help='repository or archive to check consistency of')
         subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
                                default=False,
                                help='only perform repository checks')
@@ -593,7 +638,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         traversing all paths specified. The archive will consume almost no disk space for
         files or parts of files that have already been stored in other archives.
 
-        See "borg help patterns" for more help on exclude patterns.
+        See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
 
         subparser = subparsers.add_parser('create', parents=[common_parser],
@@ -635,9 +680,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
                                help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
         subparser.add_argument('-C', '--compression', dest='compression',
-                               type=int, default=0, metavar='N',
-                               help='select compression algorithm and level. 0..9 is supported and means zlib '
-                                    'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
+                               type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
+                               help='select compression algorithm (and level): '
+                                    'none == no compression (default), '
+                                    'lz4 == lz4, '
+                                    'zlib == zlib (default level 6), '
+                                    'zlib,0 .. zlib,9 == zlib (with level 0..9), '
+                                    'lzma == lzma (default level 6), '
+                                    'lzma,0 .. lzma,9 == lzma (with level 0..9).')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
@@ -650,7 +700,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         by passing a list of ``PATHs`` as arguments. The file selection can further
         be restricted by using the ``--exclude`` option.
 
-        See "borg help patterns" for more help on exclude patterns.
+        See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
         subparser = subparsers.add_parser('extract', parents=[common_parser],
                                           description=self.do_extract.__doc__,
@@ -723,6 +773,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           epilog=list_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_list)
+        subparser.add_argument('--short', dest='short',
+                               action='store_true', default=False,
+                               help='only print file/directory names, nothing else')
         subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
                                help='repository/archive to list contents of')
         mount_epilog = textwrap.dedent("""
@@ -822,11 +875,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
 
         args = parser.parse_args(args or ['-h'])
         self.verbose = args.verbose
+        os.umask(args.umask)
+        RemoteRepository.remote_path = args.remote_path
+        RemoteRepository.umask = args.umask
         update_excludes(args)
         return args.func(args)
 
 
-def sig_info_handler(signum, stack):
+def sig_info_handler(signum, stack):  # pragma: no cover
     """search the stack for infos about the currently processed file and print them"""
     for frame in inspect.getouterframes(stack):
         func, loc = frame[3], frame[0].f_locals
@@ -849,7 +905,7 @@ def sig_info_handler(signum, stack):
             break
 
 
-def setup_signal_handlers():
+def setup_signal_handlers():  # pragma: no cover
     sigs = []
     if hasattr(signal, 'SIGUSR1'):
         sigs.append(signal.SIGUSR1)  # kill -USR1 pid
@@ -859,7 +915,7 @@ def setup_signal_handlers():
         signal.signal(sig, sig_info_handler)
 
 
-def main():
+def main():  # pragma: no cover
     # Make sure stdout and stderr have errors='replace') to avoid unicode
     # issues when print()-ing unicode file names
     sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)

+ 12 - 6
borg/cache.py

@@ -11,8 +11,9 @@ import tarfile
 import tempfile
 
 from .key import PlaintextKey
-from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, UpgradableLock, int_to_bigint, \
+from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, int_to_bigint, \
     bigint_to_int
+from .locking import UpgradableLock
 from .hashindex import ChunkIndex
 
 
@@ -22,7 +23,6 @@ class Cache:
     class RepositoryReplay(Error):
         """Cache is newer than repository, refusing to continue"""
 
-
     class CacheInitAbortedError(Error):
         """Cache initialization aborted"""
 
@@ -129,7 +129,7 @@ class Cache:
     def open(self):
         if not os.path.isdir(self.path):
             raise Exception('%s Does not look like a Borg cache' % self.path)
-        self.lock = UpgradableLock(os.path.join(self.path, 'config'), exclusive=True)
+        self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire()
         self.rollback()
 
     def close(self):
@@ -292,6 +292,9 @@ class Cache:
                 add(chunk_idx, item_id, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
+                    if not isinstance(item, dict):
+                        print('Error: Did not get expected metadata dict - archive corrupted!')
+                        continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
                             add(chunk_idx, chunk_id, size, csize)
@@ -308,17 +311,20 @@ class Cache:
             chunk_idx.clear()
             for tarinfo in tf_in:
                 archive_id_hex = tarinfo.name
+                archive_name = tarinfo.pax_headers['archive_name']
+                print("- extracting archive %s ..." % archive_name)
                 tf_in.extract(archive_id_hex, tmp_dir)
                 chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
+                print("- reading archive ...")
                 archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
-                for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
-                    add(chunk_idx, chunk_id, size, csize, incr=count)
+                print("- merging archive ...")
+                chunk_idx.merge(archive_chunk_idx)
                 os.unlink(chunk_idx_path)
 
         self.begin_txn()
         print('Synchronizing chunks cache...')
         # XXX we have to do stuff on disk due to lacking ChunkIndex api
-        with tempfile.TemporaryDirectory() as tmp_dir:
+        with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir:
             repository = cache_if_remote(self.repository)
             out_archive = open_out_archive()
             in_archive = open_in_archive()

+ 199 - 0
borg/compress.pyx

@@ -0,0 +1,199 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+cdef extern from "lz4.h":
+    int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+    int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+
+
+cdef class CompressorBase:
+    """
+    base class for all (de)compression classes,
+    also handles compression format auto detection and
+    adding/stripping the ID header (which enable auto detection).
+    """
+    ID = b'\xFF\xFF'  # reserved and not used
+                      # overwrite with a unique 2-bytes bytestring in child classes
+    name = 'baseclass'
+
+    @classmethod
+    def detect(cls, data):
+        return data.startswith(cls.ID)
+
+    def __init__(self, **kwargs):
+        pass
+
+    def compress(self, data):
+        # add ID bytes
+        return self.ID + data
+
+    def decompress(self, data):
+        # strip ID bytes
+        return data[2:]
+
+
+class CNONE(CompressorBase):
+    """
+    none - no compression, just pass through data
+    """
+    ID = b'\x00\x00'
+    name = 'none'
+
+    def compress(self, data):
+        return super().compress(data)
+
+    def decompress(self, data):
+        data = super().decompress(data)
+        if not isinstance(data, bytes):
+            data = bytes(data)
+        return data
+
+
+cdef class LZ4(CompressorBase):
+    """
+    raw LZ4 compression / decompression (liblz4).
+
+    Features:
+        - lz4 is super fast
+        - wrapper releases CPython's GIL to support multithreaded code
+        - buffer given by caller, avoiding frequent reallocation and buffer duplication
+        - uses safe lz4 methods that never go beyond the end of the output buffer
+
+    But beware:
+        - this is not very generic, the given buffer MUST be large enough to
+          handle all compression or decompression output (or it will fail).
+        - you must not do method calls to the same LZ4 instance from different
+          threads at the same time - create one LZ4 instance per thread!
+    """
+    ID = b'\x01\x00'
+    name = 'lz4'
+
+    cdef char *buffer  # helper buffer for (de)compression output
+    cdef int bufsize  # size of this buffer
+
+    def __cinit__(self, **kwargs):
+        buffer = kwargs['buffer']
+        self.buffer = buffer
+        self.bufsize = len(buffer)
+
+    def compress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
+        if not osize:
+            raise Exception('lz4 compress failed')
+        return super().compress(dest[:osize])
+
+    def decompress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        idata = super().decompress(idata)
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_decompress_safe(source, dest, isize, osize)
+        if osize < 0:
+            # malformed input data, buffer too small, ...
+            raise Exception('lz4 decompress failed')
+        return dest[:osize]
+
+
+class LZMA(CompressorBase):
+    """
+    lzma compression / decompression (python 3.3+ stdlib)
+    """
+    ID = b'\x02\x00'
+    name = 'lzma'
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+        if lzma is None:
+            raise ValueError('No lzma support found.')
+
+    def compress(self, data):
+        # we do not need integrity checks in lzma, we do that already
+        data = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE)
+        return super().compress(data)
+
+    def decompress(self, data):
+        data = super().decompress(data)
+        return lzma.decompress(data)
+
+
+class ZLIB(CompressorBase):
+    """
+    zlib compression / decompression (python stdlib)
+    """
+    ID = b'\x08\x00'  # not used here, see detect()
+                      # avoid all 0x.8.. IDs elsewhere!
+    name = 'zlib'
+
+    @classmethod
+    def detect(cls, data):
+        # matches misc. patterns 0x.8.. used by zlib
+        cmf, flg = data[:2]
+        is_deflate = cmf & 0x0f == 8
+        check_ok = (cmf * 256 + flg) % 31 == 0
+        return check_ok and is_deflate
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+
+    def compress(self, data):
+        # note: for compatibility no super call, do not add ID bytes
+        return zlib.compress(data, self.level)
+
+    def decompress(self, data):
+        # note: for compatibility no super call, do not strip ID bytes
+        return zlib.decompress(data)
+
+
+COMPRESSOR_TABLE = {
+    CNONE.name: CNONE,
+    LZ4.name: LZ4,
+    ZLIB.name: ZLIB,
+    LZMA.name: LZMA,
+}
+COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+
+def get_compressor(name, **kwargs):
+    cls = COMPRESSOR_TABLE[name]
+    return cls(**kwargs)
+
+
+class Compressor:
+    """
+    compresses using a compressor with given name and parameters
+    decompresses everything we can handle (autodetect)
+    """
+    def __init__(self, name='none', **kwargs):
+        self.params = kwargs
+        self.compressor = get_compressor(name, **self.params)
+
+    def compress(self, data):
+        return self.compressor.compress(data)
+
+    def decompress(self, data):
+        hdr = bytes(data[:2])  # detect() does not work with memoryview
+        for cls in COMPRESSOR_LIST:
+            if cls.detect(hdr):
+                return cls(**self.params).decompress(data)
+        else:
+            raise ValueError('No decompressor for this data found: %r.' % (hdr,))
+
+
+# a buffer used for (de)compression result, which can be slightly bigger
+# than the chunk buffer in the worst (incompressible data) case, add 10%:
+COMPR_BUFFER = bytes(int(1.1 * 2 ** 23))  # CHUNK_MAX_EXP == 23
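
The new Compressor class couples a named compressor with auto-detecting decompression. A minimal usage sketch, assuming the compiled extension is importable as borg.compress (the sample data is made up):

    from borg.compress import Compressor, COMPR_BUFFER

    data = b'foobar' * 1000
    lz4 = Compressor('lz4', buffer=COMPR_BUFFER)
    compressed = lz4.compress(data)   # payload is prefixed with the lz4 ID b'\x01\x00'
    # decompress() auto-detects the format from the ID header, so even a differently
    # configured Compressor instance can read lz4-compressed data:
    other = Compressor('zlib', level=9, buffer=COMPR_BUFFER)
    assert other.decompress(compressed) == data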

+ 2 - 2
borg/fuse.py

@@ -17,7 +17,7 @@ have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns')
 
 class ItemCache:
     def __init__(self):
-        self.fd = tempfile.TemporaryFile()
+        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
         self.offset = 1000000
 
     def add(self, item):
@@ -34,7 +34,7 @@ class FuseOperations(llfuse.Operations):
     """Export archive as a fuse filesystem
     """
     def __init__(self, key, repository, manifest, archive):
-        super(FuseOperations, self).__init__()
+        super().__init__()
         self._inode_count = 0
         self.key = key
         self.repository = cache_if_remote(repository)

+ 10 - 3
borg/hashindex.pyx

@@ -14,6 +14,7 @@ cdef extern from "_hashindex.c":
     void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
+    void hashindex_merge(HashIndex *index, HashIndex *other)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -24,15 +25,18 @@ cdef extern from "_hashindex.c":
     int _le32toh(int v)
 
 
-_NoDefault = object()
+cdef _NoDefault = object()
 
+cimport cython
+
+@cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
     key_size = 32
 
     def __cinit__(self, capacity=0, path=None):
         if path:
-            self.index = hashindex_read(<bytes>os.fsencode(path))
+            self.index = hashindex_read(os.fsencode(path))
             if not self.index:
                 raise Exception('hashindex_read failed')
         else:
@@ -49,7 +53,7 @@ cdef class IndexBase:
         return cls(path=path)
 
     def write(self, path):
-        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
+        if not hashindex_write(self.index, os.fsencode(path)):
             raise Exception('hashindex_write failed')
 
     def clear(self):
@@ -187,6 +191,9 @@ cdef class ChunkIndex(IndexBase):
                             &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def merge(self, ChunkIndex other):
+        hashindex_merge(self.index, other.index)
+
 
 cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
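
The new merge() lets one chunk index absorb another, which the multithreading branch can use to combine per-worker indexes into the main one. A sketch, assuming ChunkIndex() creates an empty in-memory index and using the usual (refcount, size, csize) entry layout with a made-up chunk id:

    from borg.hashindex import ChunkIndex

    main_index = ChunkIndex()
    worker_index = ChunkIndex()
    worker_index[b'\x00' * 32] = (1, 1024, 512)   # hypothetical (refcount, size, csize) entry
    main_index.merge(worker_index)                # fold the worker's entries into main_index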

+ 49 - 50
borg/helpers.py

@@ -2,7 +2,6 @@ import argparse
 import binascii
 from collections import namedtuple
 import grp
-import msgpack
 import os
 import pwd
 import queue
@@ -12,7 +11,8 @@ import time
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from operator import attrgetter
-import fcntl
+
+import msgpack
 
 from . import hashindex
 from . import chunker
@@ -34,52 +34,15 @@ class ExtensionModuleError(Error):
     """The Borg binary extension modules do not seem to be properly installed"""
 
 
-class UpgradableLock:
-
-    class ReadLockFailed(Error):
-        """Failed to acquire read lock on {}"""
-
-    class WriteLockFailed(Error):
-        """Failed to acquire write lock on {}"""
-
-    def __init__(self, path, exclusive=False):
-        self.path = path
-        try:
-            self.fd = open(path, 'r+')
-        except IOError:
-            self.fd = open(path, 'r')
-        try:
-            if exclusive:
-                fcntl.lockf(self.fd, fcntl.LOCK_EX)
-            else:
-                fcntl.lockf(self.fd, fcntl.LOCK_SH)
-        # Python 3.2 raises IOError, Python3.3+ raises OSError
-        except (IOError, OSError):
-            if exclusive:
-                raise self.WriteLockFailed(self.path)
-            else:
-                raise self.ReadLockFailed(self.path)
-        self.is_exclusive = exclusive
-
-    def upgrade(self):
-        try:
-            fcntl.lockf(self.fd, fcntl.LOCK_EX)
-        # Python 3.2 raises IOError, Python3.3+ raises OSError
-        except (IOError, OSError):
-            raise self.WriteLockFailed(self.path)
-        self.is_exclusive = True
-
-    def release(self):
-        fcntl.lockf(self.fd, fcntl.LOCK_UN)
-        self.fd.close()
-
-
 def check_extension_modules():
     from . import platform
-    if (hashindex.API_VERSION != 2 or
-        chunker.API_VERSION != 2 or
-        crypto.API_VERSION != 2 or
-        platform.API_VERSION != 2):
+    if hashindex.API_VERSION != 2:
+        raise ExtensionModuleError
+    if chunker.API_VERSION != 2:
+        raise ExtensionModuleError
+    if crypto.API_VERSION != 2:
+        raise ExtensionModuleError
+    if platform.API_VERSION != 2:
         raise ExtensionModuleError
 
 
@@ -318,9 +281,45 @@ def timestamp(s):
 
 def ChunkerParams(s):
     window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
+    if int(chunk_max) > 23:
+        # do not go beyond 2**23 (8MB) chunk size now,
+        # COMPR_BUFFER can only cope with up to this size
+        raise ValueError
     return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
 
 
+def CompressionSpec(s):
+    values = s.split(',')
+    count = len(values)
+    if count < 1:
+        raise ValueError
+    compression = values[0]
+    try:
+        compression = int(compression)
+        if count > 1:
+            raise ValueError
+        # DEPRECATED: it is just --compression N
+        if 0 <= compression <= 9:
+            return dict(name='zlib', level=compression)
+        raise ValueError
+    except ValueError:
+        # --compression algo[,...]
+        name = compression
+        if name in ('none', 'lz4', ):
+            return dict(name=name)
+        if name in ('zlib', 'lzma', ):
+            if count < 2:
+                level = 6  # default compression level in py stdlib
+            elif count == 2:
+                level = int(values[1])
+                if not 0 <= level <= 9:
+                    raise ValueError
+            else:
+                raise ValueError
+            return dict(name=name, level=level)
+        raise ValueError
+
+
 def is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
@@ -532,9 +531,9 @@ class Location:
             else:
                 path = self.path
             return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '',
-                                                        self.host,
-                                                        ':{}'.format(self.port) if self.port else '',
-                                                        path)
+                                           self.host,
+                                           ':{}'.format(self.port) if self.port else '',
+                                           path)
 
 
 def location_validator(archive=None):
@@ -609,7 +608,7 @@ def daemonize():
 class StableDict(dict):
     """A dict subclass with stable items() ordering"""
     def items(self):
-        return sorted(super(StableDict, self).items())
+        return sorted(super().items())
 
 
 if sys.version_info < (3, 3):
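
CompressionSpec is the argparse type behind the new --compression option. A quick sketch of the argument forms it is meant to accept (expected results shown as comments):

    from borg.helpers import CompressionSpec

    CompressionSpec('none')      # -> {'name': 'none'}
    CompressionSpec('lz4')       # -> {'name': 'lz4'}
    CompressionSpec('zlib')      # -> {'name': 'zlib', 'level': 6}   (stdlib default level)
    CompressionSpec('lzma,3')    # -> {'name': 'lzma', 'level': 3}
    CompressionSpec('7')         # deprecated numeric form -> {'name': 'zlib', 'level': 7}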

+ 187 - 101
borg/key.py

@@ -1,13 +1,14 @@
 from binascii import hexlify, a2b_base64, b2a_base64
-from getpass import getpass
+import configparser
+import getpass
 import os
 import msgpack
 import textwrap
 import hmac
 from hashlib import sha256
-import zlib
 
 from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
+from .compress import Compressor, COMPR_BUFFER
 from .helpers import IntegrityError, get_keys_dir, Error
 
 PREFIX = b'\0' * 8
@@ -17,11 +18,17 @@ class UnsupportedPayloadError(Error):
     """Unsupported payload type {}. A newer version is required to access this repository.
     """
 
+
 class KeyfileNotFoundError(Error):
     """No key file for repository {} found in {}.
     """
 
 
+class RepoKeyNotFoundError(Error):
+    """No key entry found in the config of repository {}.
+    """
+
+
 class HMAC(hmac.HMAC):
     """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews
     """
@@ -32,28 +39,36 @@ class HMAC(hmac.HMAC):
 def key_creator(repository, args):
     if args.encryption == 'keyfile':
         return KeyfileKey.create(repository, args)
-    elif args.encryption == 'passphrase':
+    elif args.encryption == 'repokey':
+        return RepoKey.create(repository, args)
+    elif args.encryption == 'passphrase':  # deprecated, kill in 1.x
         return PassphraseKey.create(repository, args)
     else:
         return PlaintextKey.create(repository, args)
 
 
 def key_factory(repository, manifest_data):
-    if manifest_data[0] == KeyfileKey.TYPE:
+    key_type = manifest_data[0]
+    if key_type == KeyfileKey.TYPE:
         return KeyfileKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PassphraseKey.TYPE:
+    elif key_type == RepoKey.TYPE:
+        return RepoKey.detect(repository, manifest_data)
+    elif key_type == PassphraseKey.TYPE:  # deprecated, kill in 1.x
         return PassphraseKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PlaintextKey.TYPE:
+    elif key_type == PlaintextKey.TYPE:
         return PlaintextKey.detect(repository, manifest_data)
     else:
-        raise UnsupportedPayloadError(manifest_data[0])
+        raise UnsupportedPayloadError(key_type)
 
 
 class KeyBase:
+    TYPE = None  # override in subclasses
 
-    def __init__(self):
+    def __init__(self, repository):
         self.TYPE_STR = bytes([self.TYPE])
-        self.compression_level = 0
+        self.repository = repository
+        self.target = None  # key location file path / repo obj
+        self.compressor = Compressor('none', buffer=COMPR_BUFFER)
 
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
@@ -73,23 +88,23 @@ class PlaintextKey(KeyBase):
 
     @classmethod
     def create(cls, repository, args):
-        print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
-        return cls()
+        print('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile|passphrase" to enable encryption.')
+        return cls(repository)
 
     @classmethod
     def detect(cls, repository, manifest_data):
-        return cls()
+        return cls(repository)
 
     def id_hash(self, data):
         return sha256(data).digest()
 
     def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
+        return b''.join([self.TYPE_STR, self.compressor.compress(data)])
 
     def decrypt(self, id, data):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = zlib.decompress(memoryview(data)[1:])
+        data = self.compressor.decompress(memoryview(data)[1:])
         if id and sha256(data).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
@@ -116,7 +131,7 @@ class AESKeyBase(KeyBase):
         return HMAC(self.id_key, data, sha256).digest()
 
     def encrypt(self, data):
-        data = zlib.compress(data, self.compression_level)
+        data = self.compressor.compress(data)
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
         hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
@@ -129,7 +144,7 @@ class AESKeyBase(KeyBase):
         if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = zlib.decompress(self.dec_cipher.decrypt(data[41:]))  # should use memoryview
+        data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))
         if id and HMAC(self.id_key, data, sha256).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
@@ -154,38 +169,65 @@ class AESKeyBase(KeyBase):
         self.dec_cipher = AES(is_encrypt=False, key=self.enc_key)
 
 
+class Passphrase(str):
+    @classmethod
+    def env_passphrase(cls, default=None):
+        passphrase = os.environ.get('BORG_PASSPHRASE', default)
+        if passphrase is not None:
+            return cls(passphrase)
+
+    @classmethod
+    def getpass(cls, prompt):
+        return cls(getpass.getpass(prompt))
+
+    @classmethod
+    def new(cls, allow_empty=False):
+        passphrase = cls.env_passphrase()
+        if passphrase is not None:
+            return passphrase
+        while True:
+            passphrase = cls.getpass('Enter new passphrase: ')
+            if allow_empty or passphrase:
+                passphrase2 = cls.getpass('Enter same passphrase again: ')
+                if passphrase == passphrase2:
+                    print('Remember your passphrase. Your data will be inaccessible without it.')
+                    return passphrase
+                else:
+                    print('Passphrases do not match')
+            else:
+                print('Passphrase must not be blank')
+
+    def __repr__(self):
+        return '<Passphrase "***hidden***">'
+
+    def kdf(self, salt, iterations, length):
+        return pbkdf2_sha256(self.encode('utf-8'), salt, iterations, length)
+
+
 class PassphraseKey(AESKeyBase):
+    # This mode is DEPRECATED and will be killed at 1.0 release.
+    # With this mode:
+    # - you can never ever change your passphrase for existing repos.
+    # - you can never ever use a different iterations count for existing repos.
     TYPE = 0x01
-    iterations = 100000
+    iterations = 100000  # must not be changed ever!
 
     @classmethod
     def create(cls, repository, args):
-        key = cls()
-        passphrase = os.environ.get('BORG_PASSPHRASE')
-        if passphrase is not None:
-            passphrase2 = passphrase
-        else:
-            passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('Enter passphrase: ')
-            if not passphrase:
-                print('Passphrase must not be blank')
-                continue
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
+        key = cls(repository)
+        print('WARNING: "passphrase" mode is deprecated and will be removed in 1.0.')
+        print('If you want something similar (but with less issues), use "repokey" mode.')
+        passphrase = Passphrase.new(allow_empty=False)
         key.init(repository, passphrase)
-        if passphrase:
-            print('Remember your passphrase. Your data will be inaccessible without it.')
         return key
 
     @classmethod
     def detect(cls, repository, manifest_data):
         prompt = 'Enter passphrase for %s: ' % repository._location.orig
-        key = cls()
-        passphrase = os.environ.get('BORG_PASSPHRASE')
+        key = cls(repository)
+        passphrase = Passphrase.env_passphrase()
         if passphrase is None:
-            passphrase = getpass(prompt)
+            passphrase = Passphrase.getpass(prompt)
         while True:
             key.init(repository, passphrase)
             try:
@@ -194,7 +236,7 @@ class PassphraseKey(AESKeyBase):
                 key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
                 return key
             except IntegrityError:
-                passphrase = getpass(prompt)
+                passphrase = Passphrase.getpass(prompt)
 
     def change_passphrase(self):
         class ImmutablePassphraseError(Error):
@@ -203,42 +245,31 @@ class PassphraseKey(AESKeyBase):
         raise ImmutablePassphraseError
 
     def init(self, repository, passphrase):
-        self.init_from_random_data(pbkdf2_sha256(passphrase.encode('utf-8'), repository.id, self.iterations, 100))
+        self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100))
         self.init_ciphers()
 
 
-class KeyfileKey(AESKeyBase):
-    FILE_ID = 'BORG_KEY'
-    TYPE = 0x00
-
+class KeyfileKeyBase(AESKeyBase):
     @classmethod
     def detect(cls, repository, manifest_data):
-        key = cls()
-        path = cls.find_key_file(repository)
-        prompt = 'Enter passphrase for key file %s: ' % path
-        passphrase = os.environ.get('BORG_PASSPHRASE', '')
-        while not key.load(path, passphrase):
-            passphrase = getpass(prompt)
+        key = cls(repository)
+        target = key.find_key()
+        prompt = 'Enter passphrase for key %s: ' % target
+        passphrase = Passphrase.env_passphrase(default='')
+        while not key.load(target, passphrase):
+            passphrase = Passphrase.getpass(prompt)
         num_blocks = num_aes_blocks(len(manifest_data) - 41)
         key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
         return key
 
-    @classmethod
-    def find_key_file(cls, repository):
-        id = hexlify(repository.id).decode('ascii')
-        keys_dir = get_keys_dir()
-        for name in os.listdir(keys_dir):
-            filename = os.path.join(keys_dir, name)
-            with open(filename, 'r') as fd:
-                line = fd.readline().strip()
-                if (line and line.startswith(cls.FILE_ID) and
-                    line[len(cls.FILE_ID)+1:] == id):
-                    return filename
-        raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir())
+    def find_key(self):
+        raise NotImplementedError
 
-    def load(self, filename, passphrase):
-        with open(filename, 'r') as fd:
-            cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii'))  # .encode needed for Python 3.[0-2]
+    def load(self, target, passphrase):
+        raise NotImplementedError
+
+    def _load(self, key_data, passphrase):
+        cdata = a2b_base64(key_data.encode('ascii'))  # .encode needed for Python 3.[0-2]
         data = self.decrypt_key_file(cdata, passphrase)
         if data:
             key = msgpack.unpackb(data)
@@ -249,23 +280,22 @@ class KeyfileKey(AESKeyBase):
             self.enc_hmac_key = key[b'enc_hmac_key']
             self.id_key = key[b'id_key']
             self.chunk_seed = key[b'chunk_seed']
-            self.path = filename
             return True
+        return False
 
     def decrypt_key_file(self, data, passphrase):
         d = msgpack.unpackb(data)
         assert d[b'version'] == 1
         assert d[b'algorithm'] == b'sha256'
-        key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
+        key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32)
         data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
-        if HMAC(key, data, sha256).digest() != d[b'hash']:
-            return None
-        return data
+        if HMAC(key, data, sha256).digest() == d[b'hash']:
+            return data
 
     def encrypt_key_file(self, data, passphrase):
         salt = get_random_bytes(32)
         iterations = 100000
-        key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
+        key = passphrase.kdf(salt, iterations, 32)
         hash = HMAC(key, data, sha256).digest()
         cdata = AES(is_encrypt=True, key=key).encrypt(data)
         d = {
@@ -278,7 +308,7 @@ class KeyfileKey(AESKeyBase):
         }
         return msgpack.packb(d)
 
-    def save(self, path, passphrase):
+    def _save(self, passphrase):
         key = {
             'version': 1,
             'repository_id': self.repository_id,
@@ -288,45 +318,101 @@ class KeyfileKey(AESKeyBase):
             'chunk_seed': self.chunk_seed,
         }
         data = self.encrypt_key_file(msgpack.packb(key), passphrase)
-        with open(path, 'w') as fd:
-            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
-            fd.write('\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))))
-            fd.write('\n')
-        self.path = path
+        key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))
+        return key_data
 
     def change_passphrase(self):
-        passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('New passphrase: ')
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
-        self.save(self.path, passphrase)
-        print('Key file "%s" updated' % self.path)
+        passphrase = Passphrase.new(allow_empty=True)
+        self.save(self.target, passphrase)
+        print('Key updated')
 
     @classmethod
     def create(cls, repository, args):
+        passphrase = Passphrase.new(allow_empty=True)
+        key = cls(repository)
+        key.repository_id = repository.id
+        key.init_from_random_data(get_random_bytes(100))
+        key.init_ciphers()
+        target = key.get_new_target(args)
+        key.save(target, passphrase)
+        print('Key in "%s" created.' % target)
+        print('Keep this key safe. Your data will be inaccessible without it.')
+        return key
+
+    def save(self, target, passphrase):
+        raise NotImplementedError
+
+    def get_new_target(self, args):
+        raise NotImplementedError
+
+
+class KeyfileKey(KeyfileKeyBase):
+    TYPE = 0x00
+    FILE_ID = 'BORG_KEY'
+
+    def find_key(self):
+        id = hexlify(self.repository.id).decode('ascii')
+        keys_dir = get_keys_dir()
+        for name in os.listdir(keys_dir):
+            filename = os.path.join(keys_dir, name)
+            with open(filename, 'r') as fd:
+                line = fd.readline().strip()
+                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID)+1:] == id:
+                    return filename
+        raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
+
+    def get_new_target(self, args):
         filename = args.repository.to_key_filename()
         path = filename
         i = 1
         while os.path.exists(path):
             i += 1
             path = filename + '.%d' % i
-        passphrase = os.environ.get('BORG_PASSPHRASE')
-        if passphrase is not None:
-            passphrase2 = passphrase
-        else:
-            passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('Enter passphrase (empty for no passphrase):')
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
-        key = cls()
-        key.repository_id = repository.id
-        key.init_from_random_data(get_random_bytes(100))
-        key.init_ciphers()
-        key.save(path, passphrase)
-        print('Key file "%s" created.' % key.path)
-        print('Keep this file safe. Your data will be inaccessible without it.')
-        return key
+        return path
+
+    def load(self, target, passphrase):
+        with open(target, 'r') as fd:
+            key_data = ''.join(fd.readlines()[1:])
+        success = self._load(key_data, passphrase)
+        if success:
+            self.target = target
+        return success
+
+    def save(self, target, passphrase):
+        key_data = self._save(passphrase)
+        with open(target, 'w') as fd:
+            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
+            fd.write(key_data)
+            fd.write('\n')
+        self.target = target
+
+
+class RepoKey(KeyfileKeyBase):
+    TYPE = 0x03
+
+    def find_key(self):
+        loc = self.repository._location.canonical_path()
+        try:
+            self.repository.load_key()
+            return loc
+        except configparser.NoOptionError:
+            raise RepoKeyNotFoundError(loc)
+
+    def get_new_target(self, args):
+        return self.repository
+
+    def load(self, target, passphrase):
+        # what we get in target is just a repo location, but we already have the repo obj:
+        target = self.repository
+        key_data = target.load_key()
+        key_data = key_data.decode('utf-8')  # remote repo: msgpack issue #99, getting bytes
+        success = self._load(key_data, passphrase)
+        if success:
+            self.target = target
+        return success
+
+    def save(self, target, passphrase):
+        key_data = self._save(passphrase)
+        key_data = key_data.encode('utf-8')  # remote repo: msgpack issue #99, giving bytes
+        target.save_key(key_data)
+        self.target = target
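
The Passphrase helper centralizes passphrase handling (environment variable, interactive prompt with confirmation, key derivation). A minimal sketch, assuming the crypto extension is built; the salt shown is a made-up value:

    from borg.key import Passphrase

    passphrase = Passphrase.env_passphrase(default='')       # BORG_PASSPHRASE, or '' if unset
    key_material = passphrase.kdf(b'\x00' * 32, 100000, 32)  # PBKDF2-SHA256, 32 bytes out
    print(repr(passphrase))                                  # <Passphrase "***hidden***">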

+ 286 - 0
borg/locking.py

@@ -0,0 +1,286 @@
+import errno
+import json
+import os
+import socket
+import threading
+import time
+
+from borg.helpers import Error
+
+ADD, REMOVE = 'add', 'remove'
+SHARED, EXCLUSIVE = 'shared', 'exclusive'
+
+
+def get_id():
+    """Get identification tuple for 'us'"""
+    hostname = socket.gethostname()
+    pid = os.getpid()
+    tid = threading.current_thread().ident & 0xffffffff
+    return hostname, pid, tid
+
+
+class TimeoutTimer:
+    """
+    A timer for timeout checks (can also deal with no timeout, give timeout=None [default]).
+    It can also compute and optionally execute a reasonable sleep time (e.g. to avoid
+    polling too often or to support thread/process rescheduling).
+    """
+    def __init__(self, timeout=None, sleep=None):
+        """
+        Initialize a timer.
+
+        :param timeout: time out interval [s] or None (no timeout)
+        :param sleep: sleep interval [s] (>= 0: do sleep call, <0: don't call sleep)
+                      or None (autocompute: use 10% of timeout, or 1s for no timeout)
+        """
+        if timeout is not None and timeout < 0:
+            raise ValueError("timeout must be >= 0")
+        self.timeout_interval = timeout
+        if sleep is None:
+            if timeout is None:
+                sleep = 1.0
+            else:
+                sleep = timeout / 10.0
+        self.sleep_interval = sleep
+        self.start_time = None
+        self.end_time = None
+
+    def __repr__(self):
+        return "<%s: start=%r end=%r timeout=%r sleep=%r>" % (
+            self.__class__.__name__, self.start_time, self.end_time,
+            self.timeout_interval, self.sleep_interval)
+
+    def start(self):
+        self.start_time = time.time()
+        if self.timeout_interval is not None:
+            self.end_time = self.start_time + self.timeout_interval
+        return self
+
+    def sleep(self):
+        if self.sleep_interval >= 0:
+            time.sleep(self.sleep_interval)
+
+    def timed_out(self):
+        return self.end_time is not None and time.time() >= self.end_time
+
+    def timed_out_or_sleep(self):
+        if self.timed_out():
+            return True
+        else:
+            self.sleep()
+            return False
+
+
+class ExclusiveLock:
+    """An exclusive Lock based on mkdir fs operation being atomic"""
+    class LockError(Error):
+        """Failed to acquire the lock {}."""
+
+    class LockTimeout(LockError):
+        """Failed to create/acquire the lock {} (timeout)."""
+
+    class LockFailed(LockError):
+        """Failed to create/acquire the lock {} ({})."""
+
+    class UnlockError(Error):
+        """Failed to release the lock {}."""
+
+    class NotLocked(UnlockError):
+        """Failed to release the lock {} (was not locked)."""
+
+    class NotMyLock(UnlockError):
+        """Failed to release the lock {} (was/is locked, but not by me)."""
+
+    def __init__(self, path, timeout=None, sleep=None, id=None):
+        self.timeout = timeout
+        self.sleep = sleep
+        self.path = os.path.abspath(path)
+        self.id = id or get_id()
+        self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
+
+    def __enter__(self):
+        return self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    def __repr__(self):
+        return "<%s: %r>" % (self.__class__.__name__, self.unique_name)
+
+    def acquire(self, timeout=None, sleep=None):
+        if timeout is None:
+            timeout = self.timeout
+        if sleep is None:
+            sleep = self.sleep
+        timer = TimeoutTimer(timeout, sleep).start()
+        while True:
+            try:
+                os.mkdir(self.path)
+            except OSError as err:
+                if err.errno == errno.EEXIST:  # already locked
+                    if self.by_me():
+                        return self
+                    if timer.timed_out_or_sleep():
+                        raise self.LockTimeout(self.path)
+                else:
+                    raise self.LockFailed(self.path, str(err))
+            else:
+                with open(self.unique_name, "wb"):
+                    pass
+                return self
+
+    def release(self):
+        if not self.is_locked():
+            raise self.NotLocked(self.path)
+        if not self.by_me():
+            raise self.NotMyLock(self.path)
+        os.unlink(self.unique_name)
+        os.rmdir(self.path)
+
+    def is_locked(self):
+        return os.path.exists(self.path)
+
+    def by_me(self):
+        return os.path.exists(self.unique_name)
+
+    def break_lock(self):
+        if self.is_locked():
+            for name in os.listdir(self.path):
+                os.unlink(os.path.join(self.path, name))
+            os.rmdir(self.path)
+
+
+class LockRoster:
+    """
+    A Lock Roster to track shared/exclusive lockers.
+
+    Note: you should usually call these methods while holding an exclusive lock,
+    to avoid conflicting access by multiple threads/processes/machines.
+    """
+    def __init__(self, path, id=None):
+        self.path = path
+        self.id = id or get_id()
+
+    def load(self):
+        try:
+            with open(self.path) as f:
+                data = json.load(f)
+        except IOError as err:
+            if err.errno != errno.ENOENT:
+                raise
+            data = {}
+        return data
+
+    def save(self, data):
+        with open(self.path, "w") as f:
+            json.dump(data, f)
+
+    def remove(self):
+        os.unlink(self.path)
+
+    def get(self, key):
+        roster = self.load()
+        return set(tuple(e) for e in roster.get(key, []))
+
+    def modify(self, key, op):
+        roster = self.load()
+        try:
+            elements = set(tuple(e) for e in roster[key])
+        except KeyError:
+            elements = set()
+        if op == ADD:
+            elements.add(self.id)
+        elif op == REMOVE:
+            elements.remove(self.id)
+        else:
+            raise ValueError('Unknown LockRoster op %r' % op)
+        roster[key] = list(list(e) for e in elements)
+        self.save(roster)
+
+
+class UpgradableLock:
+    """
+    A Lock for a resource that can be accessed in a shared or exclusive way.
+    Typically, write access to a resource needs an exclusive lock (1 writer,
+    no one else may read) and read access to a resource needs a shared
+    lock (multiple readers are allowed).
+    """
+    class SharedLockFailed(Error):
+        """Failed to acquire shared lock [{}]"""
+
+    class ExclusiveLockFailed(Error):
+        """Failed to acquire write lock [{}]"""
+
+    def __init__(self, path, exclusive=False, sleep=None, id=None):
+        self.path = path
+        self.is_exclusive = exclusive
+        self.sleep = sleep
+        self.id = id or get_id()
+        # globally keeping track of shared and exclusive lockers:
+        self._roster = LockRoster(path + '.roster', id=id)
+        # an exclusive lock, used for:
+        # - holding while doing roster queries / updates
+        # - holding while the UpgradableLock itself is exclusive
+        self._lock = ExclusiveLock(path + '.exclusive', id=id)
+
+    def __enter__(self):
+        return self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    def __repr__(self):
+        return "<%s: %r>" % (self.__class__.__name__, self.id)
+
+    def acquire(self, exclusive=None, remove=None, sleep=None):
+        if exclusive is None:
+            exclusive = self.is_exclusive
+        sleep = sleep or self.sleep or 0.2
+        try:
+            if exclusive:
+                self._wait_for_readers_finishing(remove, sleep)
+                self._roster.modify(EXCLUSIVE, ADD)
+            else:
+                with self._lock:
+                    if remove is not None:
+                        self._roster.modify(remove, REMOVE)
+                    self._roster.modify(SHARED, ADD)
+            self.is_exclusive = exclusive
+            return self
+        except ExclusiveLock.LockError as err:
+            msg = str(err)
+            if exclusive:
+                raise self.ExclusiveLockFailed(msg)
+            else:
+                raise self.SharedLockFailed(msg)
+
+    def _wait_for_readers_finishing(self, remove, sleep):
+        while True:
+            self._lock.acquire()
+            if remove is not None:
+                self._roster.modify(remove, REMOVE)
+                remove = None
+            if len(self._roster.get(SHARED)) == 0:
+                return  # we are the only one and we keep the lock!
+            self._lock.release()
+            time.sleep(sleep)
+
+    def release(self):
+        if self.is_exclusive:
+            self._roster.modify(EXCLUSIVE, REMOVE)
+            self._lock.release()
+        else:
+            with self._lock:
+                self._roster.modify(SHARED, REMOVE)
+
+    def upgrade(self):
+        if not self.is_exclusive:
+            self.acquire(exclusive=True, remove=SHARED)
+
+    def downgrade(self):
+        if self.is_exclusive:
+            self.acquire(exclusive=False, remove=EXCLUSIVE)
+
+    def break_lock(self):
+        self._roster.remove()
+        self._lock.break_lock()
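
The new locking module replaces the fcntl-based UpgradableLock removed from helpers.py with an mkdir-based exclusive lock plus a JSON roster of shared/exclusive holders. A minimal sketch of the intended use (the lock path is hypothetical):

    from borg.locking import UpgradableLock

    with UpgradableLock('/path/to/repo/lock', exclusive=False) as lock:
        # ... read the shared resource under the shared lock ...
        lock.upgrade()     # waits until all other shared holders are gone, then is exclusive
        # ... modify the shared resource ...
        lock.downgrade()   # back to a shared lock before the context manager releases it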

+ 28 - 29
borg/lrucache.py

@@ -1,42 +1,41 @@
-class LRUCache(dict):
-
-    def __init__(self, capacity):
-        super(LRUCache, self).__init__()
+class LRUCache:
+    def __init__(self, capacity, dispose):
+        self._cache = {}
         self._lru = []
         self._capacity = capacity
+        self._dispose = dispose
 
     def __setitem__(self, key, value):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
+        assert key not in self._cache, (
+            "Unexpected attempt to replace a cached item,"
+            " without first deleting the old item.")
         self._lru.append(key)
         while len(self._lru) > self._capacity:
             del self[self._lru[0]]
-        return super(LRUCache, self).__setitem__(key, value)
+        self._cache[key] = value
 
     def __getitem__(self, key):
-        try:
-            self._lru.remove(key)
-            self._lru.append(key)
-        except ValueError:
-            pass
-        return super(LRUCache, self).__getitem__(key)
+        value = self._cache[key]  # raise KeyError if not found
+        self._lru.remove(key)
+        self._lru.append(key)
+        return value
 
     def __delitem__(self, key):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
-        return super(LRUCache, self).__delitem__(key)
+        value = self._cache.pop(key)  # raise KeyError if not found
+        self._dispose(value)
+        self._lru.remove(key)
+
+    def __contains__(self, key):
+        return key in self._cache
+
+    def clear(self):
+        for value in self._cache.values():
+            self._dispose(value)
+        self._cache.clear()
 
-    def pop(self, key, default=None):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
-        return super(LRUCache, self).pop(key, default)
+    # useful for testing
+    def items(self):
+        return self._cache.items()
 
-    def _not_implemented(self, *args, **kw):
-        raise NotImplementedError
-    popitem = setdefault = update = _not_implemented
+    def __len__(self):
+        return len(self._cache)
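
LRUCache is no longer a dict subclass; it now requires a dispose callback that is invoked for every evicted or cleared value, which LoggedIO uses further below to close segment file descriptors. A small sketch:

    from borg.lrucache import LRUCache

    cache = LRUCache(capacity=2, dispose=lambda value: print('disposing', value))
    cache[1] = 'a'
    cache[2] = 'b'
    cache[3] = 'c'     # over capacity: evicts key 1 and calls dispose('a')
    assert 1 not in cache and len(cache) == 2
    cache.clear()      # disposes the remaining values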

+ 43 - 56
borg/remote.py

@@ -3,7 +3,6 @@ import fcntl
 import msgpack
 import os
 import select
-import shutil
 from subprocess import Popen, PIPE
 import sys
 import tempfile
@@ -11,7 +10,6 @@ import traceback
 
 from . import __version__
 
-from .hashindex import NSIndex
 from .helpers import Error, IntegrityError
 from .repository import Repository
 
@@ -25,24 +23,28 @@ class ConnectionClosed(Error):
 class PathNotAllowed(Error):
     """Repository path not allowed"""
 
+
 class InvalidRPCMethod(Error):
     """RPC method is not valid"""
 
 
-class RepositoryServer:
+class RepositoryServer:  # pragma: no cover
     rpc_methods = (
-            '__len__',
-            'check',
-            'commit',
-            'delete',
-            'get',
-            'list',
-            'negotiate',
-            'open',
-            'put',
-            'repair',
-            'rollback',
-            )
+        '__len__',
+        'check',
+        'commit',
+        'delete',
+        'destroy',
+        'get',
+        'list',
+        'negotiate',
+        'open',
+        'put',
+        'repair',
+        'rollback',
+        'save_key',
+        'load_key',
+    )
 
     def __init__(self, restrict_to_paths):
         self.repository = None
@@ -71,7 +73,7 @@ class RepositoryServer:
                     type, msgid, method, args = unpacked
                     method = method.decode('ascii')
                     try:
-                        if not method in self.rpc_methods:
+                        if method not in self.rpc_methods:
                             raise InvalidRPCMethod(method)
                         try:
                             f = getattr(self, method)
@@ -106,9 +108,10 @@ class RepositoryServer:
 
 class RemoteRepository:
     extra_test_args = []
+    remote_path = None
+    umask = None
 
     class RPCError(Exception):
-
         def __init__(self, name):
             self.name = name
 
@@ -122,9 +125,11 @@ class RemoteRepository:
         self.responses = {}
         self.unpacker = msgpack.Unpacker(use_list=False)
         self.p = None
+        # use local umask also for the remote process
+        umask = ['--umask', '%03o' % self.umask]
         if location.host == '__testsuite__':
-            args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args
-        else:
+            args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args
+        else:  # pragma: no cover
             args = ['ssh']
             if location.port:
                 args += ['-p', str(location.port)]
@@ -132,7 +137,7 @@ class RemoteRepository:
                 args.append('%s@%s' % (location.user, location.host))
             else:
                 args.append('%s' % location.host)
-            args += ['borg', 'serve']
+            args += [self.remote_path, 'serve'] + umask
         self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE)
         self.stdin_fd = self.p.stdin.fileno()
         self.stdout_fd = self.p.stdout.fileno()
@@ -152,6 +157,9 @@ class RemoteRepository:
     def __del__(self):
         self.close()
 
+    def __repr__(self):
+        return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path())
+
     def call(self, cmd, *args, **kw):
         for resp in self.call_many(cmd, [args], **kw):
             return resp
@@ -199,7 +207,7 @@ class RemoteRepository:
                     break
             r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
             if x:
-                raise Exception('FD exception occured')
+                raise Exception('FD exception occurred')
             if r:
                 data = os.read(self.stdout_fd, BUFSIZE)
                 if not data:
@@ -277,6 +285,12 @@ class RemoteRepository:
     def delete(self, id_, wait=True):
         return self.call('delete', id_, wait=wait)
 
+    def save_key(self, keydata):
+        return self.call('save_key', keydata)
+
+    def load_key(self):
+        return self.call('load_key')
+
     def close(self):
         if self.p:
             self.p.stdin.close()
@@ -291,56 +305,29 @@ class RemoteRepository:
 class RepositoryCache:
     """A caching Repository wrapper
 
-    Caches Repository GET operations using a temporary file
+    Caches Repository GET operations using a local temporary Repository.
     """
     def __init__(self, repository):
-        self.tmppath = None
-        self.index = None
-        self.data_fd = None
         self.repository = repository
-        self.entries = {}
-        self.initialize()
+        tmppath = tempfile.mkdtemp(prefix='borg-tmp')
+        self.caching_repo = Repository(tmppath, create=True, exclusive=True)
 
     def __del__(self):
-        self.cleanup()
-
-    def initialize(self):
-        self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex()
-        self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
-
-    def cleanup(self):
-        del self.index
-        if self.data_fd:
-            self.data_fd.close()
-        if self.tmppath:
-            shutil.rmtree(self.tmppath)
-
-    def load_object(self, offset, size):
-        self.data_fd.seek(offset)
-        data = self.data_fd.read(size)
-        assert len(data) == size
-        return data
-
-    def store_object(self, key, data):
-        self.data_fd.seek(0, os.SEEK_END)
-        self.data_fd.write(data)
-        offset = self.data_fd.tell()
-        self.index[key] = offset - len(data), len(data)
+        self.caching_repo.destroy()
 
     def get(self, key):
         return next(self.get_many([key]))
 
     def get_many(self, keys):
-        unknown_keys = [key for key in keys if key not in self.index]
+        unknown_keys = [key for key in keys if key not in self.caching_repo]
         repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
         for key in keys:
             try:
-                yield self.load_object(*self.index[key])
-            except KeyError:
+                yield self.caching_repo.get(key)
+            except Repository.ObjectNotFound:
                 for key_, data in repository_iterator:
                     if key_ == key:
-                        self.store_object(key, data)
+                        self.caching_repo.put(key, data)
                         yield data
                         break
         # Consume any pending requests
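
RepositoryCache now backs its GET cache with a throwaway local Repository instead of a hand-rolled index plus data file. A behavioural sketch (remote_repo and chunk_id are hypothetical, already-obtained objects):

    from borg.remote import RepositoryCache

    cache = RepositoryCache(remote_repo)
    data = cache.get(chunk_id)   # first access fetches from the remote and stores it locally
    data = cache.get(chunk_id)   # second access is served from the local temporary repository
    del cache                    # __del__ destroys the temporary repository again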

+ 35 - 10
borg/repository.py

@@ -9,7 +9,8 @@ import sys
 from zlib import crc32
 
 from .hashindex import NSIndex
-from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify, UpgradableLock
+from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify
+from .locking import UpgradableLock
 from .lrucache import LRUCache
 
 MAX_OBJECT_SIZE = 20 * 1024 * 1024
@@ -61,6 +62,9 @@ class Repository:
     def __del__(self):
         self.close()
 
+    def __repr__(self):
+        return '<%s %s>' % (self.__class__.__name__, self.path)
+
     def create(self, path):
         """Create a new empty repository at `path`
         """
@@ -77,9 +81,23 @@ class Repository:
         config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
         config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
-        with open(os.path.join(path, 'config'), 'w') as fd:
+        self.save_config(path, config)
+
+    def save_config(self, path, config):
+        config_path = os.path.join(path, 'config')
+        with open(config_path, 'w') as fd:
             config.write(fd)
 
+    def save_key(self, keydata):
+        assert self.config
+        keydata = keydata.decode('utf-8')  # remote repo: msgpack issue #99, getting bytes
+        self.config.set('repository', 'key', keydata)
+        self.save_config(self.path, self.config)
+
+    def load_key(self):
+        keydata = self.config.get('repository', 'key')
+        return keydata.encode('utf-8')  # remote repo: msgpack issue #99, returning bytes
+
     def destroy(self):
         """Destroy the repository at `self.path`
         """
@@ -113,11 +131,11 @@ class Repository:
         self.path = path
         if not os.path.isdir(path):
             raise self.DoesNotExist(path)
+        self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive).acquire()
         self.config = RawConfigParser()
         self.config.read(os.path.join(self.path, 'config'))
         if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
             raise self.InvalidRepository(path)
-        self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.id = unhexlify(self.config.get('repository', 'id').strip())
@@ -148,7 +166,7 @@ class Repository:
         self._active_txn = True
         try:
             self.lock.upgrade()
-        except UpgradableLock.WriteLockFailed:
+        except UpgradableLock.ExclusiveLockFailed:
             # if upgrading the lock to exclusive fails, we do not have an
             # active transaction. this is important for "serve" mode, where
             # the repository instance lives on - even if exceptions happened.
@@ -316,7 +334,6 @@ class Repository:
             report_error('Adding commit tag to segment {}'.format(transaction_id))
             self.io.segment = transaction_id + 1
             self.io.write_commit()
-            self.io.close_segment()
         if current_index and not repair:
             if len(current_index) != len(self.index):
                 report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)))
@@ -341,6 +358,11 @@ class Repository:
             self.index = self.open_index(self.get_transaction_id())
         return len(self.index)
 
+    def __contains__(self, id):
+        if not self.index:
+            self.index = self.open_index(self.get_transaction_id())
+        return id in self.index
+
     def list(self, limit=None, marker=None):
         if not self.index:
             self.index = self.open_index(self.get_transaction_id())
@@ -390,7 +412,7 @@ class Repository:
         self.segments.setdefault(segment, 0)
 
     def preload(self, ids):
-        """Preload objects (only applies to remote repositories
+        """Preload objects (only applies to remote repositories)
         """
 
 
@@ -410,7 +432,8 @@ class LoggedIO:
 
     def __init__(self, path, limit, segments_per_dir, capacity=90):
         self.path = path
-        self.fds = LRUCache(capacity)
+        self.fds = LRUCache(capacity,
+                            dispose=lambda fd: fd.close())
         self.segment = 0
         self.limit = limit
         self.segments_per_dir = segments_per_dir
@@ -418,9 +441,8 @@ class LoggedIO:
         self._write_fd = None
 
     def close(self):
-        for segment in list(self.fds.keys()):
-            self.fds.pop(segment).close()
         self.close_segment()
+        self.fds.clear()
         self.fds = None  # Just to make sure we're disabled
 
     def segment_iterator(self, reverse=False):
@@ -494,6 +516,8 @@ class LoggedIO:
             return fd
 
     def delete_segment(self, segment):
+        if segment in self.fds:
+            del self.fds[segment]
         try:
             os.unlink(self.segment_filename(segment))
         except OSError:
@@ -536,7 +560,8 @@ class LoggedIO:
             header = fd.read(self.header_fmt.size)
 
     def recover_segment(self, segment, filename):
-        self.fds.pop(segment).close()
+        if segment in self.fds:
+            del self.fds[segment]
         # FIXME: save a copy of the original file
         with open(filename, 'rb') as fd:
             data = memoryview(fd.read())
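
Repository grows save_key()/load_key() (used by the new RepoKey mode to keep the key material in the repo config) and a __contains__ check against the index. A sketch with made-up values and a hypothetical repository path:

    from borg.repository import Repository

    repo = Repository('/path/to/repo', exclusive=True)   # hypothetical existing repository
    key_blob = b'0123456789abcdef' * 4                    # stand-in for the base64 key material
    repo.save_key(key_blob)          # stored under [repository] key in the repo config file
    assert repo.load_key() == key_blob
    chunk_id = b'\x00' * 32
    print(chunk_id in repo)          # __contains__ consults the index, no object read needed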

+ 1 - 28
borg/testsuite/__init__.py

@@ -73,7 +73,7 @@ class BaseTestCase(unittest.TestCase):
             d1 = [filename] + [getattr(s1, a) for a in attrs]
             d2 = [filename] + [getattr(s2, a) for a in attrs]
             if not os.path.islink(path1) or utime_supports_fd:
-                # Older versions of llfuse does not support ns precision properly
+                # Older versions of llfuse do not support ns precision properly
                 if fuse and not have_fuse_mtime_ns:
                     d1.append(round(st_mtime_ns(s1), -4))
                     d2.append(round(st_mtime_ns(s2), -4))
@@ -94,30 +94,3 @@ class BaseTestCase(unittest.TestCase):
                 return
             time.sleep(.1)
         raise Exception('wait_for_mount(%s) timeout' % path)
-
-
-def get_tests(suite):
-    """Generates a sequence of tests from a test suite
-    """
-    for item in suite:
-        try:
-            # TODO: This could be "yield from..." with Python 3.3+
-            for i in get_tests(item):
-                yield i
-        except TypeError:
-            yield item
-
-
-class TestLoader(unittest.TestLoader):
-    """A customized test loader that properly detects and filters our test cases
-    """
-
-    def loadTestsFromName(self, pattern, module=None):
-        suite = self.discover('borg.testsuite', '*.py')
-        tests = unittest.TestSuite()
-        for test in get_tests(suite):
-            if pattern.lower() in test.id().lower():
-                tests.addTest(test)
-        return tests
-
-

+ 3 - 3
borg/testsuite/archive.py

@@ -1,12 +1,12 @@
 from datetime import datetime, timezone
 
 import msgpack
+from mock import Mock
 
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
-from .mock import Mock
 
 
 class MockCache:
@@ -23,7 +23,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
 
     def _test_timestamp_parsing(self, isoformat, expected):
         repository = Mock()
-        key = PlaintextKey()
+        key = PlaintextKey(repository)
         manifest = Manifest(repository, key)
         a = Archive(repository, key, manifest, 'test', create=True)
         a.close()
@@ -46,7 +46,7 @@ class ChunkBufferTestCase(BaseTestCase):
     def test(self):
         data = [{b'foo': 1}, {b'bar': 2}]
         cache = MockCache()
-        key = PlaintextKey()
+        key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
         for d in data:
             chunks.add(d)

+ 120 - 22
borg/testsuite/archiver.py

@@ -11,6 +11,9 @@ import time
 import unittest
 from hashlib import sha256
 
+from mock import patch
+import pytest
+
 from .. import xattr
 from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP
 from ..archiver import Archiver
@@ -20,11 +23,10 @@ from ..helpers import Manifest
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
 from . import BaseTestCase
-from .mock import patch
 
 try:
     import llfuse
-    has_llfuse = True
+    has_llfuse = True or llfuse  # avoids "unused import"
 except ImportError:
     has_llfuse = False
 
@@ -32,6 +34,12 @@ has_lchflags = hasattr(os, 'lchflags')
 
 src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')
 
+# Python <= 3.2 raises OSError instead of PermissionError (See #164)
+try:
+    PermissionError = PermissionError
+except NameError:
+    PermissionError = OSError
+
 
 class changedir:
     def __init__(self, dir):
@@ -57,7 +65,9 @@ class environment_variable:
 
     def __exit__(self, *args, **kw):
         for k, v in self.old_values.items():
-            if v is not None:
+            if v is None:
+                del os.environ[k]
+            else:
                 os.environ[k] = v
 
 
@@ -83,13 +93,13 @@ class ArchiverTestCaseBase(BaseTestCase):
         os.mkdir(self.keys_path)
         os.mkdir(self.cache_path)
         with open(self.exclude_file_path, 'wb') as fd:
-            fd.write(b'input/file2\n# A commment line, then a blank line\n\n')
+            fd.write(b'input/file2\n# A comment line, then a blank line\n\n')
         self._old_wd = os.getcwd()
         os.chdir(self.tmpdir)
 
     def tearDown(self):
-        shutil.rmtree(self.tmpdir)
         os.chdir(self._old_wd)
+        shutil.rmtree(self.tmpdir)
 
     def cmd(self, *args, **kw):
         exit_code = kw.get('exit_code', 0)
@@ -143,7 +153,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.create_regular_file('empty', size=0)
         # next code line raises OverflowError on 32bit cpu (raspberry pi 2):
         # 2600-01-01 > 2**64 ns
-        #os.utime('input/empty', (19880895600, 19880895600))
+        # os.utime('input/empty', (19880895600, 19880895600))
         # thus, we better test with something not that far in future:
         # 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline":
         os.utime('input/empty', (2**31 - 1, 2**31 - 1))
@@ -151,15 +161,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.create_regular_file('flagfile', size=1024)
         # Directory
         self.create_regular_file('dir2/file2', size=1024 * 80)
-        # File owner
-        os.chown('input/file1', 100, 200)
         # File mode
         os.chmod('input/file1', 0o7755)
-        os.chmod('input/dir2', 0o555)
-        # Block device
-        os.mknod('input/bdev', 0o600 | stat.S_IFBLK,  os.makedev(10, 20))
-        # Char device
-        os.mknod('input/cdev', 0o600 | stat.S_IFCHR,  os.makedev(30, 40))
         # Hard link
         os.link(os.path.join(self.input_path, 'file1'),
                 os.path.join(self.input_path, 'hardlink'))
@@ -172,24 +175,59 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             # same for newer ubuntu and centos.
             # if this is supported just on specific platform, platform should be checked first,
             # so that the test setup for all tests using it does not fail here always for others.
-            #xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
+            # xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
         # FIFO node
         os.mkfifo(os.path.join(self.input_path, 'fifo1'))
         if has_lchflags:
             os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
+        try:
+            # Block device
+            os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
+            # Char device
+            os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
+            # File mode
+            os.chmod('input/dir2', 0o555)  # if we take away write perms, we need root to remove contents
+            # File owner
+            os.chown('input/file1', 100, 200)
+            have_root = True  # we have (fake)root
+        except PermissionError:
+            have_root = False
+        return have_root
 
     def test_basic_functionality(self):
-        self.create_test_files()
+        have_root = self.create_test_files()
         self.cmd('init', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('create', '--stats', self.repository_location + '::test.2', 'input')
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test')
         self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2)
-        self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11)
+        expected = [
+            'input',
+            'input/bdev',
+            'input/cdev',
+            'input/dir2',
+            'input/dir2/file2',
+            'input/empty',
+            'input/fifo1',
+            'input/file1',
+            'input/flagfile',
+            'input/hardlink',
+            'input/link1',
+        ]
+        if not have_root:
+            # we could not create these device files without (fake)root
+            expected.remove('input/bdev')
+            expected.remove('input/cdev')
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            expected.remove('input/flagfile')  # this file is UF_NODUMP
+            os.remove(os.path.join('input', 'flagfile'))
+        self.assert_equal(self.cmd('list', '--short', self.repository_location + '::test').splitlines(), expected)
         self.assert_dirs_equal('input', 'output/input')
         info_output = self.cmd('info', self.repository_location + '::test')
-        self.assert_in('Number of files: 4', info_output)
+        item_count = 3 if has_lchflags else 4  # one file is UF_NODUMP
+        self.assert_in('Number of files: %d' % item_count, info_output)
         shutil.rmtree(self.cache_path)
         with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'):
             info_output2 = self.cmd('info', self.repository_location + '::test')
@@ -243,6 +281,19 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         if sparse_support and hasattr(st, 'st_blocks'):
             self.assert_true(st.st_blocks * 512 < total_len / 10)  # is output sparse?
 
+    def test_unusual_filenames(self):
+        filenames = ['normal', 'with some blanks', '(with_parens)', ]
+        for filename in filenames:
+            filename = os.path.join(self.input_path, filename)
+            with open(filename, 'wb') as fd:
+                pass
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        for filename in filenames:
+            with changedir('output'):
+                self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename))
+            assert os.path.exists(os.path.join('output', 'input', filename))
+
     def test_repository_swap_detection(self):
         self.create_test_files()
         os.environ['BORG_PASSPHRASE'] = 'passphrase'
@@ -253,7 +304,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=none', self.repository_location)
         self._set_repository_id(self.repository_path, repository_id)
         self.assert_equal(repository_id, self._extract_repository_id(self.repository_path))
-        self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.cmd('create', self.repository_location + '::test.2', 'input'))
+        self.assert_raises(Cache.EncryptionMethodMismatch, lambda: self.cmd('create', self.repository_location + '::test.2', 'input'))
 
     def test_repository_swap_detection2(self):
         self.create_test_files()
@@ -263,7 +314,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '_encrypted::test', 'input')
         shutil.rmtree(self.repository_path + '_encrypted')
         os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted')
-        self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.cmd('create', self.repository_location + '_encrypted::test.2', 'input'))
+        self.assert_raises(Cache.RepositoryAccessAborted, lambda: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input'))
 
     def test_strip_components(self):
         self.cmd('init', self.repository_location)
@@ -389,11 +440,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
         self.cmd('delete', self.repository_location + '::test')
         self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
-        self.cmd('delete', self.repository_location + '::test.2')
+        self.cmd('delete', '--stats', self.repository_location + '::test.2')
         # Make sure all data except the manifest has been deleted
         repository = Repository(self.repository_path)
         self.assert_equal(len(repository), 1)
 
+    def test_delete_repo(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('delete', self.repository_location)
+        # Make sure the repo is gone
+        self.assertFalse(os.path.exists(self.repository_path))
+
     def test_corrupted_repository(self):
         self.cmd('init', self.repository_location)
         self.create_src_archive('test')
@@ -405,6 +466,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             fd.write(b'XXXX')
         self.cmd('check', self.repository_location, exit_code=1)
 
+    # we currently need to be able to create a lock directory inside the repo:
+    @pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo")
     def test_readonly_repository(self):
         self.cmd('init', self.repository_location)
         self.create_src_archive('test')
@@ -415,6 +478,13 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             # Restore permissions so shutil.rmtree is able to delete it
             os.system('chmod -R u+w ' + self.repository_path)
 
+    def test_umask(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        mode = os.stat(self.repository_path).st_mode
+        self.assertEqual(stat.S_IMODE(mode), 0o700)
+
     def test_cmdline_compatibility(self):
         self.create_regular_file('file1', size=1024 * 80)
         self.cmd('init', self.repository_location)
@@ -439,10 +509,38 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_not_in('test1', output)
         self.assert_in('test2', output)
 
+    def test_prune_repository_prefix(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
+        output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
+        self.assert_in('Would prune:     foo-2015-08-12-10:00', output)
+        output = self.cmd('list', self.repository_location)
+        self.assert_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+        self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        output = self.cmd('list', self.repository_location)
+        self.assert_not_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+
     def test_usage(self):
         self.assert_raises(SystemExit, lambda: self.cmd())
         self.assert_raises(SystemExit, lambda: self.cmd('-h'))
 
+    def test_help(self):
+        assert 'Borg' in self.cmd('help')
+        assert 'patterns' in self.cmd('help', 'patterns')
+        assert 'Initialize' in self.cmd('help', 'init')
+        assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only')
+        assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
+
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
     def test_fuse_mount_repository(self):
         mountpoint = os.path.join(self.tmpdir, 'mountpoint')
@@ -524,7 +622,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
     def setUp(self):
-        super(ArchiverCheckTestCase, self).setUp()
+        super().setUp()
         with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
             self.cmd('init', self.repository_location)
             self.create_src_archive('archive1')

+ 102 - 0
borg/testsuite/compress.py

@@ -0,0 +1,102 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+import pytest
+
+from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4
+
+
+buffer = bytes(2**16)
+data = b'fooooooooobaaaaaaaar' * 10
+params = dict(name='zlib', level=6, buffer=buffer)
+
+
+def test_get_compressor():
+    c = get_compressor(name='none')
+    assert isinstance(c, CNONE)
+    c = get_compressor(name='lz4', buffer=buffer)
+    assert isinstance(c, LZ4)
+    c = get_compressor(name='zlib')
+    assert isinstance(c, ZLIB)
+    with pytest.raises(KeyError):
+        get_compressor(name='foobar')
+
+
+def test_cnull():
+    c = get_compressor(name='none')
+    cdata = c.compress(data)
+    assert len(cdata) > len(data)
+    assert data in cdata  # it's not compressed and just in there 1:1
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lz4():
+    c = get_compressor(name='lz4', buffer=buffer)
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_zlib():
+    c = get_compressor(name='zlib')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lzma():
+    if lzma is None:
+        pytest.skip("No lzma support found.")
+    c = get_compressor(name='lzma')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_autodetect_invalid():
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\xff\xfftotalcrap')
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\x08\x00notreallyzlib')
+
+
+def test_zlib_compat():
+    # for compatibility reasons, we do not add an extra header for zlib,
+    # nor do we expect one when decompressing / autodetecting
+    for level in range(10):
+        c = get_compressor(name='zlib', level=level)
+        cdata1 = c.compress(data)
+        cdata2 = zlib.compress(data, level)
+        assert cdata1 == cdata2
+        data2 = c.decompress(cdata2)
+        assert data == data2
+        data2 = Compressor(**params).decompress(cdata2)
+        assert data == data2
+
+
+def test_compressor():
+    params_list = [
+        dict(name='none', buffer=buffer),
+        dict(name='lz4', buffer=buffer),
+        dict(name='zlib', level=0, buffer=buffer),
+        dict(name='zlib', level=6, buffer=buffer),
+        dict(name='zlib', level=9, buffer=buffer),
+    ]
+    if lzma:
+        params_list += [
+            dict(name='lzma', level=0, buffer=buffer),
+            dict(name='lzma', level=6, buffer=buffer),
+            dict(name='lzma', level=9, buffer=buffer),
+        ]
+    for params in params_list:
+        c = Compressor(**params)
+        assert data == c.decompress(c.compress(data))
+
+
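
A side note on the autodetection exercised above: ``Compressor(**params).decompress(cdata)``
has to recognize the format from the first bytes of ``cdata``. The following is only a rough,
pure-Python illustration of such a dispatch (the real logic lives in borg/compress.pyx); the
two-byte type IDs below are made-up placeholders, while the zlib check just applies the public
zlib header rule - which is also why ``b'\x08\x00notreallyzlib'`` in test_autodetect_invalid
gets rejected::

    import zlib

    # Made-up two-byte type IDs, for illustration only (see borg/compress.pyx
    # for the real values). zlib data carries no extra ID; it is recognized by
    # its standard header instead (that is what test_zlib_compat relies on).
    FAKE_ID_NONE = b'\x00\x00'
    FAKE_ID_LZ4 = b'\x01\x00'

    def looks_like_zlib(data):
        # RFC 1950: compression method 8 in the low nibble of byte 0, and the
        # first two bytes, read as a big-endian number, divisible by 31.
        return len(data) >= 2 and (data[0] & 0x0f) == 8 and (data[0] * 256 + data[1]) % 31 == 0

    def toy_detect(data):
        if data.startswith(FAKE_ID_NONE):
            return 'none'
        if data.startswith(FAKE_ID_LZ4):
            return 'lz4'
        if looks_like_zlib(data):
            return 'zlib'
        raise ValueError('could not detect compression type')

    assert toy_detect(zlib.compress(b'x' * 100)) == 'zlib'
    # b'\x08\x00notreallyzlib' fails the "% 31" check, so toy_detect() raises
    # ValueError for it, matching test_autodetect_invalid above.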

+ 22 - 0
borg/testsuite/hashindex.py

@@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex
 from . import BaseTestCase
 
 
+def H(x):
+    # make some 32byte long thing that depends on x
+    return bytes('%-0.32d' % x, 'ascii')
+
+
 class HashIndexTestCase(BaseTestCase):
 
     def _generic_test(self, cls, make_value, sha):
@@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase):
         second_half = list(idx.iteritems(marker=all[49][0]))
         self.assert_equal(len(second_half), 50)
         self.assert_equal(second_half, all[50:])
+
+    def test_chunkindex_merge(self):
+        idx1 = ChunkIndex()
+        idx1[H(1)] = 1, 100, 100
+        idx1[H(2)] = 2, 200, 200
+        idx1[H(3)] = 3, 300, 300
+        # no H(4) entry
+        idx2 = ChunkIndex()
+        idx2[H(1)] = 4, 100, 100
+        idx2[H(2)] = 5, 200, 200
+        # no H(3) entry
+        idx2[H(4)] = 6, 400, 400
+        idx1.merge(idx2)
+        assert idx1[H(1)] == (5, 100, 100)
+        assert idx1[H(2)] == (7, 200, 200)
+        assert idx1[H(3)] == (3, 300, 300)
+        assert idx1[H(4)] == (6, 400, 400)
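
test_chunkindex_merge pins down the merge semantics: reference counts are summed per chunk id,
entries present on only one side are carried over, and the size/csize fields (which should be
identical for the same id) are simply kept. A plain-dict model of that behaviour, ignoring the
C hash table the real ChunkIndex uses::

    def merge_chunk_indexes(idx1, idx2):
        # values are (refcount, size, csize) tuples keyed by chunk id
        for key, (refcount2, size2, csize2) in idx2.items():
            if key in idx1:
                refcount1, size1, csize1 = idx1[key]
                idx1[key] = (refcount1 + refcount2, size1, csize1)
            else:
                idx1[key] = (refcount2, size2, csize2)

    idx1 = {b'id1': (1, 100, 100), b'id2': (2, 200, 200), b'id3': (3, 300, 300)}
    idx2 = {b'id1': (4, 100, 100), b'id2': (5, 200, 200), b'id4': (6, 400, 400)}
    merge_chunk_indexes(idx1, idx2)
    assert idx1[b'id1'] == (5, 100, 100) and idx1[b'id4'] == (6, 400, 400)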

+ 39 - 33
borg/testsuite/helpers.py

@@ -1,14 +1,13 @@
 import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
-import os
-import tempfile
-import unittest
 
+import pytest
 import msgpack
 
-from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp
+from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
+    prune_within, prune_split, \
+    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec
 from . import BaseTestCase
 
 
@@ -96,7 +95,7 @@ class PatternTestCase(BaseTestCase):
                           ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg'])
         self.assert_equal(self.evaluate(['/home/u'], []), [])
         self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), [])
-        self.assert_equal(self.evaluate(['/home/'], ['/home/user2']), 
+        self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
                           ['/home', '/home/user/.profile', '/home/user/.bashrc'])
         self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']),
                           ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html'])
@@ -106,6 +105,30 @@ class PatternTestCase(BaseTestCase):
                           ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
 
 
+def test_compression_specs():
+    with pytest.raises(ValueError):
+        CompressionSpec('')
+    assert CompressionSpec('0') == dict(name='zlib', level=0)
+    assert CompressionSpec('1') == dict(name='zlib', level=1)
+    assert CompressionSpec('9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('10')
+    assert CompressionSpec('none') == dict(name='none')
+    assert CompressionSpec('lz4') == dict(name='lz4')
+    assert CompressionSpec('zlib') == dict(name='zlib', level=6)
+    assert CompressionSpec('zlib,0') == dict(name='zlib', level=0)
+    assert CompressionSpec('zlib,9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('zlib,9,invalid')
+    assert CompressionSpec('lzma') == dict(name='lzma', level=6)
+    assert CompressionSpec('lzma,0') == dict(name='lzma', level=0)
+    assert CompressionSpec('lzma,9') == dict(name='lzma', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('lzma,9,invalid')
+    with pytest.raises(ValueError):
+        CompressionSpec('invalid')
+
+
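
A minimal stand-in parser that satisfies exactly the assertions above could look like the
following sketch; the real CompressionSpec lives in borg/helpers.py and may differ in details::

    def compression_spec(s):
        # Illustrative stand-in for borg.helpers.CompressionSpec, just enough
        # to satisfy test_compression_specs.
        if not s:
            raise ValueError('empty compression spec')
        values = s.split(',')
        count = len(values)
        name = values[0]
        if count == 1 and name.isdigit():        # plain "0".."9" means zlib level N
            level = int(name)
            if level > 9:
                raise ValueError('invalid compression level: %s' % name)
            return dict(name='zlib', level=level)
        if name in ('none', 'lz4'):
            if count != 1:
                raise ValueError('too many arguments for %s' % name)
            return dict(name=name)
        if name in ('zlib', 'lzma'):
            if count == 1:
                return dict(name=name, level=6)  # default level
            if count == 2 and values[1].isdigit() and int(values[1]) <= 9:
                return dict(name=name, level=int(values[1]))
        raise ValueError('invalid compression spec: %s' % s)

    assert compression_spec('zlib,9') == dict(name='zlib', level=9)
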
 class MakePathSafeTestCase(BaseTestCase):
 
     def test(self):
@@ -118,23 +141,6 @@ class MakePathSafeTestCase(BaseTestCase):
         self.assert_equal(make_path_safe('/'), '.')
         self.assert_equal(make_path_safe('/'), '.')
 
-class UpgradableLockTestCase(BaseTestCase):
-
-    def test(self):
-        file = tempfile.NamedTemporaryFile()
-        lock = UpgradableLock(file.name)
-        lock.upgrade()
-        lock.upgrade()
-        lock.release()
-
-    @unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing')
-    def test_read_only_lock_file(self):
-        file = tempfile.NamedTemporaryFile()
-        os.chmod(file.name, 0o444)
-        lock = UpgradableLock(file.name)
-        self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade)
-        lock.release()
-
 
 class MockArchive:
 
@@ -161,7 +167,7 @@ class PruneSplitTestCase(BaseTestCase):
             for ta in test_archives, reversed(test_archives):
                 self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)),
                                   subset(test_archives, indices))
-            
+
         test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)]
         test_dates = [local_to_UTC(month, day) for month, day in test_pairs]
         test_archives = [MockArchive(date) for date in test_dates]
@@ -185,24 +191,24 @@ class PruneWithinTestCase(BaseTestCase):
             for ta in test_archives, reversed(test_archives):
                 self.assert_equal(set(prune_within(ta, within)),
                                   subset(test_archives, indices))
-            
+
         # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
         test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
         now = datetime.now(timezone.utc)
         test_dates = [now - timedelta(seconds=s) for s in test_offsets]
         test_archives = [MockArchive(date) for date in test_dates]
 
-        dotest(test_archives, '1H',  [0])
-        dotest(test_archives, '2H',  [0, 1])
-        dotest(test_archives, '3H',  [0, 1, 2])
+        dotest(test_archives, '1H', [0])
+        dotest(test_archives, '2H', [0, 1])
+        dotest(test_archives, '3H', [0, 1, 2])
         dotest(test_archives, '24H', [0, 1, 2, 3])
         dotest(test_archives, '26H', [0, 1, 2, 3, 4])
-        dotest(test_archives, '2d',  [0, 1, 2, 3, 4])
+        dotest(test_archives, '2d', [0, 1, 2, 3, 4])
         dotest(test_archives, '50H', [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '3d',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1w',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1m',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1y',  [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
 
 
 class StableDictTestCase(BaseTestCase):

+ 121 - 0
borg/testsuite/locking.py

@@ -0,0 +1,121 @@
+import time
+
+import pytest
+
+from ..locking import get_id, TimeoutTimer, ExclusiveLock, UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE
+
+
+ID1 = "foo", 1, 1
+ID2 = "bar", 2, 2
+
+def test_id():
+    hostname, pid, tid = get_id()
+    assert isinstance(hostname, str)
+    assert isinstance(pid, int)
+    assert isinstance(tid, int)
+    assert len(hostname) > 0
+    assert pid > 0
+
+
+class TestTimeoutTimer:
+    def test_timeout(self):
+        timeout = 0.5
+        t = TimeoutTimer(timeout).start()
+        assert not t.timed_out()
+        time.sleep(timeout * 1.5)
+        assert t.timed_out()
+
+    def test_notimeout_sleep(self):
+        timeout, sleep = None, 0.5
+        t = TimeoutTimer(timeout, sleep).start()
+        assert not t.timed_out_or_sleep()
+        assert time.time() >= t.start_time + 1 * sleep
+        assert not t.timed_out_or_sleep()
+        assert time.time() >= t.start_time + 2 * sleep
+
+
+@pytest.fixture()
+def lockpath(tmpdir):
+    return str(tmpdir.join('lock'))
+
+
+class TestExclusiveLock:
+    def test_checks(self, lockpath):
+        with ExclusiveLock(lockpath, timeout=1) as lock:
+            assert lock.is_locked() and lock.by_me()
+
+    def test_acquire_break_reacquire(self, lockpath):
+        lock = ExclusiveLock(lockpath, id=ID1).acquire()
+        lock.break_lock()
+        with ExclusiveLock(lockpath, id=ID2):
+            pass
+
+    def test_timeout(self, lockpath):
+        with ExclusiveLock(lockpath, id=ID1):
+            with pytest.raises(ExclusiveLock.LockTimeout):
+                ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire()
+
+
+class TestUpgradableLock:
+    def test_shared(self, lockpath):
+        lock1 = UpgradableLock(lockpath, exclusive=False, id=ID1).acquire()
+        lock2 = UpgradableLock(lockpath, exclusive=False, id=ID2).acquire()
+        assert len(lock1._roster.get(SHARED)) == 2
+        assert len(lock1._roster.get(EXCLUSIVE)) == 0
+        lock1.release()
+        lock2.release()
+
+    def test_exclusive(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=True, id=ID1) as lock:
+            assert len(lock._roster.get(SHARED)) == 0
+            assert len(lock._roster.get(EXCLUSIVE)) == 1
+
+    def test_upgrade(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=False) as lock:
+            lock.upgrade()
+            lock.upgrade()  # NOP
+            assert len(lock._roster.get(SHARED)) == 0
+            assert len(lock._roster.get(EXCLUSIVE)) == 1
+
+    def test_downgrade(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=True) as lock:
+            lock.downgrade()
+            lock.downgrade()  # NOP
+            assert len(lock._roster.get(SHARED)) == 1
+            assert len(lock._roster.get(EXCLUSIVE)) == 0
+
+    def test_break(self, lockpath):
+        lock = UpgradableLock(lockpath, exclusive=True, id=ID1).acquire()
+        lock.break_lock()
+        assert len(lock._roster.get(SHARED)) == 0
+        assert len(lock._roster.get(EXCLUSIVE)) == 0
+        with UpgradableLock(lockpath, exclusive=True, id=ID2):
+            pass
+
+
+@pytest.fixture()
+def rosterpath(tmpdir):
+    return str(tmpdir.join('roster'))
+
+
+class TestLockRoster:
+    def test_empty(self, rosterpath):
+        roster = LockRoster(rosterpath)
+        empty = roster.load()
+        roster.save(empty)
+        assert empty == {}
+
+    def test_modify_get(self, rosterpath):
+        roster1 = LockRoster(rosterpath, id=ID1)
+        assert roster1.get(SHARED) == set()
+        roster1.modify(SHARED, ADD)
+        assert roster1.get(SHARED) == {ID1, }
+        roster2 = LockRoster(rosterpath, id=ID2)
+        roster2.modify(SHARED, ADD)
+        assert roster2.get(SHARED) == {ID1, ID2, }
+        roster1 = LockRoster(rosterpath, id=ID1)
+        roster1.modify(SHARED, REMOVE)
+        assert roster1.get(SHARED) == {ID2, }
+        roster2 = LockRoster(rosterpath, id=ID2)
+        roster2.modify(SHARED, REMOVE)
+        assert roster2.get(SHARED) == set()
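
For orientation, the roster behaviour exercised above boils down to a small persistent mapping
from a lock state (SHARED / EXCLUSIVE) to a set of (hostname, pid, thread_id) tuples. A rough,
file-backed stand-in with the same load/save/get/modify surface might look as follows; the real
LockRoster in borg/locking.py handles more cases, and ExclusiveLock / UpgradableLock build on
top of it::

    import json

    ADD, REMOVE = 'add', 'remove'                # illustrative constants
    SHARED, EXCLUSIVE = 'shared', 'exclusive'

    class ToyLockRoster:
        """File-backed mapping: lock state -> set of (host, pid, tid) ids."""
        def __init__(self, path, id):
            self.path = path
            self.id = tuple(id)

        def load(self):
            try:
                with open(self.path) as fd:
                    return json.load(fd)
            except (OSError, IOError, ValueError):
                return {}

        def save(self, data):
            with open(self.path, 'w') as fd:
                json.dump(data, fd)

        def get(self, key):
            return set(tuple(e) for e in self.load().get(key, []))

        def modify(self, key, op):
            ids = self.get(key)
            if op == ADD:
                ids.add(self.id)
            elif op == REMOVE:
                ids.discard(self.id)
            data = self.load()
            data[key] = list(ids)
            self.save(data)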

+ 43 - 31
borg/testsuite/lrucache.py

@@ -1,40 +1,52 @@
 from ..lrucache import LRUCache
-from . import BaseTestCase
+import pytest
+from tempfile import TemporaryFile
 
 
-class LRUCacheTestCase(BaseTestCase):
+class TestLRUCache:
 
-    def test(self):
-        c = LRUCache(2)
-        self.assert_equal(len(c), 0)
+    def test_lrucache(self):
+        c = LRUCache(2, dispose=lambda _: None)
+        assert len(c) == 0
+        assert c.items() == set()
         for i, x in enumerate('abc'):
             c[x] = i
-        self.assert_equal(len(c), 2)
-        self.assert_equal(set(c), set(['b', 'c']))
-        self.assert_equal(set(c.items()), set([('b', 1), ('c', 2)]))
-        self.assert_equal(False, 'a' in c)
-        self.assert_equal(True, 'b' in c)
-        self.assert_raises(KeyError, lambda: c['a'])
-        self.assert_equal(c['b'], 1)
-        self.assert_equal(c['c'], 2)
+        assert len(c) == 2
+        assert c.items() == set([('b', 1), ('c', 2)])
+        assert 'a' not in c
+        assert 'b' in c
+        with pytest.raises(KeyError):
+            c['a']
+        assert c['b'] == 1
+        assert c['c'] == 2
         c['d'] = 3
-        self.assert_equal(len(c), 2)
-        self.assert_equal(c['c'], 2)
-        self.assert_equal(c['d'], 3)
-        c['c'] = 22
-        c['e'] = 4
-        self.assert_equal(len(c), 2)
-        self.assert_raises(KeyError, lambda: c['d'])
-        self.assert_equal(c['c'], 22)
-        self.assert_equal(c['e'], 4)
+        assert len(c) == 2
+        assert c['c'] == 2
+        assert c['d'] == 3
         del c['c']
-        self.assert_equal(len(c), 1)
-        self.assert_raises(KeyError, lambda: c['c'])
-        self.assert_equal(c['e'], 4)
+        assert len(c) == 1
+        with pytest.raises(KeyError):
+            c['c']
+        assert c['d'] == 3
+        c.clear()
+        assert c.items() == set()
 
-    def test_pop(self):
-        c = LRUCache(2)
-        c[1] = 1
-        c[2] = 2
-        c.pop(1)
-        c[3] = 3
+    def test_dispose(self):
+        c = LRUCache(2, dispose=lambda f: f.close())
+        f1 = TemporaryFile()
+        f2 = TemporaryFile()
+        f3 = TemporaryFile()
+        c[1] = f1
+        c[2] = f2
+        assert not f2.closed
+        c[3] = f3
+        assert 1 not in c
+        assert f1.closed
+        assert 2 in c
+        assert not f2.closed
+        del c[2]
+        assert 2 not in c
+        assert f2.closed
+        c.clear()
+        assert c.items() == set()
+        assert f3.closed
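
The interface assumed by these tests - a fixed capacity plus a mandatory dispose callback that
is invoked whenever an entry leaves the cache (eviction, del, clear) - can be modelled compactly
with an OrderedDict. This is only an illustrative sketch, not the implementation in
borg/lrucache.py::

    from collections import OrderedDict

    class ToyLRUCache:
        """Minimal LRU cache calling dispose() on every value that leaves it."""
        def __init__(self, capacity, dispose):
            self._capacity = capacity
            self._dispose = dispose
            self._cache = OrderedDict()

        def __setitem__(self, key, value):
            self._cache[key] = value
            self._cache.move_to_end(key)
            while len(self._cache) > self._capacity:
                _, evicted = self._cache.popitem(last=False)  # least recently used
                self._dispose(evicted)

        def __getitem__(self, key):
            value = self._cache[key]          # raises KeyError if missing
            self._cache.move_to_end(key)      # mark as recently used
            return value

        def __delitem__(self, key):
            self._dispose(self._cache.pop(key))

        def __contains__(self, key):
            return key in self._cache

        def __len__(self):
            return len(self._cache)

        def items(self):
            return set(self._cache.items())

        def clear(self):
            while self._cache:
                self._dispose(self._cache.popitem()[1])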

+ 0 - 5
borg/testsuite/mock.py

@@ -1,5 +0,0 @@
-try:
-    # Only available in python 3.3+
-    from unittest.mock import *
-except ImportError:
-    from mock import *

+ 0 - 1
borg/testsuite/platform.py

@@ -102,4 +102,3 @@ class PlatformDarwinTestCase(BaseTestCase):
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
-

+ 12 - 6
borg/testsuite/repository.py

@@ -2,12 +2,14 @@ import os
 import shutil
 import tempfile
 
+from mock import patch
+
 from ..hashindex import NSIndex
-from ..helpers import Location, IntegrityError, UpgradableLock
+from ..helpers import Location, IntegrityError
+from ..locking import UpgradableLock
 from ..remote import RemoteRepository, InvalidRPCMethod
 from ..repository import Repository
 from . import BaseTestCase
-from .mock import patch
 
 
 class RepositoryTestCaseBase(BaseTestCase):
@@ -156,10 +158,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         for name in os.listdir(self.repository.path):
             if name.startswith('index.'):
                 os.unlink(os.path.join(self.repository.path, name))
-        with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.WriteLockFailed) as upgrade:
+        with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.ExclusiveLockFailed) as upgrade:
             self.reopen()
-            self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository))
-            upgrade.assert_called_once()
+            self.assert_raises(UpgradableLock.ExclusiveLockFailed, lambda: len(self.repository))
+            upgrade.assert_called_once_with()
 
     def test_crash_before_write_index(self):
         self.add_keys()
@@ -309,7 +311,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         # Simulate a crash before compact
         with patch.object(Repository, 'compact_segments') as compact:
             self.repository.commit()
-            compact.assert_called_once()
+            compact.assert_called_once_with()
         self.reopen()
         self.check(repair=True)
         self.assert_equal(self.repository.get(bytes(32)), b'data2')
@@ -328,3 +330,7 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+    def test_crash_before_compact(self):
+        # skip this test, we can't mock-patch a Repository class in another process!
+        pass

+ 0 - 11
borg/testsuite/run.py

@@ -1,11 +0,0 @@
-import unittest
-
-from . import TestLoader
-
-
-def main():
-    unittest.main(testLoader=TestLoader(), defaultTest='')
-
-
-if __name__ == '__main__':
-    main()

+ 1 - 1
borg/xattr.py

@@ -11,7 +11,7 @@ from ctypes.util import find_library
 def is_enabled(path=None):
     """Determine if xattr is enabled on the filesystem
     """
-    with tempfile.NamedTemporaryFile(dir=path) as fd:
+    with tempfile.NamedTemporaryFile(dir=path, prefix='borg-tmp') as fd:
         try:
             setxattr(fd.fileno(), 'user.name', b'value')
         except OSError:

+ 10 - 1
docs/_themes/local/sidebarusefullinks.html

@@ -3,9 +3,18 @@
 
 <h3>Useful Links</h3>
 <ul>
-  <li><a href="https://borgbackup.github.io/">Main Web Site</a></li>
+  <li><a href="https://borgbackup.github.io/borgbackup/">Main Web Site</a></li>
   <li><a href="https://pypi.python.org/pypi/borgbackup">PyPI packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/issues/147">Binary Packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/blob/master/CHANGES.rst">Current ChangeLog</a></li>
   <li><a href="https://github.com/borgbackup/borg">GitHub</a></li>
   <li><a href="https://github.com/borgbackup/borg/issues">Issue Tracker</a></li>
+  <li><a href="https://www.bountysource.com/teams/borgbackup">Bounties &amp; Fundraisers</a></li>
   <li><a href="http://librelist.com/browser/borgbackup/">Mailing List</a></li>
 </ul>
+
+<h3>Related Projects</h3>
+<ul>
+  <li><a href="https://borgbackup.github.io/borgweb/">BorgWeb</a></li>
+</ul>
+

+ 4 - 0
docs/changes.rst

@@ -0,0 +1,4 @@
+.. include:: global.rst.inc
+.. _changelog:
+
+.. include:: ../CHANGES.rst

+ 6 - 6
docs/conf.py

@@ -11,13 +11,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os
-from borg import __version__ as sw_version
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+import sys, os
+sys.path.insert(0, os.path.abspath('..'))
+
+from borg import __version__ as sw_version
 
 # -- General configuration -----------------------------------------------------
 
@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = 'Borg - Deduplicating Archiver'
-copyright = '2010-2014, Jonas Borgström'
+copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -134,7 +134,7 @@ html_static_path = []
 # Custom sidebar templates, maps document names to template names.
 html_sidebars = {
     'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'],
-    '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html']
+    '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html']
 }
 # Additional templates that should be rendered to pages, maps page names to
 # template names.

+ 67 - 0
docs/development.rst

@@ -0,0 +1,67 @@
+.. include:: global.rst.inc
+.. _development:
+
+Development
+===========
+
+This chapter will get you started with |project_name| development.
+
+|project_name| is written in Python (with a little bit of Cython and C for
+the performance critical parts).
+
+
+Building a development environment
+----------------------------------
+
+First, just install borg into a virtual env as described before.
+
+To install some additional packages needed for running the tests, activate your
+virtual env and run::
+
+  pip install -r requirements.d/development.txt
+
+
+Running the tests
+-----------------
+
+The tests are in the borg/testsuite package.
+
+To run them, you need to have fakeroot, tox and pytest installed.
+
+To run the test suite use the following command::
+
+  fakeroot -u tox  # run all tests
+
+Some more advanced examples::
+
+  # verify a changed tox.ini (run this after any change to tox.ini):
+  fakeroot -u tox --recreate
+
+  fakeroot -u tox -e py32  # run all tests, but only on python 3.2
+
+  fakeroot -u tox borg.testsuite.locking  # only run 1 test module
+
+  fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"'  # exclude some tests
+
+  fakeroot -u tox borg.testsuite -- -v  # verbose py.test
+
+Important notes:
+
+- Without fakeroot -u some tests will fail.
+- When using -- to give options to py.test, you MUST also give borg.testsuite[.module].
+
+Building the docs with Sphinx
+-----------------------------
+
+The documentation (in reStructuredText format, .rst) is in docs/.
+
+To build the html version of it, you need to have sphinx installed::
+
+  pip3 install sphinx
+
+Now run::
+
+  cd docs/
+  make html
+
+Then point a web browser at docs/_build/html/index.html.

+ 17 - 2
docs/faq.rst

@@ -70,8 +70,9 @@ When backing up to remote encrypted repos, is encryption done locally?
 
 When backing up to remote servers, do I have to trust the remote server?
     Yes and No.
-    No, as far as data confidentiality is concerned - all your files/dirs data
-    and metadata are stored in their encrypted form into the repository.
+    No, as far as data confidentiality is concerned - if you use encryption,
+    all your files/dirs data and metadata are stored in their encrypted form
+    in the repository.
     Yes, as an attacker with access to the remote server could delete (or
     otherwise make unavailable) all your backups.
 
@@ -90,6 +91,20 @@ If I want to run |project_name| on a ARM CPU older than ARM v6?
     
         echo "2" > /proc/cpu/alignment
 
+Can |project_name| add redundancy to the backup data to deal with hardware malfunction?
+    No, it can't. While that at first sounds like a good idea to defend against some
+    defective HDD sectors or SSD flash blocks, dealing with this in a reliable way needs a lot
+    of low-level storage layout information and control which we do not have (and also can't
+    get, even if we wanted).
+
+    So, if you need that, consider RAID1 or a filesystem that offers redundant storage.
+
+Can |project_name| verify data integrity of a backup archive?
+    Yes, if you want to detect accidental data damage (like bit rot), use the ``check``
+    operation. It will notice corruption using CRCs and hashes.
+    If you also want to be able to detect malicious tampering, use an encrypted repo.
+    It will then be able to check using CRCs and HMACs.
+
 Why was Borg forked from Attic?
     Borg was created in May 2015 in response to the difficulty of
     getting new code or larger changes incorporated into Attic and

+ 0 - 62
docs/foreword.rst

@@ -1,62 +0,0 @@
-.. include:: global.rst.inc
-.. _foreword:
-
-Foreword
-========
-
-|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X. 
-|project_name| is designed for efficient data storage where only new or
-modified data is stored.
-
-Features
---------
-
-Space efficient storage
-    Variable block size `deduplication`_ is used to reduce the number of bytes 
-    stored by detecting redundant data. Each file is split into a number of
-    variable length chunks and only chunks that have never been seen before
-    are compressed and added to the repository.
-
-Optional data encryption
-    All data can be protected using 256-bit AES_ encryption and data integrity
-    and authenticity is verified using `HMAC-SHA256`_.
-
-Off-site backups
-    |project_name| can store data on any remote host accessible over SSH as
-    long as |project_name| is installed.
-
-Backups mountable as filesystems
-    Backup archives are :ref:`mountable <borg_mount>` as
-    `userspace filesystems`_ for easy backup verification and restores.
-
-
-Glossary
---------
-
-.. _deduplication_def:
-
-Deduplication
-    Deduplication is a technique for improving storage utilization by
-    eliminating redundant data. 
-
-.. _archive_def:
-
-Archive
-    An archive is a collection of files along with metadata that include file
-    permissions, directory structure and various file attributes.
-    Since each archive in a repository must have a unique name a good naming
-    convention is ``hostname-YYYY-MM-DD``.
-
-.. _repository_def:
-
-Repository
-    A repository is a filesystem directory storing data from zero or more
-    archives. The data in a repository is both deduplicated and 
-    optionally encrypted making it both efficient and safe. Repositories are
-    created using :ref:`borg_init` and the contents can be listed using
-    :ref:`borg_list`.
-
-Key file
-    When a repository is initialized a key file containing a password
-    protected encryption key is created. It is vital to keep this file safe
-    since the repository data is totally inaccessible without it.

+ 1 - 0
docs/global.rst.inc

@@ -13,6 +13,7 @@
 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
 .. _libacl: http://savannah.nongnu.org/projects/acl/
+.. _liblz4: https://github.com/Cyan4973/lz4
 .. _OpenSSL: https://www.openssl.org/
 .. _Python: http://www.python.org/
 .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

+ 6 - 69
docs/index.rst

@@ -1,81 +1,18 @@
 .. include:: global.rst.inc
 
-Welcome to Borg
-================
-|project_name| is a deduplicating and compressing backup program.
-Optionally, it also supports authenticated encryption.
 
-The main goal of |project_name| is to provide an efficient and secure way
-to backup data. The data deduplication technique used makes |project_name|
-suitable for daily backups since only the changes are stored. The authenticated
-encryption makes it suitable for backups to not fully trusted targets.
-
-|project_name| is written in Python (with a little bit of Cython and C for
-the performance critical parts).
-
-
-Easy to use
------------
-Initialize a new backup :ref:`repository <repository_def>` and create your
-first backup :ref:`archive <archive_def>` in two lines::
-
-    $ borg init /mnt/backup
-    $ borg create /mnt/backup::Monday ~/Documents
-    $ borg create --stats /mnt/backup::Tuesday ~/Documents
-    Archive name: Tuesday
-    Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a
-    Start time: Tue Mar 25 12:00:10 2014
-    End time:   Tue Mar 25 12:00:10 2014
-    Duration: 0.08 seconds
-    Number of files: 358
-                           Original size      Compressed size    Deduplicated size
-    This archive:               57.16 MB             46.78 MB            151.67 kB
-    All archives:              114.02 MB             93.46 MB             44.81 MB
-
-See the :ref:`quickstart` chapter for a more detailed example.
-
-Easy installation
------------------
-You can use pip to install |project_name| quickly and easily::
-
-    $ pip3 install borgbackup
-
-Need more help with installing? See :ref:`installation`.
-
-User's Guide
-============
+Borg Documentation
+==================
 
 .. toctree::
    :maxdepth: 2
 
-   foreword
+   intro
    installation
    quickstart
    usage
    faq
+   support
+   changes
    internals
-
-Getting help
-============
-
-If you've found a bug or have a concrete feature request, please create a new
-ticket on the project's `issue tracker`_ (after checking whether someone else
-already has reported the same thing).
-
-For more general questions or discussions, IRC or mailing list are preferred.
-
-IRC
----
-Join us on channel ##borgbackup on chat.freenode.net. As usual on IRC, just
-ask or tell directly and then patiently wait for replies. Stay connected.
-
-Mailing list
-------------
-
-There is a mailing list for Borg on librelist_ that you can use for feature
-requests and general discussions about Borg. A mailing list archive is
-available `here <http://librelist.com/browser/borgbackup/>`_.
-
-To subscribe to the list, send an email to borgbackup@librelist.com and reply
-to the confirmation mail. Likewise, to unsubscribe, send an email to 
-borgbackup-unsubscribe@librelist.com and reply to the confirmation mail.
+   development

+ 64 - 8
docs/installation.rst

@@ -9,6 +9,7 @@ Installation
 * Python_ >= 3.2
 * OpenSSL_ >= 1.0.0
 * libacl_
+* liblz4_
 * some python dependencies, see install_requires in setup.py
 
 General notes
@@ -19,12 +20,18 @@ usually available as an optional install.
 Virtualenv_ can be used to build and install |project_name| without affecting
 the system Python or requiring root access.
 
+Important:
+if you install into a virtual environment, you need to activate
+the virtual env first (``source borg-env/bin/activate``).
+Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some
+directory that is in your PATH so you can just run ``borg``).
+
 The llfuse_ python package is also required if you wish to mount an
 archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse.
 
 You only need Cython to compile the .pyx files to the respective .c files
 when using |project_name| code from git. For |project_name| releases, the .c
-files will be bundled.
+files will be bundled, so you won't need Cython to install a release.
 
 Platform notes
 --------------
@@ -32,7 +39,7 @@ FreeBSD: You may need to get a recent enough OpenSSL version from FreeBSD ports.
 
 Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_.
 
-Mac OS X: A recent enough FUSE implementation might be unavailable.
+Mac OS X: You need OS X FUSE >= 3.0.
 
 
 Debian / Ubuntu installation (from git)
@@ -53,11 +60,17 @@ Some of the steps detailled below might be useful also for non-git installs.
     # ACL support Headers + Library
     apt-get install libacl1-dev libacl1
 
+    # lz4 super fast compression support Headers + Library
+    apt-get install liblz4-dev liblz4-1
+
     # if you do not have gcc / make / etc. yet
     apt-get install build-essential
 
-    # optional: lowlevel FUSE py binding - to mount backup archives
-    apt-get install python3-llfuse fuse
+    # optional: FUSE support - to mount backup archives
+    # in case you get complaints about permission denied on /etc/fuse.conf:
+    # on ubuntu this means your user is not in the "fuse" group. just add
+    # yourself there, log out and log in again.
+    apt-get install libfuse-dev fuse
 
     # optional: for unit testing
     apt-get install fakeroot
@@ -73,6 +86,7 @@ Some of the steps detailled below might be useful also for non-git installs.
     pip install cython  # compile .pyx -> .c
     pip install tox pytest  # optional, for running unit tests
     pip install sphinx  # optional, to build the docs
+    pip install llfuse  # optional, for FUSE support
     cd borg
     pip install -e .  # in-place editable mode
 
@@ -96,13 +110,16 @@ Some of the steps detailled below might be useful also for non-git installs.
 
     # ACL support Headers + Library
     sudo dnf install libacl-devel libacl
-    
-    # optional: lowlevel FUSE py binding - to mount backup archives
-    sudo dnf install python3-llfuse fuse
+
+    # lz4 super fast compression support Headers + Library
+    sudo dnf install lz4
+
+    # optional: FUSE support - to mount backup archives
+    sudo dnf install fuse-devel fuse
     
     # optional: for unit testing
     sudo dnf install fakeroot
-    
+
     # get |project_name| from github, install it
     git clone |git_url|
 
@@ -114,8 +131,47 @@ Some of the steps detailled below might be useful also for non-git installs.
     pip install cython  # compile .pyx -> .c
     pip install tox pytest  # optional, for running unit tests
     pip install sphinx  # optional, to build the docs
+    pip install llfuse  # optional, for FUSE support
     cd borg
     pip install -e .  # in-place editable mode
 
     # optional: run all the tests, on all supported Python versions
     fakeroot -u tox
+
+
+Cygwin (from git)
+-----------------
+Please note that running under Cygwin is rather experimental; it has only been
+tested with Cygwin (x86-64) v2.1.0.
+
+You'll need at least (use the cygwin installer to fetch/install these):
+
+::
+
+    python3
+    python3-setuptools
+    python3-cython
+    binutils
+    gcc-core
+    git
+    libopenssl
+    liblz4_1 liblz4-devel  # from cygwinports.org
+    make
+    openssh
+    openssl-devel
+
+You can then install ``pip`` and ``virtualenv``:
+
+::
+
+    easy_install-3.4 pip
+    pip install virtualenv
+
+And now continue as for Linux (see above).
+
+In case creation of the virtual env fails, try deleting this file:
+
+::
+
+    /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
+

+ 75 - 17
docs/internals.rst

@@ -26,7 +26,7 @@ README
   simple text file telling that this is a |project_name| repository
 
 config
-  repository configuration and lock file
+  repository configuration
 
 data/
   directory where the actual data is stored
@@ -37,6 +37,9 @@ hints.%d
 index.%d
   repository index
 
+lock.roster and lock.exclusive/*
+  used by the locking system to manage shared and exclusive locks
+
 
 Config file
 -----------
@@ -55,9 +58,6 @@ identifier for repositories. It will not change if you move the
 repository around so you can make a local transfer then decide to move
 the repository to another (even remote) location at a later time.
 
-|project_name| will do a POSIX read lock on the config file when operating
-on the repository.
-
 
 Keys
 ----
@@ -168,13 +168,27 @@ A chunk is stored as an object as well, of course.
 Chunks
 ------
 
-|project_name| uses a rolling hash computed by the Buzhash_ algorithm, with a
-window size of 4095 bytes (`0xFFF`), with a minimum chunk size of 1024 bytes.
-It triggers (chunks) when the last 16 bits of the hash are zero, producing
-chunks of 64kiB on average.
+The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm.
+It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero,
+producing chunks of 2^HASH_MASK_BITS Bytes on average.
+
+create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
+can be used to tune the chunker parameters; the defaults are:
+
+- CHUNK_MIN_EXP = 10 (minimum chunk size = 2^10 B = 1 kiB)
+- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
+- HASH_MASK_BITS = 16 (statistical medium chunk size ~= 2^16 B = 64 kiB)
+- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
+
+The default parameters are OK for relatively small backup data volumes and
+repository sizes, assuming plenty of available memory (RAM) and disk space for
+the chunk index. If that does not apply, you are advised to tune these parameters
+to keep the chunk count lower than with the defaults.
 
 The buzhash table is altered by XORing it with a seed randomly generated once
-for the archive, and stored encrypted in the keyfile.
+for the archive, and stored encrypted in the keyfile. This is to prevent chunk
+size based fingerprinting attacks on your encrypted repo contents (to guess
+what files you have based on a specific set of chunk sizes).
 
 
 Indexes / Caches
@@ -243,7 +257,7 @@ Indexes / Caches memory usage
 
 Here is the estimated memory usage of |project_name|:
 
-  chunk_count ~= total_file_size / 65536
+  chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS
 
   repo_index_usage = chunk_count * 40
 
@@ -252,20 +266,32 @@ Here is the estimated memory usage of |project_name|:
   files_cache_usage = total_file_count * 240 + chunk_count * 80
 
   mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage
-             = total_file_count * 240 + total_file_size / 400
+             = chunk_count * 164 + total_file_count * 240
 
 All units are Bytes.
 
-It is assuming every chunk is referenced exactly once and that typical chunk size is 64kiB.
+This assumes every chunk is referenced exactly once (if you have a lot of
+duplicate chunks, you will have fewer chunks than estimated above).
+
+It also assumes that the typical chunk size is 2^HASH_MASK_BITS (if you have
+a lot of files smaller than this statistical medium chunk size, you will have
+more chunks than estimated above, because 1 file is at least 1 chunk).
 
 If a remote repository is used the repo index will be allocated on the remote side.
 
-E.g. backing up a total count of 1Mi files with a total size of 1TiB:
+E.g. backing up a total count of 1Mi files with a total size of 1TiB.
+
+a) with create --chunker-params 10,23,16,4095 (default):
+
+  mem_usage  =  2.8GiB
+
+b) with create --chunker-params 10,23,20,4095 (custom):
 
-  mem_usage  =  1 * 2**20 * 240  +  1 * 2**40 / 400  =  2.8GiB
+  mem_usage  =  0.4GiB
 
-Note: there is a commandline option to switch off the files cache. You'll save
-some memory, but it will need to read / chunk all the files then.
+Note: there is also the --no-files-cache option to switch off the files cache.
+You'll save some memory, but backups will then need to read / chunk all the
+files, as unmodified files cannot be skipped.
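
As a quick cross-check, the two estimates above can be reproduced directly from the formulas
given in this section (all sizes in bytes; the helper below is purely illustrative)::

    def estimated_mem_usage(total_file_size, total_file_count, hash_mask_bits):
        chunk_count = total_file_size // 2 ** hash_mask_bits
        return chunk_count * 164 + total_file_count * 240

    GiB, TiB = 2 ** 30, 2 ** 40
    print(estimated_mem_usage(1 * TiB, 2 ** 20, 16) / GiB)  # ~2.8, default chunker params
    print(estimated_mem_usage(1 * TiB, 2 ** 20, 20) / GiB)  # ~0.4, HASH_MASK_BITS=20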
 
 
 Encryption
@@ -291,6 +317,7 @@ Encryption keys are either derived from a passphrase or kept in a key file.
 The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable
 or prompted for interactive usage.
 
+
 Key files
 ---------
 
@@ -355,4 +382,35 @@ representation of the repository id.
 Compression
 -----------
 
-Currently, zlib level 6 is used as compression.
+|project_name| supports the following compression methods:
+
+- none (no compression, pass through data 1:1)
+- lz4 (low compression, but super fast)
+- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead],
+  level 1 is low, level 9 is high compression)
+- lzma (level 0-9, level 0 is low, level 9 is high compression).
+
+Speed:  none > lz4 > zlib > lzma
+Compression: lzma > zlib > lz4 > none
+
+Be careful, higher zlib and especially lzma compression levels might take a
+lot of resources (CPU and memory).
+
+The overall speed of course also depends on the speed of your target storage.
+If that is slow, using a higher compression level might yield better overall
+performance. You need to experiment a bit. Maybe just watch your CPU load; if
+that is relatively low, increase compression until 1 core is 70-100% loaded.
+
+Even if your target storage is rather fast, you might see interesting effects:
+while doing no compression at all (none) is an operation that takes no time, it
+will likely need to store more data to the storage compared to using lz4.
+The time needed to transfer and store the additional data might be much more
+than if you had used lz4 (which is super fast, but still might compress your
+data about 2:1). This assumes your data is compressible (if you back up
+already compressed data, trying to compress it again at backup time is usually
+pointless).
+
+Compression is applied after deduplication; thus, using different compression
+methods in one repo does not influence deduplication.
+
+See ``borg create --help`` about how to specify the compression level and its default.

+ 7 - 0
docs/intro.rst

@@ -0,0 +1,7 @@
+.. include:: global.rst.inc
+.. _foreword:
+
+Introduction
+============
+
+.. include:: ../README.rst

+ 25 - 3
docs/quickstart.rst

@@ -89,6 +89,31 @@ certain number of old archives::
     # and 6 monthly archives.
     borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
+.. _backup_compression:
+
+Backup compression
+------------------
+
+The default is no compression, but we support different methods offering either
+high speed or high compression:
+
+If you have fast repo storage and want a little compression::
+
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+If you have medium fast repo storage and want a bit more compression (N=0..9,
+0 means no compression, 9 means high compression)::
+
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+If you have very slow repo storage and want high compression (N=0..9, 0 means
+low compression, 9 means high compression)::
+
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
+You'll need to experiment a bit to find the best compression for your use case.
+Keep an eye on CPU load and throughput.
+
 .. _encrypted_repos:
 
 Repository encryption
@@ -159,6 +184,3 @@ mounting the remote filesystem, for example, using sshfs::
   $ borg init /mnt/backup
   $ fusermount -u /mnt
 
-However, be aware that sshfs doesn't fully implement POSIX locks, so
-you must be sure to not have two processes trying to access the same
-repository at the same time.

+ 37 - 0
docs/support.rst

@@ -0,0 +1,37 @@
+.. include:: global.rst.inc
+.. _support:
+
+Support
+=======
+
+Please first read the docs, especially the FAQ section; a lot of stuff is
+documented / explained there.
+
+Issue Tracker
+-------------
+
+If you've found a bug or have a concrete feature request, please create a new
+ticket on the project's `issue tracker`_ (after checking whether someone else
+already has reported the same thing).
+
+For more general questions or discussions, IRC or mailing list are preferred.
+
+IRC
+---
+Join us on channel #borgbackup on chat.freenode.net.
+
+As usual on IRC, just ask or tell directly and then patiently wait for replies.
+Stay connected.
+
+Mailing list
+------------
+
+There is a mailing list for Borg on librelist_ that you can use for feature
+requests and general discussions about Borg. A mailing list archive is
+available `here <http://librelist.com/browser/borgbackup/>`_.
+
+To subscribe to the list, send an email to borgbackup@librelist.com and reply
+to the confirmation mail.
+
+To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply
+to the confirmation mail.

+ 142 - 2
docs/usage.rst

@@ -15,6 +15,93 @@ Like most UNIX commands |project_name| is quiet by default but the ``-v`` or
 ``--verbose`` option can be used to get the program to output more status
 messages as it is processing.
 
+Return codes
+------------
+
+|project_name| can exit with the following return codes (rc):
+
+::
+
+    0      no error, normal termination
+    1      some error occurred (this can be a complete or a partial failure)
+    128+N  killed by signal N (e.g. 137 == kill -9)
+
+
+Note: we are aware that more distinct return codes might be useful, but it is
+not clear yet which return codes should be used for which precise conditions.
+
+See issue #61 for a discussion about that. Depending on the outcome of that
+discussion, return codes may change in the future (the only thing that is fairly
+certain is that 0 will always mean some sort of success and "not 0" will always
+mean some sort of warning / error / failure - but the definition of success
+might change).
+
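
For scripting, a small wrapper that maps these codes back onto the three documented cases might
look like the following sketch (illustrative only; the repository path is taken from the examples
used elsewhere in these docs)::

    import subprocess

    def run_borg(*args):
        # Interpret borg's documented return codes: 0 = success,
        # 1 = (complete or partial) failure, 128+N = killed by signal N.
        rc = subprocess.call(('borg',) + args)
        if rc == 0:
            return 'ok'
        if rc >= 128:
            return 'killed by signal %d' % (rc - 128)
        return 'failed with rc %d' % rc

    print(run_borg('list', '/mnt/backup'))
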
+Environment Variables
+---------------------
+
+|project_name| uses some environment variables for automation:
+
+::
+
+    Specifying a passphrase:
+        BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories.
+
+    Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
+        BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository"
+        BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..."
+        BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss."
+
+    Directories:
+        BORG_KEYS_DIR : Defaults to '~/.borg/keys'. This directory contains keys for encrypted repositories.
+        BORG_CACHE_DIR : Defaults to '~/.cache/borg'. This directory contains the local cache and might need a lot
+                         of space for dealing with big repositories.
+
+    Building:
+        BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py).
+
+    General:
+        TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations)
+
+
+Please note:
+
+- be very careful when using the "yes" sayers; the warnings with prompts exist for your and your data's safety
+- also be very careful when putting your passphrase into a script; make sure it has appropriate file permissions
+  (e.g. mode 600, root:root). See the example below.
+
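+For example, an unattended backup script could provide the passphrase via the
+environment like this (a minimal sketch - the repository path and file list are
+placeholders, and the script itself must be protected, e.g. mode 600, root:root)::
+
+    #!/bin/sh
+    # this file contains a secret - keep it readable by root only
+    export BORG_PASSPHRASE='my secret passphrase'
+    borg create /mnt/backup::$(date +%Y-%m-%d) /home /etc
+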
+
+Resource Usage
+--------------
+
+|project_name| might use a lot of resources depending on the size of the data set it is dealing with.
+
+CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
+     Especially higher zlib and lzma compression levels use significant amounts of CPU cycles.
+
+Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
+              Compression, especially lzma compression with high levels, might need substantial
+              amounts of memory.
+
+Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
+                 deduplicated chunks used to represent them in the repository.
+
+Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes).
+
+Chunks index: proportional to the number of data chunks in your repo. Lots of small chunks in your repo imply a big
+              chunks index. You may need to tweak the chunker params (see create options) if you have a lot of data and
+              want to keep the chunks index at a reasonable size.
+
+Files index: proportional to the number of files in your last backup. It can be switched off (see create options), but
+             the next backup will be much slower if you do.
+
+Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go
+         over the connection (ssh: repo url). If you use a locally mounted network filesystem, some copy operations
+         used for transaction support additionally go over the connection. If you back up multiple sources to one
+         target repository, additional traffic happens for cache resynchronization.
+
+In case you are interested in more details, please read the internals documentation.
+
+
 .. include:: usage/init.rst.inc
 
 Examples
@@ -27,8 +114,43 @@ Examples
     # Remote repository (accesses a remote borg via ssh)
     $ borg init user@hostname:backup
 
-    # Encrypted remote repository
-    $ borg init --encryption=passphrase user@hostname:backup
+    # Encrypted remote repository, store the key in the repo
+    $ borg init --encryption=repokey user@hostname:backup
+
+    # Encrypted remote repository, store the key in your home dir
+    $ borg init --encryption=keyfile user@hostname:backup
+
+Important notes about encryption:
+
+Use encryption! Repository encryption protects you, e.g., against the case that
+an attacker gains access to your backup repository.
+
+But be careful with the key / the passphrase:
+
+``--encryption=passphrase`` is DEPRECATED and will be removed in the next major release.
+This mode has fundamental, unfixable problems (for example, you can never change
+your passphrase or the pbkdf2 iteration count for an existing repository, because
+the encryption / decryption key is directly derived from the passphrase).
+
+If you want "passphrase-only" security, just use the ``repokey`` mode. The key will
+be stored inside the repository (in its "config" file). In above mentioned
+attack scenario, the attacker will have the key (but not the passphrase).
+
+If you want "passphrase and having-the-key" security, use the ``keyfile`` mode.
+The key will be stored in your home directory (in ``.borg/keys``). In the attack
+scenario, the attacker who has just access to your repo won't have the key (and
+also not the passphrase).
+
+Make a backup copy of the key file (``keyfile`` mode) or repo config file
+(``repokey`` mode) and keep it in a safe place, so you still have the key in
+case the original gets corrupted or lost.
+The backup that is encrypted with that key won't help you with that, of course.
+
+Make sure you use a good passphrase. Not too short, not too simple. The real
+encryption / decryption key is encrypted with / locked by your passphrase.
+If an attacker gets your key, he can't unlock and use it without knowing the
+passphrase. In ``repokey`` and ``keyfile`` modes, you can change your passphrase
+for existing repos.
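+
+For example (a minimal sketch - the key file name under ``~/.borg/keys`` and all
+paths depend on your setup, and the ``change-passphrase`` command is assumed to
+be available in your version)::
+
+    # keyfile mode: keep a copy of the key file in a safe place
+    $ cp ~/.borg/keys/<your_repo_key_file> /path/to/safe/place/
+
+    # repokey mode: the key is stored in the repository's "config" file
+    $ cp /mnt/backup/config /path/to/safe/place/
+
+    # change the passphrase that locks the key of an existing repo
+    $ borg change-passphrase /mnt/backup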
 
 
 .. include:: usage/create.rst.inc
@@ -53,6 +175,21 @@ Examples
     # Backup huge files with little chunk management overhead
     $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs
 
+    # Backup a raw device (must not be active/in use/mounted at that time)
+    $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
+
+    # No compression (default)
+    $ borg create /mnt/backup::repo ~
+
+    # Super fast, low compression
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+    # Less fast, higher compression (N = 0..9)
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+    # Even slower, even higher compression (N = 0..9)
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
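+To restore such a raw device backup, the archive contents can be streamed back
+with ``dd`` (a sketch - this assumes your version supports ``extract --stdout``
+and that the target device is not in use)::
+
+    $ borg extract --stdout /mnt/backup::my-sda | dd of=/dev/sda bs=10M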
 
 .. include:: usage/extract.rst.inc
 
@@ -72,6 +209,9 @@ Examples
     # Extract the "src" directory but exclude object files
     $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
 
+Note: currently, extract always writes into the current working directory ("."),
+so make sure you ``cd`` to the right place before calling ``borg extract``.
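+
+For example (a minimal sketch - the restore directory is just a placeholder)::
+
+    $ mkdir /tmp/restore && cd /tmp/restore
+    $ borg extract /mnt/backup::my-files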
+
 .. include:: usage/check.rst.inc
 
 .. include:: usage/delete.rst.inc

+ 5 - 0
requirements.d/development.txt

@@ -0,0 +1,5 @@
+tox
+mock
+pytest
+pytest-cov<2.0.0
+Cython

+ 2 - 2
setup.cfg

@@ -2,7 +2,7 @@
 python_files = testsuite/*.py
 
 [flake8]
-ignore = E123,E126,E127,E129,E203,E221,E226,E231,E241,E265,E301,E302,E303,E713,F401,F403,W291,W293,W391
+ignore = E226,F403
 max-line-length = 250
-exclude = versioneer.py,docs/conf.py,borg/_version.py
+exclude = versioneer.py,docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache
 max-complexity = 100

+ 15 - 7
setup.py

@@ -16,11 +16,10 @@ if sys.version_info < min_python:
     print("Borg requires Python %d.%d or later" % min_python)
     sys.exit(1)
 
-try:
-    from setuptools import setup, Extension
-except ImportError:
-    from distutils.core import setup, Extension
 
+from setuptools import setup, Extension
+
+compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
 chunker_source = 'borg/chunker.pyx'
 hashindex_source = 'borg/hashindex.pyx'
@@ -40,6 +39,7 @@ try:
 
         def make_distribution(self):
             self.filelist.extend([
+                'borg/compress.c',
                 'borg/crypto.c',
                 'borg/chunker.c', 'borg/_chunker.c',
                 'borg/hashindex.c', 'borg/_hashindex.c',
@@ -47,13 +47,14 @@ try:
                 'borg/platform_freebsd.c',
                 'borg/platform_darwin.c',
             ])
-            super(Sdist, self).make_distribution()
+            super().make_distribution()
 
 except ImportError:
     class Sdist(versioneer.cmd_sdist):
         def __init__(self, *args, **kwargs):
             raise Exception('Cython is required to run sdist')
 
+    compress_source = compress_source.replace('.pyx', '.c')
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
@@ -61,7 +62,9 @@ except ImportError:
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
     platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
     from distutils.command.build_ext import build_ext
-    if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
+    if not all(os.path.exists(path) for path in [
+        compress_source, crypto_source, chunker_source, hashindex_source,
+        platform_linux_source, platform_freebsd_source]):
         raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
 
 
@@ -91,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
 cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
 
 ext_modules = [
+    Extension('borg.compress', [compress_source], libraries=['lz4']),
     Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
     Extension('borg.chunker', [chunker_source]),
     Extension('borg.hashindex', [hashindex_source])
@@ -129,7 +133,11 @@ setup(
         'Topic :: System :: Archiving :: Backup',
     ],
     packages=['borg', 'borg.testsuite'],
-    scripts=['scripts/borg'],
+    entry_points={
+        'console_scripts': [
+            'borg = borg.archiver:main',
+        ]
+    },
     cmdclass=cmdclass,
     ext_modules=ext_modules,
     # msgpack pure python data corruption was fixed in 0.4.6.

+ 10 - 11
tox.ini

@@ -1,15 +1,14 @@
+# tox configuration - if you change anything here, run this to verify:
+# fakeroot -u tox --recreate
+
 [tox]
 envlist = py32, py33, py34
 
 [testenv]
-# Change dir to avoid import problem
-changedir = {envdir}
-deps =
-    pytest
-commands = py.test
-passenv = *  # fakeroot -u needs some env vars
-
-[testenv:py32]
-deps =
-    pytest
-    mock
+# Change dir to avoid import problem for cython code. The directory does
+# not really matter, should be just different from the toplevel dir.
+changedir = {toxworkdir}
+deps = -rrequirements.d/development.txt
+commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
+# fakeroot -u needs some env vars:
+passenv = *