
Merge branch 'master' into windows

# Conflicts:
#	Vagrantfile
#	src/borg/archive.py
#	src/borg/constants.py
#	src/borg/helpers.py
#	src/borg/testsuite/archiver.py
Marian Beermann, 9 years ago
commit b48cde9f50
78 changed files with 2536 additions and 1047 deletions
  1. .gitignore  (+1, -0)
  2. .travis.yml  (+7, -2)
  3. .travis/install.sh  (+0, -2)
  4. README.rst  (+14, -8)
  5. Vagrantfile  (+29, -21)
  6. docs/api.rst  (+14, -29)
  7. docs/changes.rst  (+272, -34)
  8. docs/conf.py  (+2, -0)
  9. docs/deployment.rst  (+1, -0)
  10. docs/development.rst  (+1, -0)
  11. docs/faq.rst  (+55, -8)
  12. docs/installation.rst  (+29, -21)
  13. docs/internals.rst  (+2, -0)
  14. docs/misc/internals-picture.txt  (+41, -0)
  15. docs/misc/prune-example.txt  (+93, -0)
  16. docs/quickstart.rst  (+18, -12)
  17. docs/resources.rst  (+7, -13)
  18. docs/support.rst  (+8, -2)
  19. docs/usage.rst  (+35, -19)
  20. docs/usage/break-lock.rst.inc  (+2, -0)
  21. docs/usage/change-passphrase.rst.inc  (+2, -0)
  22. docs/usage/check.rst.inc  (+24, -3)
  23. docs/usage/create.rst.inc  (+10, -3)
  24. docs/usage/debug-delete-obj.rst.inc  (+2, -0)
  25. docs/usage/debug-dump-archive-items.rst.inc  (+2, -0)
  26. docs/usage/debug-get-obj.rst.inc  (+2, -0)
  27. docs/usage/debug-put-obj.rst.inc  (+2, -0)
  28. docs/usage/delete.rst.inc  (+4, -0)
  29. docs/usage/diff.rst.inc  (+2, -0)
  30. docs/usage/extract.rst.inc  (+6, -0)
  31. docs/usage/help.rst.inc  (+82, -41)
  32. docs/usage/info.rst.inc  (+6, -0)
  33. docs/usage/init.rst.inc  (+44, -1)
  34. docs/usage/list.rst.inc  (+18, -9)
  35. docs/usage/migrate-to-repokey.rst.inc  (+2, -0)
  36. docs/usage/mount.rst.inc  (+14, -0)
  37. docs/usage/prune.rst.inc  (+34, -12)
  38. docs/usage/recreate.rst.inc  (+6, -0)
  39. docs/usage/rename.rst.inc  (+2, -0)
  40. docs/usage/serve.rst.inc  (+4, -0)
  41. docs/usage/upgrade.rst.inc  (+2, -0)
  42. docs/usage/with-lock.rst.inc  (+2, -0)
  43. requirements.d/attic.txt  (+5, -0)
  44. scripts/glibc_check.py  (+61, -0)
  45. scripts/hash_sizes.py  (+0, -0)
  46. scripts/release  (+2, -0)
  47. scripts/sign-binaries  (+9, -0)
  48. setup.py  (+5, -4)
  49. src/borg/_hashindex.c  (+7, -1)
  50. src/borg/archive.py  (+412, -222)
  51. src/borg/archiver.py  (+277, -169)
  52. src/borg/cache.py  (+4, -2)
  53. src/borg/constants.py  (+14, -4)
  54. src/borg/crypto.pyx  (+54, -19)
  55. src/borg/fuse.py  (+79, -62)
  56. src/borg/hashindex.pyx  (+2, -3)
  57. src/borg/helpers.py  (+123, -78)
  58. src/borg/item.py  (+79, -18)
  59. src/borg/key.py  (+34, -31)
  60. src/borg/locking.py  (+10, -6)
  61. src/borg/platform/base.py  (+7, -1)
  62. src/borg/platform/darwin.pyx  (+12, -14)
  63. src/borg/platform/freebsd.pyx  (+6, -6)
  64. src/borg/platform/linux.pyx  (+5, -5)
  65. src/borg/remote.py  (+35, -22)
  66. src/borg/repository.py  (+16, -12)
  67. src/borg/selftest.py  (+2, -2)
  68. src/borg/shellpattern.py  (+1, -1)
  69. src/borg/testsuite/__init__.py  (+18, -0)
  70. src/borg/testsuite/archive.py  (+67, -9)
  71. src/borg/testsuite/archiver.py  (+204, -78)
  72. src/borg/testsuite/crypto.py  (+23, -0)
  73. src/borg/testsuite/helpers.py  (+17, -1)
  74. src/borg/testsuite/item.py  (+4, -4)
  75. src/borg/testsuite/platform.py  (+17, -17)
  76. src/borg/testsuite/repository.py  (+7, -4)
  77. src/borg/xattr.py  (+14, -11)
  78. tox.ini  (+1, -1)

+ 1 - 0
.gitignore

@@ -26,3 +26,4 @@ borg.exe
 *.dll
 .coverage
 .vagrant
+.eggs

+ 7 - 2
.travis.yml

@@ -6,19 +6,24 @@ cache:
     directories:
         - $HOME/.cache/pip
 
+# note: use py 3.5.2, it has lzma support. 3.5(.0) on travis.org/trusty does not.
 matrix:
     include:
         - python: 3.4
           os: linux
+          dist: trusty
           env: TOXENV=py34
-        - python: 3.5
+        - python: 3.5.2
           os: linux
+          dist: trusty
           env: TOXENV=py35
         - python: nightly
           os: linux
+          dist: trusty
           env: TOXENV=py36
-        - python: 3.5
+        - python: 3.4
           os: linux
+          dist: trusty
           env: TOXENV=flake8
         - language: generic
           os: osx
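
The comment above ties the pinned 3.5.2 interpreter to lzma support; a quick way to check whether a given Python build ships the lzma module (an illustrative one-liner, not part of this change)::

    # exits non-zero if Python was built without xz/lzma headers
    python3 -c "import lzma; print('lzma available')"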

+ 0 - 2
.travis/install.sh

@@ -32,8 +32,6 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
     python -m pip install --user 'virtualenv<14.0'
 else
     pip install 'virtualenv<14.0'
-    sudo add-apt-repository -y ppa:gezakovacs/lz4
-    sudo apt-get update
     sudo apt-get install -y liblz4-dev
     sudo apt-get install -y libacl1-dev
 fi

+ 14 - 8
README.rst

@@ -1,5 +1,7 @@
 |screencast|
 
+.. highlight:: bash
+
 What is BorgBackup?
 ===================
 
@@ -87,7 +89,10 @@ Initialize a new backup repository and create a backup archive::
     $ borg init /path/to/repo
     $ borg create /path/to/repo::Saturday1 ~/Documents
 
-Now doing another backup, just to show off the great deduplication::
+Now doing another backup, just to show off the great deduplication:
+
+.. code-block:: none
+   :emphasize-lines: 11
 
     $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
     -----------------------------------------------------------------------------
@@ -113,13 +118,14 @@ Links
 =====
 
 * `Main Web Site <https://borgbackup.readthedocs.org/>`_
-* `Releases <https://github.com/borgbackup/borg/releases>`_
-* `PyPI packages <https://pypi.python.org/pypi/borgbackup>`_
-* `ChangeLog <https://github.com/borgbackup/borg/blob/master/docs/changes.rst>`_
-* `GitHub <https://github.com/borgbackup/borg>`_
-* `Issue Tracker <https://github.com/borgbackup/borg/issues>`_
-* `Bounties & Fundraisers <https://www.bountysource.com/teams/borgbackup>`_
-* `Mailing List <https://mail.python.org/mailman/listinfo/borgbackup>`_
+* `Releases <https://github.com/borgbackup/borg/releases>`_,
+  `PyPI packages <https://pypi.python.org/pypi/borgbackup>`_ and
+  `ChangeLog <https://github.com/borgbackup/borg/blob/master/docs/changes.rst>`_
+* `GitHub <https://github.com/borgbackup/borg>`_,
+  `Issue Tracker <https://github.com/borgbackup/borg/issues>`_ and
+  `Bounties & Fundraisers <https://www.bountysource.com/teams/borgbackup>`_
+* `Web-Chat (IRC) <http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8>`_ and
+  `Mailing List <https://mail.python.org/mailman/listinfo/borgbackup>`_
 * `License <https://borgbackup.readthedocs.org/en/stable/authors.html#license>`_
 
 Notes

+ 29 - 21
Vagrantfile

@@ -42,7 +42,7 @@ def packages_redhatted
     # needed to compile msgpack-python (otherwise it will use slow fallback code):
     yum install -y gcc-c++
     # for building python:
-    yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz-devel sqlite-devel
+    yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz xz-devel sqlite-devel
     #yum install -y python-pip
     #pip install virtualenv
     touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile
@@ -53,10 +53,10 @@ def packages_darwin
   return <<-EOF
     # install all the (security and other) updates
     sudo softwareupdate --install --all
-    # get osxfuse 3.0.x pre-release code from github:
-    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg
+    # get osxfuse 3.x pre-release code from github:
+    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.3.3/osxfuse-3.3.3.dmg >osxfuse.dmg
     MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
-    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target /
+    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.3.3.pkg" -target /
     sudo chown -R vagrant /usr/local  # brew must be able to create stuff here
     ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     brew update
@@ -109,7 +109,6 @@ def packages_openbsd
     pkg_add lz4
     # pkg_add fuse  # does not install, sdl dependency missing
     pkg_add git  # no fakeroot
-    pkg_add python-3.4.2
     pkg_add py3-setuptools
     ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3
     ln -sf /usr/local/bin/python3.4 /usr/local/bin/python
@@ -166,7 +165,7 @@ def install_pythons(boxname)
     . ~/.bash_profile
     pyenv install 3.4.0  # tests
     pyenv install 3.5.0  # tests
-    pyenv install 3.5.1  # binary build, use latest 3.5.x release
+    pyenv install 3.5.2  # binary build, use latest 3.5.x release
     pyenv rehash
   EOF
 end
@@ -184,8 +183,8 @@ def build_pyenv_venv(boxname)
     . ~/.bash_profile
     cd /vagrant/borg
     # use the latest 3.5 release
-    pyenv global 3.5.1
-    pyenv virtualenv 3.5.1 borg-env
+    pyenv global 3.5.2
+    pyenv virtualenv 3.5.2 borg-env
     ln -s ~/.pyenv/versions/borg-env .
   EOF
 end
@@ -207,6 +206,22 @@ def install_borg(boxname)
   EOF
 end
 
+def install_borg_no_fuse(boxname)
+  return <<-EOF
+    . ~/.bash_profile
+    cd /vagrant/borg
+    . borg-env/bin/activate
+    pip install -U wheel  # upgrade wheel, too old for 3.5
+    cd borg
+    # clean up (wrong/outdated) stuff we likely got via rsync:
+    rm -f borg/*.so borg/*.cpy*
+    rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
+    rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
+    pip install -r requirements.d/development.txt
+    pip install -e .
+  EOF
+end
+
 def install_pyinstaller(boxname)
   return <<-EOF
     . ~/.bash_profile
@@ -241,7 +256,7 @@ def build_binary_with_pyinstaller(boxname)
     cd /vagrant/borg
     . borg-env/bin/activate
     cd borg
-    pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean borg/__main__.py
+    pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean src/borg/__main__.py --hidden-import=borg.platform.posix
   EOF
 end
 
@@ -337,9 +352,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_32")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_32")
-    b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("centos6_32")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32")
   end
 
@@ -355,9 +368,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_64")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_64")
-    b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("centos6_64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64")
   end
 
@@ -472,16 +483,13 @@ Vagrant.configure(2) do |config|
   end
 
   config.vm.define "openbsd64" do |b|
-    b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"]
-    b.vm.synced_folder ".", "/vagrant", disabled: true
-    b.vm.provision "fix perms", :type => :shell, :inline => fix_perms
-    b.vm.box = "bodgit/openbsd-5.7-amd64"
+    b.vm.box = "kaorimatz/openbsd-5.9-amd64"
     b.vm.provider :virtualbox do |v|
       v.memory = 768
     end
     b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("openbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64")
   end
 
@@ -495,7 +503,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("netbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64")
   end
 

+ 14 - 29
docs/api.rst

@@ -1,3 +1,4 @@
+.. highlight:: python
 
 API Documentation
 =================
@@ -6,67 +7,55 @@ API Documentation
     :members:
     :undoc-members:
 
-.. automodule:: borg.upgrader
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.archive
     :members:
     :undoc-members:
 
-.. automodule:: borg.fuse
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.platform
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.locking
+.. automodule:: borg.repository
     :members:
     :undoc-members:
 
-.. automodule:: borg.shellpattern
+.. automodule:: borg.remote
     :members:
     :undoc-members:
 
-.. automodule:: borg.repository
+.. automodule:: borg.cache
     :members:
     :undoc-members:
 
-.. automodule:: borg.lrucache
+.. automodule:: borg.key
     :members:
     :undoc-members:
 
-.. automodule:: borg.remote
+.. automodule:: borg.logger
     :members:
     :undoc-members:
 
-.. automodule:: borg.hash_sizes
+.. automodule:: borg.helpers
     :members:
     :undoc-members:
 
-.. automodule:: borg.xattr
+.. automodule:: borg.locking
     :members:
     :undoc-members:
 
-.. automodule:: borg.helpers
+.. automodule:: borg.shellpattern
     :members:
     :undoc-members:
 
-.. automodule:: borg.cache
+.. automodule:: borg.lrucache
     :members:
     :undoc-members:
 
-.. automodule:: borg.key
+.. automodule:: borg.fuse
     :members:
     :undoc-members:
 
-.. automodule:: borg.logger
+.. automodule:: borg.xattr
     :members:
     :undoc-members:
 
-.. automodule:: borg.platform_darwin
+.. automodule:: borg.platform
     :members:
     :undoc-members:
 
@@ -79,7 +68,7 @@ API Documentation
     :undoc-members:
 
 .. automodule:: borg.compress
-    :members:
+    :members: get_compressor, Compressor, CompressorBase
     :undoc-members:
 
 .. automodule:: borg.chunker
@@ -89,7 +78,3 @@ API Documentation
 .. automodule:: borg.crypto
     :members:
     :undoc-members:
-
-.. automodule:: borg.platform_freebsd
-    :members:
-    :undoc-members:

+ 272 - 34
docs/changes.rst

@@ -1,6 +1,55 @@
 Changelog
 =========
 
+Important note about pre-1.0.4 potential repo corruption
+--------------------------------------------------------
+
+Some external errors (like network or disk I/O errors) could lead to
+corruption of the backup repository due to issue #1138.
+
+A sign that this happened is if "E" status was reported for a file that can
+not be explained by problems with the source file. If you still have logs from
+"borg create -v --list", you can check for "E" status.
+
+Here is what could cause corruption and what you can do now:
+
+1) I/O errors (e.g. repo disk errors) while writing data to repo.
+
+This could lead to corrupted segment files.
+
+Fix::
+
+    # check for corrupt chunks / segments:
+    borg check -v --repository-only REPO
+
+    # repair the repo:
+    borg check -v --repository-only --repair REPO
+
+    # make sure everything is fixed:
+    borg check -v --repository-only REPO
+
+2) Unreliable network / unreliable connection to the repo.
+
+This could lead to archive metadata corruption.
+
+Fix::
+
+    # check for corrupt archives:
+    borg check -v --archives-only REPO
+
+    # delete the corrupt archives:
+    borg delete --force REPO::CORRUPT_ARCHIVE
+
+    # make sure everything is fixed:
+    borg check -v --archives-only REPO
+
+3) In case you want to do more intensive checking.
+
+The best check that everything is ok is to run a dry-run extraction::
+
+    borg extract -v --dry-run REPO::ARCHIVE
+
+
 Version 1.1.0 (not released yet)
 --------------------------------
 
@@ -74,8 +123,197 @@ Other changes:
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
 
 
-Version 1.0.3
--------------
+Version 1.0.6 (2016-07-12)
+--------------------------
+
+Bug fixes:
+
+- Linux: handle multiple LD_PRELOAD entries correctly, #1314, #1111
+- Fix crash with unclear message if the libc is not found, #1314, #1111
+
+Other changes:
+
+- tests:
+
+  - Fixed O_NOATIME tests for Solaris and GNU Hurd, #1315
+  - Fixed sparse file tests for (file) systems not supporting it, #1310
+- docs:
+
+  - Fixed syntax highlighting, #1313
+  - misc docs: added data processing overview picture
+
+
+Version 1.0.6rc1 (2016-07-10)
+-----------------------------
+
+New features:
+
+- borg check --repair: heal damaged files if missing chunks re-appear (e.g. if
+  the previously missing chunk was added again in a later backup archive),
+  #148. (*) Also improved logging.
+
+Bug fixes:
+
+- sync_dir: silence fsync() failing with EINVAL, #1287
+  Some network filesystems (like smbfs) don't support this and we use this in
+  repository code.
+- borg mount (FUSE):
+
+  - fix directories being shadowed when contained paths were also specified,
+    #1295
+  - raise I/O Error (EIO) on damaged files (unless -o allow_damaged_files is
+    used), #1302. (*)
+- borg extract: warn if a damaged file is extracted, #1299. (*)
+- Added some missing return code checks (ChunkIndex._add, hashindex_resize).
+- borg check: fix/optimize initial hash table size, avoids resize of the table.
+
+Other changes:
+
+- tests:
+
+  - add more FUSE tests, #1284
+  - deduplicate fuse (u)mount code
+  - fix borg binary test issues, #862
+- docs:
+
+  - changelog: added release dates to older borg releases
+  - fix some sphinx (docs generator) warnings, #881
+
+Notes:
+
+(*) Some features depend on information (chunks_healthy list) added to item
+metadata when a file with missing chunks was "repaired" using all-zero
+replacement chunks. The chunks_healthy list is generated since borg 1.0.4,
+thus borg can't recognize such "repaired" (but content-damaged) files if the
+repair was done with an older borg version.
+
+
+Version 1.0.5 (2016-07-07)
+--------------------------
+
+Bug fixes:
+
+- borg mount: fix FUSE crash in xattr code on Linux introduced in 1.0.4, #1282
+
+Other changes:
+
+- backport some FAQ entries from master branch
+- add release helper scripts
+- Vagrantfile:
+
+  - centos6: no FUSE, don't build binary
+  - add xz for redhat-like dists
+
+
+Version 1.0.4 (2016-07-07)
+--------------------------
+
+New features:
+
+- borg serve --append-only, #1168
+  This was included because it was a simple change (append-only functionality
+  was already present via repository config file) and makes better security now
+  practically usable.
+- BORG_REMOTE_PATH environment variable, #1258
+  This was included because it was a simple change (--remote-path cli option
+  was already present) and makes borg much easier to use if you need it.
+- Repository: cleanup incomplete transaction on "no space left" condition.
+  In many cases, this can avoid a 100% full repo filesystem (which is very
+  problematic as borg always needs free space - even to delete archives).
+
+Bug fixes:
+
+- Fix wrong handling and reporting of OSErrors in borg create, #1138.
+  This was a serious issue: in the context of "borg create", errors like
+  repository I/O errors (e.g. disk I/O errors, ssh repo connection errors)
+  were handled badly and did not lead to a crash (which would be good for this
+  case, because the repo transaction would be incomplete and trigger a
+  transaction rollback to clean up).
+  Now, error handling for source files is cleanly separated from every other
+  error handling, so only problematic input files are logged and skipped.
+- Implement fail-safe error handling for borg extract.
+  Note that this isn't nearly as critical as the borg create error handling
+  bug, since nothing is written to the repo. So this was "merely" misleading
+  error reporting.
+- Add missing error handler in directory attr restore loop.
+- repo: make sure write data hits disk before the commit tag (#1236) and also
+  sync the containing directory.
+- FUSE: getxattr fail must use errno.ENOATTR, #1126
+  (fixes Mac OS X Finder malfunction: "zero bytes" file length, access denied)
+- borg check --repair: do not lose information about the good/original chunks.
+  If we do not lose the original chunk IDs list when "repairing" a file
+  (replacing missing chunks with all-zero chunks), we have a chance to "heal"
+  the file back into its original state later, in case the chunks re-appear
+  (e.g. in a fresh backup). Healing is not implemented yet, see #148.
+- fixes for --read-special mode:
+
+  - ignore known files cache, #1241
+  - fake regular file mode, #1214
+  - improve symlinks handling, #1215
+- remove passphrase from subprocess environment, #1105
+- Ignore empty index file (will trigger index rebuild), #1195
+- add missing placeholder support for --prefix, #1027
+- improve exception handling for placeholder replacement
+- catch and format exceptions in arg parsing
+- helpers: fix "undefined name 'e'" in exception handler
+- better error handling for missing repo manifest, #1043
+- borg delete:
+
+  - make it possible to delete a repo without manifest
+  - borg delete --forced allows to delete corrupted archives, #1139
+- borg check:
+
+  - make borg check work for empty repo
+  - fix resync and msgpacked item qualifier, #1135
+  - rebuild_manifest: fix crash if 'name' or 'time' key were missing.
+  - better validation of item metadata dicts, #1130
+  - better validation of archive metadata dicts
+- close the repo on exit - even if rollback did not work, #1197.
+  This is rather cosmetic, it avoids repo closing in the destructor.
+
+- tests:
+
+  - fix sparse file test, #1170
+  - flake8: ignore new F405, #1185
+  - catch "invalid argument" on cygwin, #257
+  - fix sparseness assertion in test prep, #1264
+
+Other changes:
+
+- make borg build/work on OpenSSL 1.0 and 1.1, #1187
+- docs / help:
+
+  - fix / clarify prune help, #1143
+  - fix "patterns" help formatting
+  - add missing docs / help about placeholders
+  - resources: rename atticmatic to borgmatic
+  - document sshd settings, #545
+  - more details about checkpoints, add split trick, #1171
+  - support docs: add freenode web chat link, #1175
+  - add prune visualization / example, #723
+  - add note that Fnmatch is default, #1247
+  - make clear that lzma levels > 6 are a waste of cpu cycles
+  - add a "do not edit" note to auto-generated files, #1250
+  - update cygwin installation docs
+- repository interoperability with borg master (1.1dev) branch:
+
+  - borg check: read item metadata keys from manifest, #1147
+  - read v2 hints files, #1235
+  - fix hints file "unknown version" error handling bug
+- tests: add tests for format_line
+- llfuse: update version requirement for freebsd
+- Vagrantfile:
+
+  - use openbsd 5.9, #716
+  - do not install llfuse on netbsd (broken)
+  - update OSXfuse to version 3.3.3
+  - use Python 3.5.2 to build the binaries
+- glibc compatibility checker: scripts/glibc_check.py
+- add .eggs to .gitignore
+
+
+Version 1.0.3 (2016-05-20)
+--------------------------
 
 Bug fixes:
 
@@ -104,8 +342,8 @@ Other changes:
   - borg create help: document format tags, #894
 
 
-Version 1.0.2
--------------
+Version 1.0.2 (2016-04-16)
+--------------------------
 
 Bug fixes:
 
@@ -140,8 +378,8 @@ Other changes:
   - fix confusing usage of "repo" as archive name (use "arch")
 
 
-Version 1.0.1
--------------
+Version 1.0.1 (2016-04-08)
+--------------------------
 
 New features:
 
@@ -192,8 +430,8 @@ Other changes:
   - Document logo font. Recreate logo png. Remove GIMP logo file.
 
 
-Version 1.0.0
--------------
+Version 1.0.0 (2016-03-05)
+--------------------------
 
 The major release number change (0.x -> 1.x) indicates bigger incompatible
 changes, please read the compatibility notes, adapt / test your scripts and
@@ -276,8 +514,8 @@ Other changes:
   - FAQ: how to limit bandwidth
 
 
-Version 1.0.0rc2
-----------------
+Version 1.0.0rc2 (2016-02-28)
+-----------------------------
 
 New features:
 
@@ -318,8 +556,8 @@ Other changes:
   - "connection closed by remote": add FAQ entry and point to issue #636
 
 
-Version 1.0.0rc1
-----------------
+Version 1.0.0rc1 (2016-02-07)
+-----------------------------
 
 New features:
 
@@ -368,8 +606,8 @@ Other changes:
   - misc. updates and fixes
 
 
-Version 0.30.0
---------------
+Version 0.30.0 (2016-01-23)
+---------------------------
 
 Compatibility notes:
 
@@ -446,8 +684,8 @@ Other changes:
   - add gcc gcc-c++ to redhat/fedora/corora install docs, fixes #583
 
 
-Version 0.29.0
---------------
+Version 0.29.0 (2015-12-13)
+---------------------------
 
 Compatibility notes:
 
@@ -522,8 +760,8 @@ Other changes:
   - fix wrong installation instructions for archlinux
 
 
-Version 0.28.2
---------------
+Version 0.28.2 (2015-11-15)
+---------------------------
 
 New features:
 
@@ -546,8 +784,8 @@ Other changes:
   - minor install docs improvements
 
 
-Version 0.28.1
---------------
+Version 0.28.1 (2015-11-08)
+---------------------------
 
 Bug fixes:
 
@@ -561,8 +799,8 @@ Other changes:
 - fix build on readthedocs
 
 
-Version 0.28.0
---------------
+Version 0.28.0 (2015-11-08)
+---------------------------
 
 Compatibility notes:
 
@@ -659,8 +897,8 @@ Other changes:
   - minor development docs update
 
 
-Version 0.27.0
---------------
+Version 0.27.0 (2015-10-07)
+---------------------------
 
 New features:
 
@@ -694,8 +932,8 @@ Other changes:
   - hint to single-file pyinstaller binaries from README
 
 
-Version 0.26.1
---------------
+Version 0.26.1 (2015-09-28)
+---------------------------
 
 This is a minor update, just docs and new pyinstaller binaries.
 
@@ -707,8 +945,8 @@ This is a minor update, just docs and new pyinstaller binaries.
 Note: if you did a python-based installation, there is no need to upgrade.
 
 
-Version 0.26.0
---------------
+Version 0.26.0 (2015-09-19)
+---------------------------
 
 New features:
 
@@ -768,8 +1006,8 @@ Other changes:
   - Darwin (OS X Yosemite)
 
 
-Version 0.25.0
---------------
+Version 0.25.0 (2015-08-29)
+---------------------------
 
 Compatibility notes:
 
@@ -835,8 +1073,8 @@ Other changes:
   - split install docs into system-specific preparations and generic instructions
 
 
-Version 0.24.0
---------------
+Version 0.24.0 (2015-08-09)
+---------------------------
 
 Incompatible changes (compared to 0.23):
 
@@ -939,8 +1177,8 @@ Other changes:
 - some easy micro optimizations
 
 
-Version 0.23.0
---------------
+Version 0.23.0 (2015-06-11)
+---------------------------
 
 Incompatible changes (compared to attic, fork related):
 

+ 2 - 0
docs/conf.py

@@ -55,6 +55,8 @@ version = sw_version.split('-')[0]
 # The full version, including alpha/beta/rc tags.
 release = version
 
+suppress_warnings = ['image.nonlocal_uri']
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #language = None

+ 1 - 0
docs/deployment.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: none
 .. _deployment:
 
 Deployment

+ 1 - 0
docs/development.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: bash
 .. _development:
 
 Development

+ 55 - 8
docs/faq.rst

@@ -1,5 +1,6 @@
-.. _faq:
 .. include:: global.rst.inc
+.. highlight:: none
+.. _faq:
 
 Frequently asked questions
 ==========================
@@ -142,7 +143,7 @@ C to delete all backups residing on S.
 
 These are your options to protect against that:
 
-- Do not allow to permanently delete data from the repo, see :ref:`append-only-mode`.
+- Do not allow to permanently delete data from the repo, see :ref:`append_only_mode`.
 - Use a pull-mode setup using ``ssh -R``, see :issue:`900`.
 - Mount C's filesystem on another machine and then create a backup of it.
 - Do not give C filesystem-level access to S.
@@ -186,6 +187,24 @@ stops after a while (some minutes, hours, ... - not immediately) with
 
 That's a good question and we are trying to find a good answer in :issue:`636`.
 
+Why am I seeing idle borg serve processes on the repo server?
+-------------------------------------------------------------
+
+Maybe the ssh connection between client and server broke down and that was not
+yet noticed on the server. Try these settings:
+
+::
+
+    # /etc/ssh/sshd_config on borg repo server - kill connection to client
+    # after ClientAliveCountMax * ClientAliveInterval seconds with no response
+    ClientAliveInterval 20
+    ClientAliveCountMax 3
+
+If you have multiple borg create ... ; borg create ... commands in an already
+serialized way in a single script, you need to give them --lock-wait N (with N
+being a bit more than the time the server needs to terminate broken-down
+connections and release the lock).
+
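
A minimal sketch of the ``--lock-wait`` advice above (repository path and wait time are illustrative; the wait should exceed ClientAliveInterval * ClientAliveCountMax, i.e. 60 seconds with the settings shown)::

    borg create --lock-wait 120 /path/to/repo::'{hostname}-{now:%Y-%m-%d}' ~/Documents
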
 The borg cache eats way too much disk space, what can I do?
 -----------------------------------------------------------
 
@@ -223,17 +242,23 @@ Yes, |project_name| supports resuming backups.
 
 During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
 is saved every checkpoint interval (the default value for this is 5
-minutes) containing all the data backed-up until that point. This checkpoint
-archive is a valid archive, but it is only a partial backup. Having it
-in the repo until a successful, full backup is completed is useful because it
-references all the transmitted chunks up to the checkpoint time. This means
-that at most <checkpoint interval> worth of data needs to be retransmitted
-if you restart the backup.
+minutes) containing all the data backed-up until that point.
+
+Checkpoints only happen between files (so they don't help for interruptions
+happening while a very large file is being processed).
+
+This checkpoint archive is a valid archive (all files in it are valid and complete),
+but it is only a partial backup (not all files that you wanted to backup are
+contained in it). Having it in the repo until a successful, full backup is
+completed is useful because it references all the transmitted chunks up
+to the checkpoint. This means that in case of an interruption, you only need to
+retransfer the data since the last checkpoint.
 
 If a backup was interrupted, you do not need to do any special considerations,
 just invoke ``borg create`` as you always do. You may use the same archive name
 as in previous attempt or a different one (e.g. if you always include the current
 datetime), it does not matter.
+
 |project_name| always does full single-pass backups, so it will start again
 from the beginning - but it will be much faster, because some of the data was
 already stored into the repo (and is still referenced by the checkpoint
@@ -243,6 +268,28 @@ Once your backup has finished successfully, you can delete all
 ``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
 also care for deleting unneeded checkpoints.
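
For illustration, checkpoint archives appear in the normal archive list and can be removed like any other archive once a full backup has succeeded (archive name is hypothetical)::

    borg list /path/to/repo
    borg delete /path/to/repo::mybackup.checkpoint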
 
+How can I backup huge file(s) over an unstable connection?
+----------------------------------------------------------
+
+You can use this "split trick" as a workaround for the in-between-files-only
+checkpoints (see above), huge files and an unstable connection to the repository:
+
+Split the huge file(s) into parts of manageable size (e.g. 100MB) and create
+a temporary archive of them. Borg will create checkpoints now more frequently
+than if you try to backup the files in their original form (e.g. 100GB).
+
+After that, you can remove the parts again and backup the huge file(s) in
+their original form. This will now work a lot faster as a lot of content chunks
+are already in the repository.
+
+After you have successfully backed up the huge original file(s), you can remove
+the temporary archive you made from the parts.
+
+We realize that this is just a better-than-nothing workaround, see :issue:`1198`
+for a potential solution.
+
+Please note that this workaround only helps you for backup, not for restore.
+
 If it crashes with a UnicodeError, what can I do?
 -------------------------------------------------
 

+ 29 - 21
docs/installation.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: bash
 .. _installation:
 
 Installation
@@ -25,9 +26,17 @@ Distribution Package
 --------------------
 
 Some distributions might offer a ready-to-use ``borgbackup``
-package which can be installed with the package manager.  As |project_name| is
-still a young project, such a package might be not available for your system
-yet.
+package which can be installed with the package manager.
+
+.. important:: Those packages may not be up to date with the latest
+               |project_name| releases. Before submitting a bug
+               report, check the package version and compare that to
+               our latest release then review :doc:`changes` to see if
+               the bug has been fixed. Report bugs to the package
+               maintainer rather than directly to |project_name| if the
+               package is out of date in the distribution.
+
+.. keep this list in alphabetical order
 
 ============ ============================================= =======
 Distribution Source                                        Command
@@ -36,13 +45,16 @@ Arch Linux   `[community]`_                                ``pacman -S borg``
 Debian       `jessie-backports`_, `stretch`_, `sid`_       ``apt install borgbackup``
 Gentoo       `ebuild`_                                     ``emerge borgbackup``
 GNU Guix     `GNU Guix`_                                   ``guix package --install borg``
-FreeBSD      `Ports-Tree`_                                 ``cd /usr/ports/archivers/py-borgbackup && make install clean``
+Fedora/RHEL  `Fedora official repository`_, `EPEL`_        ``dnf install borgbackup``
+FreeBSD      `FreeBSD ports`_                              ``cd /usr/ports/archivers/py-borgbackup && make install clean``
+Mageia       `cauldron`_                                   ``urpmi borgbackup``
 NetBSD       `pkgsrc`_                                     ``pkg_add py-borgbackup``
 NixOS        `.nix file`_                                  N/A
 OpenBSD      `OpenBSD ports`_                              ``pkg_add borgbackup``
+OpenIndiana  `OpenIndiana hipster repository`_             ``pkg install borg``
 openSUSE     `openSUSE official repository`_               ``zypper in python3-borgbackup``
-Fedora       `Fedora official repository`_                 ``dnf install borgbackup``
 OS X         `Brew cask`_                                  ``brew cask install borgbackup``
+Raspbian     `Raspbian testing`_                           ``apt install borgbackup``
 Ubuntu       `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbackup``
 ============ ============================================= =======
 
@@ -50,26 +62,27 @@ Ubuntu       `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac
 .. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
 .. _stretch: https://packages.debian.org/stretch/borgbackup
 .. _sid: https://packages.debian.org/sid/borgbackup
+.. _Fedora official repository: https://apps.fedoraproject.org/packages/borgbackup
+.. _EPEL: https://admin.fedoraproject.org/pkgdb/package/rpms/borgbackup/
+.. _FreeBSD ports: http://www.freshports.org/archivers/py-borgbackup/
 .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
-.. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
+.. _GNU Guix: https://www.gnu.org/software/guix/package-list.html#borg
 .. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup
-.. _16.04: https://launchpad.net/ubuntu/xenial/+source/borgbackup
-.. _15.10: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
-.. _14.04: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
+.. _cauldron: http://madb.mageia.org/package/show/application/0/release/cauldron/name/borgbackup
 .. _.nix file: https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/backup/borg/default.nix
 .. _OpenBSD ports: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/borgbackup/
+.. _OpenIndiana hipster repository: http://pkg.openindiana.org/hipster/en/search.shtml?token=borg&action=Search
 .. _openSUSE official repository: http://software.opensuse.org/package/borgbackup
-.. _Fedora official repository: https://apps.fedoraproject.org/packages/borgbackup
 .. _Brew cask: http://caskroom.io/
-.. _GNU Guix: https://www.gnu.org/software/guix/package-list.html#borg
+.. _Raspbian testing: http://archive.raspbian.org/raspbian/pool/main/b/borgbackup/
+.. _16.04: https://launchpad.net/ubuntu/xenial/+source/borgbackup
+.. _15.10: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
+.. _14.04: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
 
 Please ask package maintainers to build a package or, if you can package /
 submit it yourself, please help us with that! See :issue:`105` on
 github to followup on packaging efforts.
 
-If a package is available, it might be interesting to check its version
-and compare that to our latest release and review the :doc:`changes`.
-
 .. _pyinstaller-binary:
 
 Standalone Binary
@@ -219,15 +232,14 @@ Cygwin
 
 .. note::
     Running under Cygwin is experimental and has only been tested with Cygwin
-    (x86-64) v2.1.0.
+    (x86-64) v2.5.2.
 
 Use the Cygwin installer to install the dependencies::
 
     python3 python3-setuptools
-    python3-cython  # not needed for releases
     binutils gcc-g++
     libopenssl openssl-devel
-    liblz4_1 liblz4-devel  # from cygwinports.org
+    liblz4_1 liblz4-devel
     git make openssh
 
 You can then install ``pip`` and ``virtualenv``::
@@ -235,10 +247,6 @@ You can then install ``pip`` and ``virtualenv``::
     easy_install-3.4 pip
     pip install virtualenv
 
-In case the creation of the virtual environment fails, try deleting this file::
-
-    /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
-
 
 .. _pip-installation:
 

+ 2 - 0
docs/internals.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: none
 .. _internals:
 
 Internals
@@ -280,6 +281,7 @@ emptied to 25%, its size is shrinked. So operations on it have a variable
 complexity between constant and linear with low factor, and memory overhead
 varies between 33% and 300%.
 
+.. _cache-memory-usage:
 
 Indexes / Caches memory usage
 -----------------------------

+ 41 - 0
docs/misc/internals-picture.txt

@@ -0,0 +1,41 @@
+BorgBackup from 10.000m
+=======================
+
++--------+ +--------+     +--------+
+|archive0| |archive1| ... |archiveN|
++--------+ +--------+     +--+-----+
+    |          |             |
+    |          |             |
+    |      +---+             |
+    |      |                 |
+    |      |                 |
+    +------+-------+         |
+    |      |       |         |
+ /chunk\/chunk\/chunk\...   /maybe different chunks lists\ 
++-----------------------------------------------------------------+
+|item list                                                        |
++-----------------------------------------------------------------+
+    |                                                       
+    +-------------------------------------+--------------+  
+    |                                     |              |  
+    |                                     |              |  
++-------------+                     +-------------+      |  
+|item0        |                     |item1        |      |  
+| - owner     |                     | - owner     |      |  
+| - size      |                     | - size      |     ... 
+| - ...       |                     | - ...       |         
+| - chunks    |                     | - chunks    |         
++----+--------+                     +-----+-------+         
+     |                                    |                 
+     | +-----+----------------------------+-----------------+
+     | |     |                                              |
+     +-o-----o------------+                                 |
+     | |     |            |                                 |
+  /chunk0\/chunk1\ ... /chunkN\     /chunk0\/chunk1\ ... /chunkN'\
+ +-----------------------------+   +------------------------------+
+ |file0                        |   |file0'                        |
+ +-----------------------------+   +------------------------------+
+
+
+Thanks to anarcat for drawing the picture!
+

+ 93 - 0
docs/misc/prune-example.txt

@@ -0,0 +1,93 @@
+borg prune visualized
+=====================
+
+Assume it is 2016-01-01, today's backup has not yet been made and you have
+created at least one backup on each day in 2015 except on 2015-12-20 (no
+backup made on that day).
+
+This is what borg prune --keep-daily 14 --keep-monthly 6 would keep.
+
+Backups kept by the --keep-daily rule are marked by a "d" to the right,
+backups kept by the --keep-monthly rule are marked by a "m" to the right.
+
+Calendar view
+-------------
+
+                            2015
+      January               February               March          
+Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  
+          1  2  3  4                     1                     1  
+ 5  6  7  8  9 10 11   2  3  4  5  6  7  8   2  3  4  5  6  7  8  
+12 13 14 15 16 17 18   9 10 11 12 13 14 15   9 10 11 12 13 14 15  
+19 20 21 22 23 24 25  16 17 18 19 20 21 22  16 17 18 19 20 21 22  
+26 27 28 29 30 31     23 24 25 26 27 28     23 24 25 26 27 28 29  
+                                            30 31                 
+
+       April                  May                   June          
+Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  
+       1  2  3  4  5               1  2  3   1  2  3  4  5  6  7  
+ 6  7  8  9 10 11 12   4  5  6  7  8  9 10   8  9 10 11 12 13 14  
+13 14 15 16 17 18 19  11 12 13 14 15 16 17  15 16 17 18 19 20 21  
+20 21 22 23 24 25 26  18 19 20 21 22 23 24  22 23 24 25 26 27 28  
+27 28 29 30           25 26 27 28 29 30 31  29 30m                
+                                                                  
+
+        July                 August              September        
+Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  
+       1  2  3  4  5                  1  2      1  2  3  4  5  6  
+ 6  7  8  9 10 11 12   3  4  5  6  7  8  9   7  8  9 10 11 12 13  
+13 14 15 16 17 18 19  10 11 12 13 14 15 16  14 15 16 17 18 19 20  
+20 21 22 23 24 25 26  17 18 19 20 21 22 23  21 22 23 24 25 26 27  
+27 28 29 30 31m       24 25 26 27 28 29 30  28 29 30m             
+                      31m                                         
+
+      October               November              December        
+Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  Mo Tu We Th Fr Sa Su  
+          1  2  3  4                     1      1  2  3  4  5  6  
+ 5  6  7  8  9 10 11   2  3  4  5  6  7  8   7  8  9 10 11 12 13  
+12 13 14 15 16 17 18   9 10 11 12 13 14 15  14 15 16 17d18d19d20  
+19 20 21 22 23 24 25  16 17 18 19 20 21 22  21d22d23d24d25d26d27d 
+26 27 28 29 30 31m    23 24 25 26 27 28 29  28d29d30d31d           
+                      30m                                          
+
+List view
+---------
+
+--keep-daily 14     --keep-monthly 6
+-------------------------------------------------
+ 1. 2015-12-31          (2015-12-31 kept by daily rule)
+ 2. 2015-12-30       1. 2015-11-30
+ 3. 2015-12-29       2. 2015-10-31
+ 4. 2015-12-28       3. 2015-09-30
+ 5. 2015-12-27       4. 2015-08-31
+ 6. 2015-12-26       5. 2015-07-31
+ 7. 2015-12-25       6. 2015-06-30
+ 8. 2015-12-24
+ 9. 2015-12-23
+10. 2015-12-22
+11. 2015-12-21
+    (no backup made on 2015-12-20)
+12. 2015-12-19
+13. 2015-12-18
+14. 2015-12-17
+
+
+Notes
+-----
+
+2015-12-31 is kept due to the --keep-daily 14 rule (because it is applied
+first), not due to the --keep-monthly rule.
+
+Because of that, the --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and
+Jun. December is not considered for this rule, because that backup was already
+kept because of the daily rule.
+
+2015-12-17 is kept to satisfy the --keep-daily 14 rule - because no backup was
+made on 2015-12-20. If a backup had been made on that day, it would not keep
+the one from 2015-12-17.
+
+We did not include yearly, weekly, hourly, minutely or secondly rules to keep
+this example simple. They all work in basically the same way.
+
+The weekly rule is easy to understand roughly, but hard to understand in all
+details. If interested, read "ISO 8601:2000 standard week-based year".
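
The command that corresponds to the scenario above, shown as a dry run so nothing is actually deleted (repository path is illustrative)::

    borg prune -v --dry-run --keep-daily 14 --keep-monthly 6 /path/to/repo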

+ 18 - 12
docs/quickstart.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: bash
 .. _quickstart:
 
 Quick Start
@@ -11,11 +12,15 @@ The next section continues by showing how backups can be automated.
 Important note about free space
 -------------------------------
 
-Before you start creating backups, please make sure that there is **always**
+Before you start creating backups, please make sure that there is *always*
 a good amount of free space on the filesystem that has your backup repository
-(and also on ~/.cache). It is hard to tell how much, maybe 1-5%.
+(and also on ~/.cache). A few GB should suffice for most hard-drive sized
+repositories. See also :ref:`cache-memory-usage`.
 
-If you run out of disk space, it can be hard or impossible to free space,
+If |project_name| runs out of disk space, it tries to free as much space as it
+can while aborting the current operation safely, which allows you to free more
+space by deleting/pruning archives. This mechanism is not bullet-proof though.
+If you *really* run out of disk space, it can be hard or impossible to free space,
 because |project_name| needs free space to operate - even to delete backup
 archives. There is a ``--save-space`` option for some commands, but even with
 that |project_name| will need free space to operate.
@@ -103,10 +108,11 @@ Automating backups
 
 The following example script backs up ``/home`` and ``/var/www`` to a remote
 server. The script also uses the :ref:`borg_prune` subcommand to maintain a
-certain number of old archives::
+certain number of old archives:
 
-    #!/bin/sh
+::
 
+    #!/bin/sh
     # setting this, so the repo does not need to be given on the commandline:
     export BORG_REPO=username@remoteserver.com:backup
 
@@ -115,18 +121,18 @@ certain number of old archives::
     export BORG_PASSPHRASE=mysecret
 
     # Backup most important stuff:
-    borg create --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
-        /etc                                                    \
-        /home                                                   \
-        /var                                                    \
-        --exclude '/home/*/.cache'                              \
+    borg create --stats -C lz4 ::'{hostname}-{now:%Y-%m-%d}' \
+        /etc                                                 \
+        /home                                                \
+        /var                                                 \
+        --exclude '/home/*/.cache'                           \
         --exclude '*.pyc'
 
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
-    # archives of THIS machine. Using --prefix is very important to
+    # archives of THIS machine. The '{hostname}-' prefix is very important to
     # limit prune's operation to this machine's archives and not apply to
     # other machine's archives also.
-    borg prune -v --prefix `hostname`- \
+    borg prune -v --prefix '{hostname}-' \
         --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
 .. backup_compression:

+ 7 - 13
docs/resources.rst

@@ -17,25 +17,19 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
 
 - `TW's slides for borgbackup talks / lightning talks <https://slides.com/thomaswaldmann>`_ (just grab the latest ones)
 
-- "Attic / Borg Backup" talk from GPN 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
+- `Attic / Borg Backup talk from GPN 2015 (media.ccc.de) <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
+- `Attic / Borg Backup talk from GPN 2015 (youtube) <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
 
-- "Attic" talk from Easterhegg 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
+- `Attic talk from Easterhegg 2015 (media.ccc.de) <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
+- `Attic talk from Easterhegg 2015 (youtube) <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
 
-- "Attic Backup: Mount your encrypted backups over ssh", 2014 (video, english):
-  `youtube <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
+- `Attic Backup: Mount your encrypted backups over ssh (youtube) <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
 
-- "Evolution of Borg", Oct 2015 (gource visualization of attic and borg development):
-  `youtube <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
+- `Evolution of Borg (youtube) <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
 
 Software
 --------
 
 - `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
 - some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
-- `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)
+- `borgmatic <https://torsion.org/borgmatic/>`_ - simple wrapper script for BorgBackup that creates and prunes backups

+ 8 - 2
docs/support.rst

@@ -16,13 +16,19 @@ ticket on the project's `issue tracker`_.
 
 For more general questions or discussions, IRC or mailing list are preferred.
 
-IRC
----
+Chat (IRC)
+----------
 Join us on channel #borgbackup on chat.freenode.net.
 
 As usual on IRC, just ask or tell directly and then patiently wait for replies.
 Stay connected.
 
+You could use the following link (after connecting, you can change the random
+nickname you get by typing "/nick mydesirednickname"):
+
+http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8
+
+
 Mailing list
 ------------
 

+ 35 - 19
docs/usage.rst

@@ -1,4 +1,5 @@
 .. include:: global.rst.inc
+.. highlight:: none
 .. _detailed_usage:
 
 Usage
@@ -79,6 +80,9 @@ General:
     BORG_RSH
         When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as
         a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options.
+    BORG_REMOTE_PATH
+        When set, use the given path/filename as remote path (default is "borg").
+        Using ``--remote-path PATH`` commandline option overrides the environment variable.
     TMPDIR
         where temporary files are stored (might need a lot of temporary space for some operations)
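
An illustrative use of the ``BORG_REMOTE_PATH`` variable described above (binary path and repository are made up for the example)::

    # both invocations run /usr/local/bin/borg1 on the remote side;
    # the command line option wins if both are given
    export BORG_REMOTE_PATH=/usr/local/bin/borg1
    borg list username@remoteserver.com:backup
    borg list --remote-path /usr/local/bin/borg1 username@remoteserver.com:backup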
 
@@ -446,14 +450,17 @@ prefix "foo" if you do not also want to match "foobar".
 It is strongly recommended to always run ``prune --dry-run ...`` first so you
 will see what it would do without it actually doing anything.
 
+There is also a visualized prune example in ``docs/misc/prune-example.txt``.
+
 ::
 
     # Keep 7 end of day and 4 additional end of week archives.
     # Do a dry-run without actually deleting anything.
     $ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo
 
-    # Same as above but only apply to archive names starting with "foo":
-    $ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /path/to/repo
+    # Same as above but only apply to archive names starting with the hostname
+    # of the machine followed by a "-" character:
+    $ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo
 
     # Keep 7 end of day, 4 additional end of week archives,
     # and an end of month archive for every month:
@@ -735,32 +742,34 @@ For more details, see :ref:`chunker_details`.
 --read-special
 ~~~~~~~~~~~~~~
 
-The option ``--read-special`` is not intended for normal, filesystem-level (full or
-partly-recursive) backups. You only give this option if you want to do something
-rather ... special -- and if you have hand-picked some files that you want to treat
-that way.
+The --read-special option is special - you do not want to use it for normal
+full-filesystem backups, but rather after carefully picking some targets for it.
 
-``borg create --read-special`` will open all files without doing any special
-treatment according to the file type (the only exception here are directories:
-they will be recursed into). Just imagine what happens if you do ``cat
-filename`` --- the content you will see there is what borg will backup for that
-filename.
+The option ``--read-special`` triggers special treatment for block and char
+device files as well as FIFOs. Instead of storing them as such a device (or
+FIFO), they will get opened, their content will be read and in the backup
+archive they will show up like a regular file.
 
-So, for example, symlinks will be followed, block device content will be read,
-named pipes / UNIX domain sockets will be read.
+Symlinks will also get special treatment if (and only if) they point to such
+a special file: instead of storing them as a symlink, the target special file
+will get processed as described above.
 
-You need to be careful with what you give as filename when using ``--read-special``,
-e.g. if you give ``/dev/zero``, your backup will never terminate.
+One intended use case of this is backing up the contents of one or multiple
+block devices, like e.g. LVM snapshots or inactive LVs or disk partitions.
 
-The given files' metadata is saved as it would be saved without
-``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
-additionally, also the content read from it will be saved for it.
+You need to be careful about what you include when using ``--read-special``,
+e.g. if you include ``/dev/zero``, your backup will never terminate.
 
 Restoring such files' content is currently only supported one at a time via
 ``--stdout`` option (and you have to redirect stdout to where ever it shall go,
 maybe directly into an existing device file of your choice or indirectly via
 ``dd``).
 
+To some extent, mounting a backup archive with the backups of special files
+via ``borg mount`` and then loop-mounting the image files from inside the mount
+point will work. If you plan to access a lot of data in there, it likely will
+scale and perform better if you do not work via the FUSE mount.
+
 Example
 +++++++
 
@@ -797,7 +806,7 @@ Now, let's see how to restore some LVs from such a backup. ::
     $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
 
 
-.. _append-only-mode:
+.. _append_only_mode:
 
 Append-only mode
 ~~~~~~~~~~~~~~~~
@@ -814,6 +823,13 @@ To activate append-only mode, edit the repository ``config`` file and add a line
 In append-only mode Borg will create a transaction log in the ``transactions`` file,
 where each line is a transaction and a UTC timestamp.
 
+In addition, ``borg serve`` can act as if a repository is in append-only mode with
+its option ``--append-only``. This can be very useful for fine-tuning access control
+in ``.ssh/authorized_keys`` ::
+
+    command="borg serve --append-only ..." ssh-rsa <key used for not-always-trustable backup clients>
+    command="borg serve ..." ssh-rsa <key used for backup management>
+
 Example
 +++++++
 

+ 2 - 0
docs/usage/break-lock.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_break-lock:
 
 borg break-lock

+ 2 - 0
docs/usage/change-passphrase.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_change-passphrase:
 
 borg change-passphrase

+ 24 - 3
docs/usage/check.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_check:
 
 borg check
@@ -15,6 +17,8 @@ optional arguments
         | only perform repository checks
     ``--archives-only``
         | only perform archives checks
+    ``--verify-data``
+        | perform cryptographic archive data integrity verification (conflicts with --repository-only)
     ``--repair``
         | attempt to repair any inconsistencies found
     ``--save-space``
@@ -23,6 +27,8 @@ optional arguments
         | only check last N archives (Default: all)
     ``-P``, ``--prefix``
         | only consider archive names starting with this prefix
+    ``-p``, ``--progress``
+        | show progress display while checking
 
 `Common options`_
     |
@@ -53,9 +59,12 @@ Second, the consistency and correctness of the archive metadata is verified:
 - Check if archive metadata chunk is present. if not, remove archive from
   manifest.
 - For all files (items) in the archive, for all chunks referenced by these
-  files, check if chunk is present (if not and we are in repair mode, replace
-  it with a same-size chunk of zeros). This requires reading of archive and
-  file metadata, but not data.
+  files, check if chunk is present.
+  If a chunk is not present and we are in repair mode, replace it with a same-size
+  replacement chunk of zeros.
+  If a previously lost chunk reappears (e.g. via a later backup) and we are in
+  repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+  This requires reading of archive and file metadata, but not data.
 - If we are in repair mode and we checked all the archives: delete orphaned
   chunks from the repo.
 - if you use a remote repo server via ssh:, the archive check is executed on
@@ -64,3 +73,15 @@ Second, the consistency and correctness of the archive metadata is verified:
   required).
 - The archive checks can be time consuming, they can be skipped using the
   --repository-only option.
+
+The --verify-data option will perform a full integrity verification (as opposed to
+checking the CRC32 of the segment) of data, which means reading the data from the
+repository, decrypting and decompressing it. This is a cryptographic verification,
+which will detect (accidental) corruption. For encrypted repositories it is
+tamper-resistant as well, unless the attacker has access to the keys.
+
+It is also very slow.
+
+--verify-data only verifies data used by the archives specified with --last,
+--prefix or an explicitly named archive. If none of these are passed,
+all data in the repository is verified.
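
A minimal usage sketch for the ``--verify-data`` option described above (the
repository path is a placeholder)::

    # full cryptographic verification of all data in the repository (slow)
    $ borg check --verify-data /path/to/repo

    # limit verification to the data referenced by the most recent archive
    $ borg check --verify-data --last 1 /path/to/repo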

+ 10 - 3
docs/usage/create.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_create:
 
 borg create
@@ -47,7 +49,7 @@ Filesystem options
     ``--ignore-inode``
         | ignore inode data in the file metadata cache used to detect unchanged files.
     ``--read-special``
-        | open and read special files as if they were regular files
+        | open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files.
 
 Archive options
     ``--comment COMMENT``
@@ -55,17 +57,21 @@ Archive options
     ``--timestamp yyyy-mm-ddThh:mm:ss``
         | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
     ``-c SECONDS``, ``--checkpoint-interval SECONDS``
-        | write checkpoint every SECONDS seconds (Default: 300)
+        | write checkpoint every SECONDS seconds (Default: 1800)
     ``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
         | specify the chunker parameters. default: 19,23,21,4095
     ``-C COMPRESSION``, ``--compression COMPRESSION``
         | select compression algorithm (and level):
         | none == no compression (default),
+        | auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]
+        |               being any valid compression algorithm (and optional level),
         | lz4 == lz4,
         | zlib == zlib (default level 6),
         | zlib,0 .. zlib,9 == zlib (with level 0..9),
         | lzma == lzma (default level 6),
         | lzma,0 .. lzma,9 == lzma (with level 0..9).
+    ``--compression-from COMPRESSIONCONFIG``
+        | read compression patterns from COMPRESSIONCONFIG, one per line
 
 Description
 ~~~~~~~~~~~
@@ -79,7 +85,7 @@ The archive name needs to be unique. It must not end in '.checkpoint' or
 checkpoints and treated in special ways.
 
 In the archive name, you may use the following format tags:
-{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
+{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4}
 
 To speed up pulling backups over sshfs and similar network file systems which do
 not provide correct inode information the --ignore-inode flag can be used. This
@@ -87,3 +93,4 @@ potentially decreases reliability of change detection, while avoiding always rea
 all files on these file systems.
 
 See the output of the "borg help patterns" command for more help on exclude patterns.
+See the output of the "borg help placeholders" command for more help on placeholders.

+ 2 - 0
docs/usage/debug-delete-obj.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_debug-delete-obj:
 
 borg debug-delete-obj

+ 2 - 0
docs/usage/debug-dump-archive-items.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_debug-dump-archive-items:
 
 borg debug-dump-archive-items

+ 2 - 0
docs/usage/debug-get-obj.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_debug-get-obj:
 
 borg debug-get-obj

+ 2 - 0
docs/usage/debug-put-obj.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_debug-put-obj:
 
 borg debug-put-obj

+ 4 - 0
docs/usage/delete.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_delete:
 
 borg delete
@@ -17,6 +19,8 @@ optional arguments
         | print statistics for the deleted archive
     ``-c``, ``--cache-only``
         | delete only the local cache for the given repository
+    ``--force``
+        | force deletion of corrupted archives
     ``--save-space``
         | work slower, but using less space
 

+ 2 - 0
docs/usage/diff.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_diff:
 
 borg diff

+ 6 - 0
docs/usage/extract.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_extract:
 
 borg extract
@@ -42,3 +44,7 @@ by passing a list of ``PATHs`` as arguments. The file selection can further
 be restricted by using the ``--exclude`` option.
 
 See the output of the "borg help patterns" command for more help on exclude patterns.
+
+By using ``--dry-run``, you can do all extraction steps except actually writing the
+output data: reading metadata and data chunks from the repo, checking the hash/hmac,
+decrypting, decompressing.
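
A minimal sketch of a dry-run extraction (repository and archive names are
placeholders)::

    # read, authenticate, decrypt and decompress everything, but write nothing
    $ borg extract --dry-run /path/to/repo::my-archive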

+ 82 - 41
docs/usage/help.rst.inc

@@ -1,31 +1,71 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
+.. _borg_placeholders:
+
+borg help placeholders
+~~~~~~~~~~~~~~~~~~~~~~
+
+
+Repository (or Archive) URLs and --prefix values support these placeholders:
+
+{hostname}
+
+    The (short) hostname of the machine.
+
+{fqdn}
+
+    The full name of the machine.
+
+{now}
+
+    The current local date and time.
+
+{utcnow}
+
+    The current UTC date and time.
+
+{user}
+
+    The user name (or UID, if no name is available) of the user running borg.
+
+{pid}
+
+    The current process ID.
+
+Examples::
+
+    borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
+    borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
+    borg prune --prefix '{hostname}-' ...
+
 .. _borg_patterns:
 
 borg help patterns
 ~~~~~~~~~~~~~~~~~~
-::
 
 
 Exclusion patterns support four separate styles, fnmatch, shell, regular
-expressions and path prefixes. If followed by a colon (':') the first two
-characters of a pattern are used as a style selector. Explicit style
-selection is necessary when a non-default style is desired or when the
-desired pattern starts with two alphanumeric characters followed by a colon
-(i.e. `aa:something/*`).
+expressions and path prefixes. By default, fnmatch is used. If followed
+by a colon (':') the first two characters of a pattern are used as a
+style selector. Explicit style selection is necessary when a
+non-default style is desired or when the desired pattern starts with
+two alphanumeric characters followed by a colon (i.e. `aa:something/*`).
 
 `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
 
-    These patterns use a variant of shell pattern syntax, with '*' matching
-    any number of characters, '?' matching any single character, '[...]'
-    matching any single character specified, including ranges, and '[!...]'
-    matching any character not specified. For the purpose of these patterns,
-    the path separator ('\' for Windows and '/' on other systems) is not
-    treated specially. Wrap meta-characters in brackets for a literal match
-    (i.e. `[?]` to match the literal character `?`). For a path to match
-    a pattern, it must completely match from start to end, or must match from
-    the start to just before a path separator. Except for the root path,
-    paths will never end in the path separator when matching is attempted.
-    Thus, if a given pattern ends in a path separator, a '*' is appended
-    before matching is attempted.
+    This is the default style.  These patterns use a variant of shell
+    pattern syntax, with '*' matching any number of characters, '?'
+    matching any single character, '[...]' matching any single
+    character specified, including ranges, and '[!...]' matching any
+    character not specified. For the purpose of these patterns, the
+    path separator ('\' for Windows and '/' on other systems) is not
+    treated specially. Wrap meta-characters in brackets for a literal
+    match (i.e. `[?]` to match the literal character `?`). For a path
+    to match a pattern, it must completely match from start to end, or
+    must match from the start to just before a path separator. Except
+    for the root path, paths will never end in the path separator when
+    matching is attempted.  Thus, if a given pattern ends in a path
+    separator, a '*' is appended before matching is attempted.
 
 Shell-style patterns, selector `sh:`
 
@@ -61,32 +101,33 @@ selector prefix is also supported for patterns loaded from a file. Due to
 whitespace removal paths with whitespace at the beginning or end can only be
 excluded using regular expressions.
 
-Examples:
+Examples::
+
+    # Exclude '/home/user/file.o' but not '/home/user/file.odt':
+    $ borg create -e '*.o' backup /
 
-# Exclude '/home/user/file.o' but not '/home/user/file.odt':
-$ borg create -e '*.o' backup /
+    # Exclude '/home/user/junk' and '/home/user/subdir/junk' but
+    # not '/home/user/importantjunk' or '/etc/junk':
+    $ borg create -e '/home/*/junk' backup /
 
-# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
-# not '/home/user/importantjunk' or '/etc/junk':
-$ borg create -e '/home/*/junk' backup /
+    # Exclude the contents of '/home/user/cache' but not the directory itself:
+    $ borg create -e /home/user/cache/ backup /
 
-# Exclude the contents of '/home/user/cache' but not the directory itself:
-$ borg create -e /home/user/cache/ backup /
+    # The file '/home/user/cache/important' is *not* backed up:
+    $ borg create -e /home/user/cache/ backup / /home/user/cache/important
 
-# The file '/home/user/cache/important' is *not* backed up:
-$ borg create -e /home/user/cache/ backup / /home/user/cache/important
+    # The contents of directories in '/home' are not backed up when their name
+    # ends in '.tmp'
+    $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
 
-# The contents of directories in '/home' are not backed up when their name
-# ends in '.tmp'
-$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
+    # Load exclusions from file
+    $ cat >exclude.txt <<EOF
+    # Comment line
+    /home/*/junk
+    *.tmp
+    fm:aa:something/*
+    re:^/home/[^/]\.tmp/
+    sh:/home/*/.thumbnails
+    EOF
+    $ borg create --exclude-from exclude.txt backup /
 
-# Load exclusions from file
-$ cat >exclude.txt <<EOF
-# Comment line
-/home/*/junk
-*.tmp
-fm:aa:something/*
-re:^/home/[^/]\.tmp/
-sh:/home/*/.thumbnails
-EOF
-$ borg create --exclude-from exclude.txt backup /

+ 6 - 0
docs/usage/info.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_info:
 
 borg info
@@ -17,3 +19,7 @@ Description
 ~~~~~~~~~~~
 
 This command displays some detailed information about the specified archive.
+
+The "This archive" line refers exclusively to this archive:
+"Deduplicated size" is the size of the unique chunks stored only for this
+archive. Non-unique / common chunks show up under "All archives".

+ 44 - 1
docs/usage/init.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_init:
 
 borg init
@@ -22,4 +24,45 @@ Description
 
 This command initializes an empty repository. A repository is a filesystem
 directory containing the deduplicated data from zero or more archives.
-Encryption can be enabled at repository init time.
+
+Encryption can be enabled at repository init time (the default).
+
+It is not recommended to disable encryption. Repository encryption protects you
+e.g. against the case that an attacker has access to your backup repository.
+
+But be careful with the key / the passphrase:
+
+If you want "passphrase-only" security, use the repokey mode. The key will
+be stored inside the repository (in its "config" file). In above mentioned
+attack scenario, the attacker will have the key (but not the passphrase).
+
+If you want "passphrase and having-the-key" security, use the keyfile mode.
+The key will be stored in your home directory (in .config/borg/keys). In
+the attack scenario, an attacker who only has access to your repo won't have
+the key (nor the passphrase).
+
+Make a backup copy of the key file (keyfile mode) or repo config file
+(repokey mode) and keep it at a safe place, so you still have the key in
+case it gets corrupted or lost. Also keep the passphrase at a safe place.
+The backup that is encrypted with that key won't help you with that, of course.
+
+Make sure you use a good passphrase. Not too short, not too simple. The real
+encryption / decryption key is encrypted with / locked by your passphrase.
+If an attacker gets your key, he can't unlock and use it without knowing the
+passphrase.
+
+Be careful with special or non-ascii characters in your passphrase:
+
+- Borg processes the passphrase as unicode (and encodes it as utf-8),
+  so it does not have problems dealing with even the strangest characters.
+- BUT: that does not necessarily apply to your OS / VM / keyboard configuration.
+
+So better use a long passphrase made from simple ascii chars than one that
+includes non-ascii stuff or characters that are hard/impossible to enter on
+a different keyboard layout.
+
+You can change your passphrase for existing repos at any time; it won't affect
+the encryption/decryption key or other secrets.
+
+When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for
+authentication. Hardware acceleration will be used automatically.
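
A short sketch of the two recommended encryption modes described above (the
repository paths are placeholders)::

    # "passphrase-only" security: key stored in the repo config
    $ borg init --encryption=repokey /path/to/repo

    # "passphrase and having-the-key" security: key stored under ~/.config/borg/keys
    $ borg init --encryption=keyfile /path/to/repo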

+ 18 - 9
docs/usage/list.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_list:
 
 borg list
@@ -35,8 +37,22 @@ This command lists the contents of a repository or an archive.
 
 See the "borg help patterns" command for more help on exclude patterns.
 
-The following keys are available for --format when listing files:
+The following keys are available for --format:
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF
+
+-- Keys for listing repository archives:
+ - archive: archive name interpreted as text (might be missing non-text characters, see barchive)
+ - barchive: verbatim archive name, can contain any character except NUL
+ - time: time of creation of the archive
+ - id: internal ID of the archive
 
+-- Keys for listing archive files:
  - type
  - mode
  - uid
@@ -47,6 +63,7 @@ The following keys are available for --format when listing files:
  - bpath: verbatim POSIX path, can contain any character except NUL
  - source: link target for links (identical to linktarget)
  - linktarget
+ - flags
 
  - size
  - csize: compressed size
@@ -70,11 +87,3 @@ The following keys are available for --format when listing files:
  - archiveid
  - archivename
  - extra: prepends {source} with " -> " for soft links and " link to " for hard links
-
- - NEWLINE: OS dependent line separator
- - NL: alias of NEWLINE
- - NUL: NUL character for creating print0 / xargs -0 like ouput, see bpath
- - SPACE
- - TAB
- - CR
- - LF
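
A hedged example using the format keys documented above (repository and archive
names are placeholders)::

    # list the archives in a repository, one name per line
    $ borg list --format '{archive}{NL}' /path/to/repo

    # list files in an archive with mode, size and verbatim path
    $ borg list --format '{mode} {size} {bpath}{NL}' /path/to/repo::my-archive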

+ 2 - 0
docs/usage/migrate-to-repokey.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_migrate-to-repokey:
 
 borg migrate-to-repokey

+ 14 - 0
docs/usage/mount.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_mount:
 
 borg mount
@@ -35,3 +37,15 @@ used in fstab entries:
 
 To allow a regular user to use fstab entries, add the ``user`` option:
 ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
+
+For mount options, see the fuse(8) manual page. Additional mount options
+supported by borg:
+
+- allow_damaged_files: by default damaged files (where missing chunks were
+  replaced with runs of zeros by borg check --repair) are not readable and
+  return EIO (I/O error). Set this option to read such files.
+
+The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
+to tweak the performance. It sets the number of cached data chunks; additional
+memory usage can be up to ~8 MiB times this number. The default is the number
+of CPU cores.
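
A usage sketch for the options described above (assuming the ``-o`` mount-option
syntax; paths and archive names are placeholders)::

    # allow reading files whose missing chunks were zero-filled by check --repair
    $ borg mount -o allow_damaged_files /path/to/repo::my-archive /mnt/point

    # raise the data chunk cache to 32 entries (up to ~8 MiB each)
    $ BORG_MOUNT_DATA_CACHE_ENTRIES=32 borg mount /path/to/repo::my-archive /mnt/point

    # unmount when done
    $ fusermount -u /mnt/point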

+ 34 - 12
docs/usage/prune.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_prune:
 
 borg prune
@@ -13,12 +15,18 @@ positional arguments
 optional arguments
     ``-n``, ``--dry-run``
         | do not change repository
+    ``--force``
+        | force pruning of corrupted archives
     ``-s``, ``--stats``
         | print statistics for the deleted archive
     ``--list``
         | output verbose list of archives it keeps/prunes
     ``--keep-within WITHIN``
         | keep all archives within this time interval
+    ``--keep-last``, ``--keep-secondly``
+        | number of secondly archives to keep
+    ``--keep-minutely``
+        | number of minutely archives to keep
     ``-H``, ``--keep-hourly``
         | number of hourly archives to keep
     ``-d``, ``--keep-daily``
@@ -40,17 +48,23 @@ optional arguments
 Description
 ~~~~~~~~~~~
 
-The prune command prunes a repository by deleting archives not matching
+The prune command prunes a repository by deleting all archives not matching
 any of the specified retention options. This command is normally used by
 automated backup scripts wanting to keep a certain number of historic backups.
 
-As an example, "-d 7" means to keep the latest backup on each day, up to 7
-most recent days with backups (days without backups do not count).
-The rules are applied from hourly to yearly, and backups selected by previous
-rules do not count towards those of later rules. The time that each backup
-completes is used for pruning purposes. Dates and times are interpreted in
-the local timezone, and weeks go from Monday to Sunday. Specifying a
-negative number of archives to keep means that there is no limit.
+Also, prune automatically removes checkpoint archives (incomplete archives left
+behind by interrupted backup runs) except if the checkpoint is the latest
+archive (and thus still needed). Checkpoint archives are not considered when
+comparing archive counts against the retention limits (--keep-*).
+
+If a prefix is set with -P, then only archives that start with the prefix are
+considered for deletion and only those archives count towards the totals
+specified by the rules.
+Otherwise, *all* archives in the repository are candidates for deletion!
+
+If you have multiple sequences of archives with different data sets (e.g.
+from different machines) in one shared repository, use one prune call per
+data set that matches only the respective archives using the -P option.
 
 The "--keep-within" option takes an argument of the form "<int><char>",
 where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
@@ -58,7 +72,15 @@ to keep all archives that were created within the past 48 hours.
 "1m" is taken to mean "31d". The archives kept with this option do not
 count towards the totals specified by any other options.
 
-If a prefix is set with -P, then only archives that start with the prefix are
-considered for deletion and only those archives count towards the totals
-specified by the rules.
-Otherwise, *all* archives in the repository are candidates for deletion!
+A good procedure is to thin out your backups more and more the older they get.
+As an example, "--keep-daily 7" means to keep the latest backup on each day,
+up to 7 most recent days with backups (days without backups do not count).
+The rules are applied from secondly to yearly, and backups selected by previous
+rules do not count towards those of later rules. The time that each backup
+starts is used for pruning purposes. Dates and times are interpreted in
+the local timezone, and weeks go from Monday to Sunday. Specifying a
+negative number of archives to keep means that there is no limit.
+
+The "--keep-last N" option is doing the same as "--keep-secondly N" (and it will
+keep the last N archives under the assumption that you do not create more than one
+backup archive in the same second).
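
A hedged example of the thinning-out procedure described above (the repository
path is a placeholder)::

    # preview what would be kept/pruned for this host's archives only
    $ borg prune --dry-run --list --prefix '{hostname}-' \
          --keep-daily 7 --keep-weekly 4 --keep-monthly 6 /path/to/repo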

+ 6 - 0
docs/usage/recreate.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_recreate:
 
 borg recreate
@@ -47,11 +49,15 @@ Archive options
     ``-C COMPRESSION``, ``--compression COMPRESSION``
         | select compression algorithm (and level):
         | none == no compression (default),
+        | auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]
+        |               being any valid compression algorithm (and optional level),
         | lz4 == lz4,
         | zlib == zlib (default level 6),
         | zlib,0 .. zlib,9 == zlib (with level 0..9),
         | lzma == lzma (default level 6),
         | lzma,0 .. lzma,9 == lzma (with level 0..9).
+    ``--compression-from COMPRESSIONCONFIG``
+        | read compression patterns from COMPRESSIONCONFIG, one per line
     ``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
         | specify the chunker parameters (or "default").
 

+ 2 - 0
docs/usage/rename.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_rename:
 
 borg rename

+ 4 - 0
docs/usage/serve.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_serve:
 
 borg serve
@@ -9,6 +11,8 @@ borg serve
 optional arguments
     ``--restrict-to-path PATH``
         | restrict repository access to PATH
+    ``--append-only``
+        | only allow appending to repository segment files
 
 `Common options`_
     |

+ 2 - 0
docs/usage/upgrade.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_upgrade:
 
 borg upgrade

+ 2 - 0
docs/usage/with-lock.rst.inc

@@ -1,3 +1,5 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
 .. _borg_with-lock:
 
 borg with-lock

+ 5 - 0
requirements.d/attic.txt

@@ -0,0 +1,5 @@
+# Please note:
+# attic only builds using OpenSSL 1.0.x; it cannot be installed using OpenSSL >= 1.1.0.
+# If attic is not installed, our unit tests will just skip the tests that require attic.
+attic
+

+ 61 - 0
scripts/glibc_check.py

@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""
+Check if all given binaries work with the given glibc version.
+
+glibc_check.py 2.11 bin [bin ...]
+"""
+
+import re
+import subprocess
+import sys
+
+verbose = True
+objdump = "objdump -T %s"
+glibc_re = re.compile(r'GLIBC_([0-9]\.[0-9]+)')
+
+
+def parse_version(v):
+    major, minor = v.split('.')
+    return int(major), int(minor)
+
+
+def format_version(version):
+    return "%d.%d" % version
+
+
+def main():
+    given = parse_version(sys.argv[1])
+    filenames = sys.argv[2:]
+
+    overall_versions = set()
+    for filename in filenames:
+        try:
+            output = subprocess.check_output(objdump % filename, shell=True,
+                                             stderr=subprocess.STDOUT)
+            output = output.decode('utf-8')
+            versions = set(parse_version(match.group(1))
+                           for match in glibc_re.finditer(output))
+            requires_glibc = max(versions)
+            overall_versions.add(requires_glibc)
+            if verbose:
+                print("%s %s" % (filename, format_version(requires_glibc)))
+        except subprocess.CalledProcessError as e:
+            if verbose:
+                print("%s errored." % filename)
+
+    wanted = max(overall_versions)
+    ok = given >= wanted
+
+    if verbose:
+        if ok:
+            print("The binaries work with the given glibc %s." %
+                  format_version(given))
+        else:
+            print("The binaries do not work with the given glibc %s. "
+                  "Minimum is: %s" % (format_version(given), format_version(wanted)))
+    return ok
+
+
+if __name__ == '__main__':
+    ok = main()
+    sys.exit(0 if ok else 1)

+ 0 - 0
src/borg/hash_sizes.py → scripts/hash_sizes.py


+ 2 - 0
scripts/release

@@ -0,0 +1,2 @@
+python setup.py register sdist upload --identity="Thomas Waldmann" --sign
+

+ 9 - 0
scripts/sign-binaries

@@ -0,0 +1,9 @@
+#!/bin/bash
+# usage: sign-binaries 201512312359
+
+for file in dist/borg-*; do
+    gpg --armor --detach-sign $file
+done
+
+touch -t $1 dist/*
+

+ 5 - 4
setup.py

@@ -35,9 +35,9 @@ extras_require = {
 }
 
 if sys.platform.startswith('freebsd'):
-    # while llfuse 1.0 is the latest llfuse release right now,
-    # llfuse 0.41.1 is the latest release that actually builds on freebsd:
-    extras_require['fuse'] = ['llfuse==0.41.1', ]
+    # llfuse was frequently broken / did not build on freebsd
+    # llfuse 0.41.1, 1.1 are ok
+    extras_require['fuse'] = ['llfuse <2.0, !=0.42.*, !=0.43, !=1.0', ]
 
 from setuptools import setup, find_packages, Extension
 from setuptools.command.sdist import sdist
@@ -206,12 +206,13 @@ class build_usage(Command):
         for command, parser in choices.items():
             print('generating help for %s' % command)
             with open('docs/usage/%s.rst.inc' % command, 'w') as doc:
+                doc.write(".. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!\n\n")
                 if command == 'help':
                     for topic in Archiver.helptext:
                         params = {"topic": topic,
                                   "underline": '~' * len('borg help ' + topic)}
                         doc.write(".. _borg_{topic}:\n\n".format(**params))
-                        doc.write("borg help {topic}\n{underline}\n::\n\n".format(**params))
+                        doc.write("borg help {topic}\n{underline}\n\n".format(**params))
                         doc.write(Archiver.helptext[topic])
                 else:
                     params = {"command": command,

+ 7 - 1
src/borg/_hashindex.c

@@ -114,6 +114,8 @@ static int hashindex_delete(HashIndex *index, const void *key);
 static void *hashindex_next_key(HashIndex *index, const void *key);
 
 /* Private API */
+static void hashindex_free(HashIndex *index);
+
 static int
 hashindex_index(HashIndex *index, const void *key)
 {
@@ -162,7 +164,11 @@ hashindex_resize(HashIndex *index, int capacity)
         return 0;
     }
     while((key = hashindex_next_key(index, key))) {
-        hashindex_set(new, key, key + key_size);
+        if(!hashindex_set(new, key, key + key_size)) {
+            /* This can only happen if there's a bug in the code calculating capacity */
+            hashindex_free(new);
+            return 0;
+        }
     }
     free(index->buckets);
     index->buckets = new->buckets;

+ 412 - 222
src/borg/archive.py

@@ -4,6 +4,7 @@ import socket
 import stat
 import sys
 import time
+from contextlib import contextmanager
 from datetime import datetime, timezone
 from getpass import getuser
 from io import BytesIO
@@ -33,6 +34,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
+from .item import Item
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
@@ -89,7 +91,7 @@ class Statistics:
             columns, lines = get_terminal_size()
             if not final:
                 msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
-                path = remove_surrogates(item[b'path']) if item else ''
+                path = remove_surrogates(item.path) if item else ''
                 space = columns - swidth(msg)
                 if space < swidth('...') + swidth(path):
                     path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
@@ -99,6 +101,50 @@ class Statistics:
             print(msg, file=stream or sys.stderr, end="\r", flush=True)
 
 
+def is_special(mode):
+    # file types that get special treatment in --read-special mode
+    return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
+
+
+class BackupOSError(Exception):
+    """
+    Wrapper for OSError raised while accessing backup files.
+
+    Borg does different kinds of IO, and IO failures have different consequences.
+    This wrapper represents failures of input file or extraction IO.
+    These are non-critical and are only reported (exit code = 1, warning).
+
+    Any unwrapped IO error is critical and aborts execution (for example repository IO failure).
+    """
+    def __init__(self, os_error):
+        self.os_error = os_error
+        self.errno = os_error.errno
+        self.strerror = os_error.strerror
+        self.filename = os_error.filename
+
+    def __str__(self):
+        return str(self.os_error)
+
+
+@contextmanager
+def backup_io():
+    """Context manager changing OSError to BackupOSError."""
+    try:
+        yield
+    except OSError as os_error:
+        raise BackupOSError(os_error) from os_error
+
+
+def backup_io_iter(iterator):
+    while True:
+        try:
+            with backup_io():
+                item = next(iterator)
+        except StopIteration:
+            return
+        yield item
+
+
 class DownloadPipeline:
 
     def __init__(self, repository, key):
@@ -109,16 +155,16 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         for _, data in self.fetch_many(ids):
             unpacker.feed(data)
-            items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
+            items = [Item(internal_dict=item) for item in unpacker]
             if filter:
                 items = [item for item in items if filter(item)]
             for item in items:
-                if b'chunks' in item:
-                    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
+                if 'chunks' in item:
+                    item.chunks = [ChunkListEntry(*e) for e in item.chunks]
             if preload:
                 for item in items:
-                    if b'chunks' in item:
-                        self.repository.preload([c.id for c in item[b'chunks']])
+                    if 'chunks' in item:
+                        self.repository.preload([c.id for c in item.chunks])
             for item in items:
                 yield item
 
@@ -138,7 +184,7 @@ class ChunkBuffer:
         self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
 
     def add(self, item):
-        self.buffer.write(self.packer.pack(StableDict(item)))
+        self.buffer.write(self.packer.pack(item.as_dict()))
         if self.is_full():
             self.flush()
 
@@ -289,9 +335,6 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item
 
     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
-        assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
-                                  ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
             self.stats.show_progress(item=item, dt=0.2)
         self.items_buffer.add(item)
@@ -359,9 +402,10 @@ Number of files: {0.stats.nfiles}'''.format(
             _, data = self.key.decrypt(id, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                if b'chunks' in item:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
                     stats.nfiles += 1
-                    add_file_chunks(item[b'chunks'])
+                    add_file_chunks(item.chunks)
         cache.rollback()
         return stats
 
@@ -376,22 +420,26 @@ Number of files: {0.stats.nfiles}'''.format(
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
-        :param original_path: b'path' key as stored in archive
+        :param original_path: 'path' key as stored in archive
         """
+        has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
-            if b'chunks' in item:
-                for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+            if 'chunks' in item:
+                for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if stdout:
                         sys.stdout.buffer.write(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item[b'path']))
             return
 
-        original_path = original_path or item[b'path']
+        original_path = original_path or item.path
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..') or (sys.platform == 'win32' and len(item[b'path']) > 1 and item[b'path'][1] == ':'):
             raise Exception('Path should be relative and local')
-        path = os.path.join(dest, item[b'path'])
+        path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.lstat(path)
@@ -403,79 +451,96 @@ Number of files: {0.stats.nfiles}'''.format(
             raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
         except OSError:
             pass
-        mode = item[b'mode']
+        mode = item.mode
         if stat.S_ISREG(mode):
-            if not os.path.exists(os.path.dirname(path)):
-                os.makedirs(os.path.dirname(path))
-
+            with backup_io():
+                if not os.path.exists(os.path.dirname(path)):
+                    os.makedirs(os.path.dirname(path))
             # Hard link?
-            if b'source' in item:
-                source = os.path.join(dest, item[b'source'])
-                if os.path.exists(path):
-                    os.unlink(path)
-                if not hardlink_masters:
-                    os.link(source, path)
-                    return
-                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+            if 'source' in item:
+                source = os.path.join(dest, item.source)
+                with backup_io():
+                    if os.path.exists(path):
+                        os.unlink(path)
+                    if not hardlink_masters:
+                        os.link(source, path)
+                        return
+                item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
-                    os.link(link_target, path)
+                    with backup_io():
+                        os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
-            with open(path, 'wb') as fd:
-                ids = [c.id for c in item[b'chunks']]
+            with backup_io():
+                fd = open(path, 'wb')
+            with fd:
+                ids = [c.id for c in item.chunks]
                 for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                    if sparse and self.zeros.startswith(data):
-                        # all-zero chunk: create a hole in a sparse file
-                        fd.seek(len(data), 1)
+                    with backup_io():
+                        if sparse and self.zeros.startswith(data):
+                            # all-zero chunk: create a hole in a sparse file
+                            fd.seek(len(data), 1)
+                        else:
+                            fd.write(data)
+                with backup_io():
+                    pos = fd.tell()
+                    fd.truncate(pos)
+                    fd.flush()
+                    if sys.platform != 'win32':
+                        self.restore_attrs(path, item, fd=fd.fileno())
                     else:
-                        fd.write(data)
-                pos = fd.tell()
-                fd.truncate(pos)
-                fd.flush()
-                if sys.platform != 'win32':
-                    self.restore_attrs(path, item, fd=fd.fileno())
-                else:
-                    # File needs to be closed or timestamps are rewritten at close
-                    fd.close()
-                    self.restore_attrs(path, item)
+                        # File needs to be closed or timestamps are rewritten at close
+                        fd.close()
+                        self.restore_attrs(path, item)
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item.path))
             if hardlink_masters:
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
-                hardlink_masters[item.get(b'source') or original_path] = (None, path)
-        elif stat.S_ISDIR(mode):
-            if not os.path.exists(path):
-                os.makedirs(path)
-            if restore_attrs:
+                hardlink_masters[item.get('source') or original_path] = (None, path)
+            return
+        with backup_io():
+            # No repository access beyond this point.
+            if stat.S_ISDIR(mode):
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                if restore_attrs:
+                    self.restore_attrs(path, item)
+            elif stat.S_ISLNK(mode):
+                if not os.path.exists(os.path.dirname(path)):
+                    os.makedirs(os.path.dirname(path))
+                source = item.source
+                if os.path.exists(path):
+                    os.unlink(path)
+                try:
+                    os.symlink(source, path)
+                except UnicodeEncodeError:
+                    raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
+                self.restore_attrs(path, item, symlink=True)
+            elif stat.S_ISFIFO(mode):
+                if not os.path.exists(os.path.dirname(path)):
+                    os.makedirs(os.path.dirname(path))
+                os.mkfifo(path)
                 self.restore_attrs(path, item)
-        elif stat.S_ISLNK(mode):
-            if not os.path.exists(os.path.dirname(path)):
-                os.makedirs(os.path.dirname(path))
-            source = item[b'source']
-            if os.path.exists(path):
-                os.unlink(path)
-            try:
-                os.symlink(source, path)
-            except UnicodeEncodeError:
-                raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
-            self.restore_attrs(path, item, symlink=True)
-        elif stat.S_ISFIFO(mode):
-            if not os.path.exists(os.path.dirname(path)):
-                os.makedirs(os.path.dirname(path))
-            os.mkfifo(path)
-            self.restore_attrs(path, item)
-        elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
-            os.mknod(path, item[b'mode'], item[b'rdev'])
-            self.restore_attrs(path, item)
-        else:
-            raise Exception('Unknown archive item type %r' % item[b'mode'])
+            elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
+                os.mknod(path, item.mode, item.rdev)
+                self.restore_attrs(path, item)
+            else:
+                raise Exception('Unknown archive item type %r' % item.mode)
 
     def restore_attrs(self, path, item, symlink=False, fd=None):
+        """
+        Restore filesystem attributes on *path* (*fd*) from *item*.
+
+        Does not access the repository.
+        """
         uid = gid = None
         if not self.numeric_owner:
-            uid = user2uid(item[b'user'])
-            gid = group2gid(item[b'group'])
-        uid = item[b'uid'] if uid is None else uid
-        gid = item[b'gid'] if gid is None else gid
+            uid = user2uid(item.user)
+            gid = group2gid(item.group)
+        uid = item.uid if uid is None else uid
+        gid = item.gid if gid is None else gid
         # This code is a bit of a mess due to os specific differences
         if sys.platform != 'win32':
             try:
@@ -492,14 +557,18 @@ Number of files: {0.stats.nfiles}'''.format(
                 pass
         if sys.platform != 'win32':
             if fd:
-                os.fchmod(fd, item[b'mode'])
+                os.fchown(fd, uid, gid)
+            else:
+                os.lchown(path, uid, gid)
+            if fd:
+                os.fchmod(fd, item.mode)
             elif not symlink:
-                os.chmod(path, item[b'mode'])
+                os.chmod(path, item.mode)
             elif has_lchmod:  # Not available on Linux
-                os.lchmod(path, item[b'mode'])
-        mtime = bigint_to_int(item[b'mtime'])
-        if b'atime' in item:
-            atime = bigint_to_int(item[b'atime'])
+                os.lchmod(path, item.mode)
+        mtime = item.mtime
+        if 'atime' in item:
+            atime = item.atime
         else:
             # old archives only had mtime in item metadata
             atime = mtime
@@ -510,14 +579,14 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             os.utime(path, None, ns=(atime, mtime), follow_symlinks=False)
         acl_set(path, item, self.numeric_owner)
-        if b'bsdflags' in item:
+        if 'bsdflags' in item:
             try:
-                set_flags(path, item[b'bsdflags'], fd=fd)
+                set_flags(path, item.bsdflags, fd=fd)
             except OSError:
                 pass
         # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include
         # the Linux capabilities in the "security.capability" attribute.
-        xattrs = item.get(b'xattrs', {})
+        xattrs = item.get('xattrs', {})
         for k, v in xattrs.items():
             try:
                 xattr.setxattr(fd or path, k, v, follow_symlinks=False)
@@ -547,71 +616,107 @@ Number of files: {0.stats.nfiles}'''.format(
         self.set_meta(b'name', name)
         del self.manifest.archives[oldname]
 
-    def delete(self, stats, progress=False):
-        unpacker = msgpack.Unpacker(use_list=False)
-        items_ids = self.metadata[b'items']
-        pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True)
-        for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
+    def delete(self, stats, progress=False, forced=False):
+        class ChunksIndexError(Error):
+            """Chunk ID {} missing from chunks index, corrupted chunks index - aborting transaction."""
+
+        def chunk_decref(id, stats):
+            nonlocal error
+            try:
+                self.cache.chunk_decref(id, stats)
+            except KeyError:
+                cid = bin_to_hex(id)
+                raise ChunksIndexError(cid)
+            except Repository.ObjectNotFound as e:
+                # object not in repo - strange, but we wanted to delete it anyway.
+                if not forced:
+                    raise
+                error = True
+
+        error = False
+        try:
+            unpacker = msgpack.Unpacker(use_list=False)
+            items_ids = self.metadata[b'items']
+            pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True)
+            for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
+                if progress:
+                    pi.show(i)
+                _, data = self.key.decrypt(items_id, data)
+                unpacker.feed(data)
+                chunk_decref(items_id, stats)
+                try:
+                    for item in unpacker:
+                        item = Item(internal_dict=item)
+                        if 'chunks' in item:
+                            for chunk_id, size, csize in item.chunks:
+                                chunk_decref(chunk_id, stats)
+                except (TypeError, ValueError):
+                    # if items metadata spans multiple chunks and one chunk got dropped somehow,
+                    # it could be that unpacker yields bad types
+                    if not forced:
+                        raise
+                    error = True
             if progress:
-                pi.show(i)
-            _, data = self.key.decrypt(items_id, data)
-            unpacker.feed(data)
-            self.cache.chunk_decref(items_id, stats)
-            for item in unpacker:
-                if b'chunks' in item:
-                    for chunk_id, size, csize in item[b'chunks']:
-                        self.cache.chunk_decref(chunk_id, stats)
-        if progress:
-            pi.finish()
-        self.cache.chunk_decref(self.id, stats)
+                pi.finish()
+        except (msgpack.UnpackException, Repository.ObjectNotFound):
+            # items metadata corrupted
+            if not forced:
+                raise
+            error = True
+        # in forced delete mode, we try hard to delete at least the manifest entry,
+        # if possible also the archive superblock, even if processing the items raises
+        # some harmless exception.
+        chunk_decref(self.id, stats)
         del self.manifest.archives[self.name]
+        if error:
+            logger.warning('forced deletion succeeded, but the deleted archive was corrupted.')
+            logger.warning('borg check --repair is required to free all space.')
 
     def stat_attrs(self, st, path):
-        item = {}
+        attrs = dict(
+            mode=st.st_mode,
+
+            atime=st.st_atime_ns,
+            ctime=st.st_ctime_ns,
+            mtime=st.st_mtime_ns,
+        )
         if sys.platform == 'win32':
             owner = get_owner(path)
-            item = {
-                b'mode': st.st_mode,
-                b'uid': owner[1], b'user': owner[0],
-                b'gid': st.st_gid, b'group': gid2group(st.st_gid),
-                b'atime': int_to_bigint(st.st_atime_ns),
-                b'ctime': int_to_bigint(st.st_ctime_ns),
-                b'mtime': int_to_bigint(st.st_mtime_ns),
-            }
+            attrs.update({
+                'uid': owner[1], 'user': owner[0],
+                'gid': st.st_gid, 'group': gid2group(st.st_gid),
+            })
         else:
-            item = {
-                b'mode': st.st_mode,
-                b'uid': st.st_uid, b'user': uid2user(st.st_uid),
-                b'gid': st.st_gid, b'group': gid2group(st.st_gid),
-                b'atime': int_to_bigint(st.st_atime_ns),
-                b'ctime': int_to_bigint(st.st_ctime_ns),
-                b'mtime': int_to_bigint(st.st_mtime_ns),
-            }
+            attrs.update({
+                'uid': st.st_uid, 'user': uid2user(st.st_uid),
+                'gid': st.st_gid, 'group': gid2group(st.st_gid),
+            })
         if self.numeric_owner:
-            item[b'user'] = item[b'group'] = None
-        xattrs = xattr.get_all(path, follow_symlinks=False)
+            attrs['user'] = attrs['group'] = None
+        with backup_io():
+            xattrs = xattr.get_all(path, follow_symlinks=False)
+            bsdflags = get_flags(path, st)
+            acl_get(path, attrs, st, self.numeric_owner)
         if xattrs:
-            item[b'xattrs'] = StableDict(xattrs)
-        bsdflags = get_flags(path, st)
+            attrs['xattrs'] = StableDict(xattrs)
         if bsdflags:
-            item[b'bsdflags'] = bsdflags
-        acl_get(path, item, st, self.numeric_owner)
-        return item
+            attrs['bsdflags'] = bsdflags
+        return attrs
 
     def process_dir(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'd'  # directory
 
     def process_fifo(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'f'  # fifo
 
     def process_dev(self, path, st):
-        item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
+        item = Item(path=make_path_safe(path), rdev=st.st_rdev)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         if stat.S_ISCHR(st.st_mode):
@@ -621,7 +726,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
     def process_symlink(self, path, st):
         source = os.readlink(path)
-        item = {b'path': make_path_safe(path), b'source': source}
+        item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 's'  # symlink
@@ -630,18 +735,18 @@ Number of files: {0.stats.nfiles}'''.format(
         uid, gid = 0, 0
         fd = sys.stdin.buffer  # binary
         chunks = []
-        for data in self.chunker.chunkify(fd):
+        for data in backup_io_iter(self.chunker.chunkify(fd)):
             chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
         self.stats.nfiles += 1
-        t = int_to_bigint(int(time.time()) * 1000000000)
-        item = {
-            b'path': path,
-            b'chunks': chunks,
-            b'mode': 0o100660,  # regular file, ug=rw
-            b'uid': uid, b'user': uid2user(uid),
-            b'gid': gid, b'group': gid2group(gid),
-            b'mtime': t, b'atime': t, b'ctime': t,
-        }
+        t = int(time.time()) * 1000000000
+        item = Item(
+            path=path,
+            chunks=chunks,
+            mode=0o100660,  # regular file, ug=rw
+            uid=uid, user=uid2user(uid),
+            gid=gid, group=gid2group(gid),
+            mtime=t, atime=t, ctime=t,
+        )
         self.add_item(item)
         return 'i'  # stdin
 
@@ -652,19 +757,23 @@ Number of files: {0.stats.nfiles}'''.format(
         if st.st_nlink > 1:
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
-                item = self.stat_attrs(st, path)
-                item.update({
-                    b'path': safe_path,
-                    b'source': source,
-                })
+                item = Item(path=safe_path, source=source)
+                item.update(self.stat_attrs(st, path))
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
+        is_special_file = is_special(st.st_mode)
+        if not is_special_file:
+            path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
+            ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
+        else:
+            # in --read-special mode, we may be called for special files.
+            # there should be no information in the cache about special files processed in
+            # read-special mode, but we better play safe as this was wrong in the past:
+            path_hash = ids = None
         first_run = not cache.files
-        ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
         if first_run:
             logger.debug('Processing files ...')
         chunks = None
@@ -678,27 +787,35 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {
-            b'path': safe_path,
-            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
-        }
+        item = Item(
+            path=safe_path,
+            hardlink_master=st.st_nlink > 1,  # item is a hard link and has the chunks
+        )
         # Only chunkify the file if needed
         if chunks is None:
             compress = self.compression_decider1.decide(path)
             logger.debug('%s -> compression %s', path, compress['name'])
-            fh = Archive._open_rb(path)
+            with backup_io():
+                fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
                 chunks = []
-                for data in self.chunker.chunkify(fd, fh):
+                for data in backup_io_iter(self.chunker.chunkify(fd, fh)):
                     chunks.append(cache.add_chunk(self.key.id_hash(data),
                                                   Chunk(data, compress=compress),
                                                   self.stats))
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
-            cache.memorize_file(path_hash, st, [c.id for c in chunks])
+            if not is_special_file:
+                # we must not memorize special files, because the contents of e.g. a
+                # block or char device will change without its mtime/size/inode changing.
+                cache.memorize_file(path_hash, st, [c.id for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
-        item[b'chunks'] = chunks
+        item.chunks = chunks
         item.update(self.stat_attrs(st, path))
+        if is_special_file:
+            # we processed a special file like a regular file. reflect that in mode,
+            # so it can be extracted / accessed in FUSE mount like a regular file:
+            item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
         self.stats.nfiles += 1
         self.add_item(item)
         return status
@@ -722,12 +839,40 @@ Number of files: {0.stats.nfiles}'''.format(
             return os.open(path, flags_normal)
 
 
+def valid_msgpacked_dict(d, keys_serialized):
+    """check if the data <d> looks like a msgpacked dict"""
+    d_len = len(d)
+    if d_len == 0:
+        return False
+    if d[0] & 0xf0 == 0x80:  # object is a fixmap (up to 15 elements)
+        offs = 1
+    elif d[0] == 0xde:  # object is a map16 (up to 2^16-1 elements)
+        offs = 3
+    else:
+        # object is not a map (dict)
+        # note: we must not have dicts with > 2^16-1 elements
+        return False
+    if d_len <= offs:
+        return False
+    # is the first dict key a bytestring?
+    if d[offs] & 0xe0 == 0xa0:  # key is a small bytestring (up to 31 chars)
+        pass
+    elif d[offs] in (0xd9, 0xda, 0xdb):  # key is a str8, str16 or str32
+        pass
+    else:
+        # key is not a bytestring
+        return False
+    # is the bytestring any of the expected key names?
+    key_serialized = d[offs:]
+    return any(key_serialized.startswith(pattern) for pattern in keys_serialized)
+
+
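Aside (not part of the patch): the new valid_msgpacked_dict() helper above relies on msgpack's wire format, where a small dict starts with a fixmap byte (0x80 | element count) and a short string key starts with a fixstr byte (0xa0 | length). A minimal sketch, assuming only the msgpack library that borg already depends on::

    import msgpack

    # a one-key dict serializes to: fixmap header, fixstr key header, key bytes, value ...
    data = msgpack.packb({'path': '/etc/hosts'})
    assert (data[0] & 0xf0) == 0x80   # fixmap holding one entry
    assert (data[1] & 0xe0) == 0xa0   # fixstr key of length 4
    assert data[2:6] == b'path'       # the serialized key name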
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -752,18 +897,10 @@ class RobustUnpacker:
             while self._resync:
                 if not data:
                     raise StopIteration
-                # Abort early if the data does not look like a serialized dict
-                if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
-                    data = data[1:]
-                    continue
-                # Make sure it looks like an item dict
-                for pattern in self.item_keys:
-                    if data[1:].startswith(pattern):
-                        break
-                else:
+                # Abort early if the data does not look like a serialized item dict
+                if not valid_msgpacked_dict(data, self.item_keys):
                     data = data[1:]
                     continue
-
                 self._unpacker = msgpack.Unpacker(object_hook=StableDict)
                 self._unpacker.feed(data)
                 try:
@@ -825,7 +962,7 @@ class ArchiveChecker:
         """
         # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.2)
+        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
         self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
@@ -838,7 +975,12 @@ class ArchiveChecker:
                 self.chunks[id_] = init_entry
 
     def identify_key(self, repository):
-        cdata = repository.get(next(self.chunks.iteritems())[0])
+        try:
+            some_chunkid, _ = next(self.chunks.iteritems())
+        except StopIteration:
+            # repo is completely empty, no chunks
+            return None
+        cdata = repository.get(some_chunkid)
         return key_factory(repository, cdata)
 
     def verify_data(self):
@@ -866,13 +1008,26 @@ class ArchiveChecker:
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
+        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
+
+        def valid_archive(obj):
+            if not isinstance(obj, dict):
+                return False
+            keys = set(obj)
+            return required_archive_keys.issubset(keys)
+
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest on an older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
+        archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
             _, data = self.key.decrypt(chunk_id, cdata)
-            # Some basic sanity checks of the payload before feeding it into msgpack
-            if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
+            if not valid_msgpacked_dict(data, archive_keys_serialized):
                 continue
             if b'cmdline' not in data or b'\xa7version\x01' not in data:
                 continue
@@ -882,7 +1037,7 @@ class ArchiveChecker:
             # msgpack with invalid data
             except (TypeError, ValueError, StopIteration):
                 continue
-            if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
+            if valid_archive(archive):
                 logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
                 manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
         logger.info('Manifest rebuild complete.')
@@ -916,35 +1071,64 @@ class ArchiveChecker:
                     self.repository.put(id_, cdata)
 
         def verify_file_chunks(item):
-            """Verifies that all file chunks are present
+            """Verifies that all file chunks are present.
 
-            Missing file chunks will be replaced with new chunks of the same
-            length containing all zeros.
+            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
+            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
             """
             offset = 0
             chunk_list = []
-            for chunk_id, size, csize in item[b'chunks']:
+            chunks_replaced = False
+            has_chunks_healthy = 'chunks_healthy' in item
+            chunks_current = item.chunks
+            chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
+            assert len(chunks_current) == len(chunks_healthy)
+            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
+                chunk_id, size, csize = chunk_healthy
                 if chunk_id not in self.chunks:
-                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
-                    self.error_found = True
-                    data = bytes(size)
-                    chunk_id = self.key.id_hash(data)
-                    cdata = self.key.encrypt(Chunk(data))
-                    csize = len(cdata)
-                    add_reference(chunk_id, size, csize, cdata)
+                    # a chunk of the healthy list is missing
+                    if chunk_current == chunk_healthy:
+                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(item.path, offset, offset + size))
+                        self.error_found = chunks_replaced = True
+                        data = bytes(size)
+                        chunk_id = self.key.id_hash(data)
+                        cdata = self.key.encrypt(Chunk(data))
+                        csize = len(cdata)
+                        add_reference(chunk_id, size, csize, cdata)
+                    else:
+                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has an '
+                                    'all-zero replacement chunk already.'.format(item.path, offset, offset + size))
+                        chunk_id, size, csize = chunk_current
+                        add_reference(chunk_id, size, csize)
                 else:
-                    add_reference(chunk_id, size, csize)
-                chunk_list.append((chunk_id, size, csize))
+                    if chunk_current == chunk_healthy:
+                        # normal case, all fine.
+                        add_reference(chunk_id, size, csize)
+                    else:
+                        logger.info('{}: Healed previously missing file chunk! '
+                                    '(Byte {}-{}).'.format(item.path, offset, offset + size))
+                        add_reference(chunk_id, size, csize)
+                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
+                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                 offset += size
-            item[b'chunks'] = chunk_list
+            if chunks_replaced and not has_chunks_healthy:
+                # if this is the first repair, remember the correct chunk IDs, so we can maybe heal the file later
+                item.chunks_healthy = item.chunks
+            if has_chunks_healthy and chunk_list == chunks_healthy:
+                logger.info('{}: Completely healed previously damaged file!'.format(item.path))
+                del item.chunks_healthy
+            item.chunks = chunk_list
 
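Illustration (not part of the patch; ids and sizes below are invented): the chunks / chunks_healthy bookkeeping in verify_file_chunks() above amounts to the following state transitions across two repair runs::

    # hypothetical item metadata; not borg's real data structures
    item = {'chunks': [['id1', 100, 60], ['id2', 100, 61]]}

    # first repair run: 'id2' is missing -> replace it with an all-zero chunk
    # and remember the healthy list so the file can be healed later
    item['chunks_healthy'] = item['chunks']
    item['chunks'] = [['id1', 100, 60], ['id0', 100, 20]]

    # later repair run: 'id2' reappeared -> restore it and drop chunks_healthy
    item['chunks'] = item['chunks_healthy']
    del item['chunks_healthy']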
         def robust_iterator(archive):
             """Iterates through all archive items
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
+            required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item,
+                                      self.manifest.item_keys)
             _state = 0
 
             def missing_chunk_detector(chunk_id):
@@ -959,6 +1143,12 @@ class ArchiveChecker:
                 self.error_found = True
                 logger.error(msg)
 
+            def valid_item(obj):
+                if not isinstance(obj, StableDict):
+                    return False
+                keys = set(obj)
+                return required_item_keys.issubset(keys) and keys.issubset(item_keys)
+
             i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
@@ -974,8 +1164,8 @@ class ArchiveChecker:
                     unpacker.feed(data)
                     try:
                         for item in unpacker:
-                            if isinstance(item, dict):
-                                yield item
+                            if valid_item(item):
+                                yield Item(internal_dict=item)
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
                     except Exception:
@@ -1019,7 +1209,7 @@ class ArchiveChecker:
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         verify_file_chunks(item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
@@ -1122,38 +1312,38 @@ class ArchiveRecreater:
 
         def item_is_hardlink_master(item):
             return (target_is_subset and
-                    stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and
-                    b'source' not in item and
-                    not matcher.match(item[b'path']))
+                    stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and
+                    'source' not in item and
+                    not matcher.match(item.path))
 
         for item in archive.iter_items():
             if item_is_hardlink_master(item):
                 # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters
-                hardlink_masters[item[b'path']] = (item.get(b'chunks'), None)
+                hardlink_masters[item.path] = (item.get('chunks'), None)
                 continue
             if resume_from:
                 # Fast forward to after the last processed file
-                if item[b'path'] == resume_from:
-                    logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path']))
+                if item.path == resume_from:
+                    logger.info('Fast-forwarded to %s', remove_surrogates(item.path))
                     resume_from = None
                 continue
-            if not matcher.match(item[b'path']):
-                self.print_file_status('x', item[b'path'])
+            if not matcher.match(item.path):
+                self.print_file_status('x', item.path)
                 continue
-            if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters:
+            if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters:
                 # master of this hard link is outside the target subset
-                chunks, new_source = hardlink_masters[item[b'source']]
+                chunks, new_source = hardlink_masters[item.source]
                 if new_source is None:
                     # First item to use this master, move the chunks
-                    item[b'chunks'] = chunks
-                    hardlink_masters[item[b'source']] = (None, item[b'path'])
-                    del item[b'source']
+                    item.chunks = chunks
+                    hardlink_masters[item.source] = (None, item.path)
+                    del item.source
                 else:
                     # Master was already moved, only update this item's source
-                    item[b'source'] = new_source
+                    item.source = new_source
             if self.dry_run:
-                self.print_file_status('-', item[b'path'])
+                self.print_file_status('-', item.path)
             else:
                 try:
                     self.process_item(archive, target, item)
@@ -1165,11 +1355,11 @@ class ArchiveRecreater:
             target.stats.show_progress(final=True)
 
     def process_item(self, archive, target, item):
-        if b'chunks' in item:
-            item[b'chunks'] = self.process_chunks(archive, target, item)
+        if 'chunks' in item:
+            item.chunks = self.process_chunks(archive, target, item)
             target.stats.nfiles += 1
         target.add_item(item)
-        self.print_file_status(file_status(item[b'mode']), item[b'path'])
+        self.print_file_status(file_status(item.mode), item.path)
         if self.interrupt:
             raise self.Interrupted
 
@@ -1177,9 +1367,9 @@ class ArchiveRecreater:
         """Return new chunk ID list for 'item'."""
         # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
-            return item[b'chunks']
+            return item.chunks
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
@@ -1210,7 +1400,7 @@ class ArchiveRecreater:
 
     def create_chunk_iterator(self, archive, target, item):
         """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
-        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']])
+        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
         if target.recreate_rechunkify:
             # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
             # (does not load the entire file into memory)
@@ -1272,7 +1462,7 @@ class ArchiveRecreater:
         """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
         def exclude(dir, tag_item):
             if self.keep_tag_files:
-                tag_files.append(PathPrefixPattern(tag_item[b'path']))
+                tag_files.append(PathPrefixPattern(tag_item.path))
                 tagged_dirs.append(FnmatchPattern(dir + '/'))
             else:
                 tagged_dirs.append(PathPrefixPattern(dir))
@@ -1284,18 +1474,18 @@ class ArchiveRecreater:
         cachedir_masters = {}
 
         for item in archive.iter_items(
-                filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
-            if item[b'path'].endswith(CACHE_TAG_NAME):
-                cachedir_masters[item[b'path']] = item
-            if stat.S_ISREG(item[b'mode']):
-                dir, tag_file = os.path.split(item[b'path'])
+                filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
+            if item.path.endswith(CACHE_TAG_NAME):
+                cachedir_masters[item.path] = item
+            if stat.S_ISREG(item.mode):
+                dir, tag_file = os.path.split(item.path)
                 if tag_file in self.exclude_if_present:
                     exclude(dir, item)
                 if self.exclude_caches and tag_file == CACHE_TAG_NAME:
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         file = open_item(archive, item)
                     else:
-                        file = open_item(archive, cachedir_masters[item[b'source']])
+                        file = open_item(archive, cachedir_masters[item.source])
                     if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                         exclude(dir, item)
         matcher.add(tag_files, True)
@@ -1336,13 +1526,13 @@ class ArchiveRecreater:
         logger.info('Replaying items from interrupted operation...')
         item = None
         for item in old_target.iter_items():
-            if b'chunks' in item:
-                for chunk in item[b'chunks']:
+            if 'chunks' in item:
+                for chunk in item.chunks:
                     self.cache.chunk_incref(chunk.id, target.stats)
                 target.stats.nfiles += 1
             target.add_item(item)
         if item:
-            resume_from = item[b'path']
+            resume_from = item.path
         else:
             resume_from = None
         if self.progress:

+ 277 - 169
src/borg/archiver.py

@@ -23,12 +23,14 @@ logger = create_logger()
 
 from . import __version__
 from . import helpers
-from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
+from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
+from .archive import BackupOSError, CHUNKER_PARAMS
 from .cache import Cache
 from .constants import *  # NOQA
-from .helpers import Error
-from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
-from .helpers import ItemFormatter, format_time, format_file_size, format_archive
+from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
+from .helpers import Error, NoManifestError
+from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec, PrefixSpec
+from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
 from .helpers import safe_encode, remove_surrogates, bin_to_hex
 from .helpers import prune_within, prune_split
 from .helpers import to_localtime, timestamp
@@ -38,6 +40,8 @@ from .helpers import update_excludes, check_extension_modules
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
 from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
+from .helpers import signal_handler
+from .item import Item
 from .key import key_creator, RepoKey, PassphraseKey
 from .platform import get_flags
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
@@ -164,7 +168,7 @@ class Archiver:
     def do_serve(self, args):
         """Start in server mode. This command is usually not used manually.
         """
-        return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve()
+        return RepositoryServer(restrict_to_paths=args.restrict_to_paths, append_only=args.append_only).serve()
 
     @with_repository(create=True, exclusive=True, manifest=False)
     def do_init(self, args, repository):
@@ -255,7 +259,7 @@ class Archiver:
                     if not dry_run:
                         try:
                             status = archive.process_stdin(path, cache)
-                        except OSError as e:
+                        except BackupOSError as e:
                             status = 'E'
                             self.print_warning('%s: %s', path, e)
                     else:
@@ -327,14 +331,18 @@ class Archiver:
             return
         status = None
         # Ignore if nodump flag is set
-        if get_flags(path, st) & stat.UF_NODUMP:
-            self.print_file_status('x', path)
+        try:
+            if get_flags(path, st) & stat.UF_NODUMP:
+                self.print_file_status('x', path)
+                return
+        except OSError as e:
+            self.print_warning('%s: %s', path, e)
             return
-        if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
+        if stat.S_ISREG(st.st_mode):
             if not dry_run:
                 try:
                     status = archive.process_file(path, st, cache, self.ignore_inode)
-                except OSError as e:
+                except BackupOSError as e:
                     status = 'E'
                     self.print_warning('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
@@ -362,13 +370,26 @@ class Archiver:
                                   read_special=read_special, dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
             if not dry_run:
-                status = archive.process_symlink(path, st)
+                if not read_special:
+                    status = archive.process_symlink(path, st)
+                else:
+                    st_target = os.stat(path)
+                    if is_special(st_target.st_mode):
+                        status = archive.process_file(path, st_target, cache)
+                    else:
+                        status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
             if not dry_run:
-                status = archive.process_fifo(path, st)
+                if not read_special:
+                    status = archive.process_fifo(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
             if not dry_run:
-                status = archive.process_dev(path, st)
+                if not read_special:
+                    status = archive.process_dev(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISSOCK(st.st_mode):
             # Ignore unix sockets
             return
@@ -411,41 +432,49 @@ class Archiver:
         hardlink_masters = {} if partial_extract else None
 
         def item_is_hardlink_master(item):
-            return (partial_extract and stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and b'source' not in item)
+            return (partial_extract and stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item)
 
         for item in archive.iter_items(preload=True,
-                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
-            orig_path = item[b'path']
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+            orig_path = item.path
             if item_is_hardlink_master(item):
-                hardlink_masters[orig_path] = (item.get(b'chunks'), None)
-            if not matcher.match(item[b'path']):
+                hardlink_masters[orig_path] = (item.get('chunks'), None)
+            if not matcher.match(item.path):
                 continue
             if strip_components:
-                item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item[b'path']:
+                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
+                if not item.path:
                     continue
             if not args.dry_run:
-                while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
-                    archive.extract_item(dirs.pop(-1), stdout=stdout)
+                while dirs and not item.path.startswith(dirs[-1].path):
+                    dir_item = dirs.pop(-1)
+                    try:
+                        archive.extract_item(dir_item, stdout=stdout)
+                    except BackupOSError as e:
+                        self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
             if output_list:
                 logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
             try:
                 if dry_run:
                     archive.extract_item(item, dry_run=True)
                 else:
-                    if stat.S_ISDIR(item[b'mode']):
+                    if stat.S_ISDIR(item.mode):
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
                         archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
                                              original_path=orig_path)
-            except OSError as e:
+            except BackupOSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
         if not args.dry_run:
             while dirs:
-                archive.extract_item(dirs.pop(-1))
+                dir_item = dirs.pop(-1)
+                try:
+                    archive.extract_item(dir_item)
+                except BackupOSError as e:
+                    self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
         for pattern in include_patterns:
             if pattern.match_count == 0:
                 self.print_warning("Include pattern '%s' never matched.", pattern)
@@ -461,58 +490,58 @@ class Archiver:
             return self.compare_chunk_contents(chunks1, chunks2)
 
         def sum_chunk_size(item, consider_ids=None):
-            if item.get(b'deleted'):
+            if item.get('deleted'):
                 return None
             else:
-                return sum(c.size for c in item[b'chunks']
+                return sum(c.size for c in item.chunks
                            if consider_ids is None or c.id in consider_ids)
 
         def get_owner(item):
             if args.numeric_owner:
-                return item[b'uid'], item[b'gid']
+                return item.uid, item.gid
             else:
-                return item[b'user'], item[b'group']
+                return item.user, item.group
 
         def get_mode(item):
-            if b'mode' in item:
-                return stat.filemode(item[b'mode'])
+            if 'mode' in item:
+                return stat.filemode(item.mode)
             else:
                 return [None]
 
         def has_hardlink_master(item, hardlink_masters):
-            return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
+            return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters
 
         def compare_link(item1, item2):
             # These are the simple link cases. For special cases, e.g. if a
             # regular file is replaced with a link or vice versa, it is
             # indicated in compare_mode instead.
-            if item1.get(b'deleted'):
+            if item1.get('deleted'):
                 return 'added link'
-            elif item2.get(b'deleted'):
+            elif item2.get('deleted'):
                 return 'removed link'
-            elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']:
+            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
                 return 'changed link'
 
         def contents_changed(item1, item2):
             if can_compare_chunk_ids:
-                return item1[b'chunks'] != item2[b'chunks']
+                return item1.chunks != item2.chunks
             else:
                 if sum_chunk_size(item1) != sum_chunk_size(item2):
                     return True
                 else:
-                    chunk_ids1 = [c.id for c in item1[b'chunks']]
-                    chunk_ids2 = [c.id for c in item2[b'chunks']]
+                    chunk_ids1 = [c.id for c in item1.chunks]
+                    chunk_ids2 = [c.id for c in item2.chunks]
                     return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
 
         def compare_content(path, item1, item2):
             if contents_changed(item1, item2):
-                if item1.get(b'deleted'):
+                if item1.get('deleted'):
                     return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
-                elif item2.get(b'deleted'):
+                elif item2.get('deleted'):
                     return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
                 else:
-                    chunk_ids1 = {c.id for c in item1[b'chunks']}
-                    chunk_ids2 = {c.id for c in item2[b'chunks']}
+                    chunk_ids1 = {c.id for c in item1.chunks}
+                    chunk_ids2 = {c.id for c in item2.chunks}
                     added_ids = chunk_ids2 - chunk_ids1
                     removed_ids = chunk_ids1 - chunk_ids2
                     added = sum_chunk_size(item2, added_ids)
@@ -521,9 +550,9 @@ class Archiver:
                                                  format_file_size(-removed, precision=1, sign=True)))
 
         def compare_directory(item1, item2):
-            if item2.get(b'deleted') and not item1.get(b'deleted'):
+            if item2.get('deleted') and not item1.get('deleted'):
                 return 'removed directory'
-            elif item1.get(b'deleted') and not item2.get(b'deleted'):
+            elif item1.get('deleted') and not item2.get('deleted'):
                 return 'added directory'
 
         def compare_owner(item1, item2):
@@ -533,7 +562,7 @@ class Archiver:
                 return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
 
         def compare_mode(item1, item2):
-            if item1[b'mode'] != item2[b'mode']:
+            if item1.mode != item2.mode:
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
 
         def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
@@ -544,15 +573,15 @@ class Archiver:
             changes = []
 
             if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1[b'source']][0]
+                item1 = hardlink_masters[item1.source][0]
 
             if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2[b'source']][1]
+                item2 = hardlink_masters[item2.source][1]
 
             if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
                 changes.append(compare_link(item1, item2))
 
-            if b'chunks' in item1 and b'chunks' in item2:
+            if 'chunks' in item1 and 'chunks' in item2:
                 changes.append(compare_content(path, item1, item2))
 
             if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
@@ -576,21 +605,21 @@ class Archiver:
 
         def compare_archives(archive1, archive2, matcher):
             def hardlink_master_seen(item):
-                return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
+                return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters
 
             def is_hardlink_master(item):
-                return item.get(b'hardlink_master', True) and b'source' not in item
+                return item.get('hardlink_master', True) and 'source' not in item
 
             def update_hardlink_masters(item1, item2):
                 if is_hardlink_master(item1) or is_hardlink_master(item2):
-                    hardlink_masters[item1[b'path']] = (item1, item2)
+                    hardlink_masters[item1.path] = (item1, item2)
 
             def compare_or_defer(item1, item2):
                 update_hardlink_masters(item1, item2)
                 if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
                     deferred.append((item1, item2))
                 else:
-                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1.path, item1, item2, hardlink_masters)
 
             orphans_archive1 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
@@ -599,44 +628,44 @@ class Archiver:
             output = []
 
             for item1, item2 in zip_longest(
-                    archive1.iter_items(lambda item: matcher.match(item[b'path'])),
-                    archive2.iter_items(lambda item: matcher.match(item[b'path'])),
+                    archive1.iter_items(lambda item: matcher.match(item.path)),
+                    archive2.iter_items(lambda item: matcher.match(item.path)),
             ):
-                if item1 and item2 and item1[b'path'] == item2[b'path']:
+                if item1 and item2 and item1.path == item2.path:
                     compare_or_defer(item1, item2)
                     continue
                 if item1:
-                    matching_orphan = orphans_archive2.pop(item1[b'path'], None)
+                    matching_orphan = orphans_archive2.pop(item1.path, None)
                     if matching_orphan:
                         compare_or_defer(item1, matching_orphan)
                     else:
-                        orphans_archive1[item1[b'path']] = item1
+                        orphans_archive1[item1.path] = item1
                 if item2:
-                    matching_orphan = orphans_archive1.pop(item2[b'path'], None)
+                    matching_orphan = orphans_archive1.pop(item2.path, None)
                     if matching_orphan:
                         compare_or_defer(matching_orphan, item2)
                     else:
-                        orphans_archive2[item2[b'path']] = item2
+                        orphans_archive2[item2.path] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
-            deleted_item = {
-                b'deleted': True,
-                b'chunks': [],
-                b'mode': 0,
-            }
+            deleted_item = Item(
+                deleted=True,
+                chunks=[],
+                mode=0,
+            )
             for added in orphans_archive2.values():
-                path = added[b'path']
-                deleted_item[b'path'] = path
+                path = added.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted_item, added)
                 compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
             for deleted in orphans_archive1.values():
-                path = deleted[b'path']
-                deleted_item[b'path'] = path
+                path = deleted.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted, deleted_item)
                 compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
             for item1, item2 in deferred:
                 assert hardlink_master_seen(item1)
                 assert hardlink_master_seen(item2)
-                compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                compare_items(output, item1.path, item1, item2, hardlink_masters)
 
             for line in sorted(output):
                 print_output(line)
@@ -670,14 +699,15 @@ class Archiver:
         cache.commit()
         return self.exit_code
 
-    @with_repository(exclusive=True)
-    def do_delete(self, args, repository, manifest, key):
+    @with_repository(exclusive=True, manifest=False)
+    def do_delete(self, args, repository):
         """Delete an existing repository or archive"""
         if args.location.archive:
+            manifest, key = Manifest.load(repository)
             with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
                 stats = Statistics()
-                archive.delete(stats, progress=args.progress)
+                archive.delete(stats, progress=args.progress, forced=args.forced)
                 manifest.write()
                 repository.commit(save_space=args.save_space)
                 cache.commit()
@@ -690,9 +720,15 @@ class Archiver:
         else:
             if not args.cache_only:
                 msg = []
-                msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
-                for archive_info in manifest.list_archive_infos(sort_by='ts'):
-                    msg.append(format_archive(archive_info))
+                try:
+                    manifest, key = Manifest.load(repository)
+                except NoManifestError:
+                    msg.append("You requested to completely DELETE the repository *including* all archives it may contain.")
+                    msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
+                else:
+                    msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
+                    for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                        msg.append(format_archive(archive_info))
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg = '\n'.join(msg)
                 if not yes(msg, false_msg="Aborting.", truish=('YES', ),
@@ -735,6 +771,14 @@ class Archiver:
     @with_repository()
     def do_list(self, args, repository, manifest, key):
         """List archive or repository contents"""
+        if not hasattr(sys.stdout, 'buffer'):
+            # This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
+            # which doesn't have an underlying buffer (= lower file object).
+            def write(bytestring):
+                sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
+        else:
+            write = sys.stdout.buffer.write
+
         if args.location.archive:
             matcher, _ = self.build_matcher(args.excludes, args.paths)
             with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
@@ -751,23 +795,22 @@ class Archiver:
                         format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
                 formatter = ItemFormatter(archive, format)
 
-                if not hasattr(sys.stdout, 'buffer'):
-                    # This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
-                    # which doesn't have an underlying buffer (= lower file object).
-                    def write(bytestring):
-                        sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
-                else:
-                    write = sys.stdout.buffer.write
-                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                for item in archive.iter_items(lambda item: matcher.match(item.path)):
                     write(safe_encode(formatter.format_item(item)))
         else:
+            if args.format:
+                format = args.format
+            elif args.short:
+                format = "{archive}{NL}"
+            else:
+                format = "{archive:<36} {time} [{id}]{NL}"
+            formatter = ArchiveFormatter(format)
+
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                     continue
-                if args.short:
-                    print(archive_info.name)
-                else:
-                    print(format_archive(archive_info))
+                write(safe_encode(formatter.format_item(archive_info)))
+
         return self.exit_code
 
     @with_repository(cache=True)
@@ -845,7 +888,7 @@ class Archiver:
                     else:
                         if args.output_list:
                             list_logger.info('Pruning archive: %s' % format_archive(archive))
-                        Archive(repository, key, manifest, archive.name, cache).delete(stats)
+                        Archive(repository, key, manifest, archive.name, cache).delete(stats, forced=args.forced)
                 else:
                     if args.output_list:
                         list_logger.info('Keeping archive: %s' % format_archive(archive))
@@ -905,27 +948,26 @@ class Archiver:
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
 
-        signal.signal(signal.SIGTERM, interrupt)
-        signal.signal(signal.SIGINT, interrupt)
-
-        if args.location.archive:
-            name = args.location.archive
-            if recreater.is_temporary_archive(name):
-                self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
-                return self.exit_code
-            recreater.recreate(name, args.comment)
-        else:
-            for archive in manifest.list_archive_infos(sort_by='ts'):
-                name = archive.name
+        with signal_handler(signal.SIGTERM, interrupt), \
+             signal_handler(signal.SIGINT, interrupt):
+            if args.location.archive:
+                name = args.location.archive
                 if recreater.is_temporary_archive(name):
-                    continue
-                print('Processing', name)
-                if not recreater.recreate(name, args.comment):
-                    break
-        manifest.write()
-        repository.commit()
-        cache.commit()
-        return self.exit_code
+                    self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
+                    return self.exit_code
+                recreater.recreate(name, args.comment)
+            else:
+                for archive in manifest.list_archive_infos(sort_by='ts'):
+                    name = archive.name
+                    if recreater.is_temporary_archive(name):
+                        continue
+                    print('Processing', name)
+                    if not recreater.recreate(name, args.comment):
+                        break
+            manifest.write()
+            repository.commit()
+            cache.commit()
+            return self.exit_code
 
     @with_repository(manifest=False)
     def do_with_lock(self, args, repository):
@@ -1017,26 +1059,27 @@ class Archiver:
     helptext = {}
     helptext['patterns'] = textwrap.dedent('''
         Exclusion patterns support four separate styles, fnmatch, shell, regular
-        expressions and path prefixes. If followed by a colon (':') the first two
-        characters of a pattern are used as a style selector. Explicit style
-        selection is necessary when a non-default style is desired or when the
-        desired pattern starts with two alphanumeric characters followed by a colon
-        (i.e. `aa:something/*`).
+        expressions and path prefixes. By default, fnmatch is used. If followed
+        by a colon (':') the first two characters of a pattern are used as a
+        style selector. Explicit style selection is necessary when a
+        non-default style is desired or when the desired pattern starts with
+        two alphanumeric characters followed by a colon (i.e. `aa:something/*`).
 
         `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
 
-            These patterns use a variant of shell pattern syntax, with '*' matching
-            any number of characters, '?' matching any single character, '[...]'
-            matching any single character specified, including ranges, and '[!...]'
-            matching any character not specified. For the purpose of these patterns,
-            the path separator ('\\' for Windows and '/' on other systems) is not
-            treated specially. Wrap meta-characters in brackets for a literal match
-            (i.e. `[?]` to match the literal character `?`). For a path to match
-            a pattern, it must completely match from start to end, or must match from
-            the start to just before a path separator. Except for the root path,
-            paths will never end in the path separator when matching is attempted.
-            Thus, if a given pattern ends in a path separator, a '*' is appended
-            before matching is attempted.
+            This is the default style.  These patterns use a variant of shell
+            pattern syntax, with '*' matching any number of characters, '?'
+            matching any single character, '[...]' matching any single
+            character specified, including ranges, and '[!...]' matching any
+            character not specified. For the purpose of these patterns, the
+            path separator ('\\' for Windows and '/' on other systems) is not
+            treated specially. Wrap meta-characters in brackets for a literal
+            match (i.e. `[?]` to match the literal character `?`). For a path
+            to match a pattern, it must completely match from start to end, or
+            must match from the start to just before a path separator. Except
+            for the root path, paths will never end in the path separator when
+            matching is attempted.  Thus, if a given pattern ends in a path
+            separator, a '*' is appended before matching is attempted.
 
         Shell-style patterns, selector `sh:`
 
@@ -1072,36 +1115,67 @@ class Archiver:
         whitespace removal paths with whitespace at the beginning or end can only be
         excluded using regular expressions.
 
-        Examples:
+        Examples::
+
+            # Exclude '/home/user/file.o' but not '/home/user/file.odt':
+            $ borg create -e '*.o' backup /
+
+            # Exclude '/home/user/junk' and '/home/user/subdir/junk' but
+            # not '/home/user/importantjunk' or '/etc/junk':
+            $ borg create -e '/home/*/junk' backup /
+
+            # Exclude the contents of '/home/user/cache' but not the directory itself:
+            $ borg create -e /home/user/cache/ backup /
+
+            # The file '/home/user/cache/important' is *not* backed up:
+            $ borg create -e /home/user/cache/ backup / /home/user/cache/important
+
+            # The contents of directories in '/home' are not backed up when their name
+            # ends in '.tmp'
+            $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
+
+            # Load exclusions from file
+            $ cat >exclude.txt <<EOF
+            # Comment line
+            /home/*/junk
+            *.tmp
+            fm:aa:something/*
+            re:^/home/[^/]\.tmp/
+            sh:/home/*/.thumbnails
+            EOF
+            $ borg create --exclude-from exclude.txt backup /\n\n''')
+    helptext['placeholders'] = textwrap.dedent('''
+        Repository (or Archive) URLs and --prefix values support these placeholders:
+
+        {hostname}
+
+            The (short) hostname of the machine.
+
+        {fqdn}
+
+            The full name of the machine.
 
-        # Exclude '/home/user/file.o' but not '/home/user/file.odt':
-        $ borg create -e '*.o' backup /
+        {now}
 
-        # Exclude '/home/user/junk' and '/home/user/subdir/junk' but
-        # not '/home/user/importantjunk' or '/etc/junk':
-        $ borg create -e '/home/*/junk' backup /
+            The current local date and time.
 
-        # Exclude the contents of '/home/user/cache' but not the directory itself:
-        $ borg create -e /home/user/cache/ backup /
+        {utcnow}
 
-        # The file '/home/user/cache/important' is *not* backed up:
-        $ borg create -e /home/user/cache/ backup / /home/user/cache/important
+            The current UTC date and time.
 
-        # The contents of directories in '/home' are not backed up when their name
-        # ends in '.tmp'
-        $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
+        {user}
 
-        # Load exclusions from file
-        $ cat >exclude.txt <<EOF
-        # Comment line
-        /home/*/junk
-        *.tmp
-        fm:aa:something/*
-        re:^/home/[^/]\.tmp/
-        sh:/home/*/.thumbnails
-        EOF
-        $ borg create --exclude-from exclude.txt backup /
-        ''')
+            The user name (or UID, if no name is available) of the user running borg.
+
+        {pid}
+
+            The current process ID.
+
+        Examples::
+
+            borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
+            borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
+            borg prune --prefix '{hostname}-' ...\n\n''')
 
     def do_help(self, parser, commands, args):
         if not args.topic:
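Aside (not part of the patch): the placeholder help above uses str.format() style syntax, which is why {now} and {utcnow} accept strftime-like format specs. A rough sketch of such an expansion (variable names invented; this is not borg's actual implementation)::

    import socket
    from datetime import datetime

    template = '{hostname}-{now:%Y-%m-%d_%H:%M:%S}'
    archive_name = template.format(hostname=socket.gethostname().split('.')[0],
                                   now=datetime.now())
    # e.g. 'myhost-2016-07-01_12:00:00'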
@@ -1162,8 +1236,8 @@ class Archiver:
                                   help='do not load/update the file metadata cache used to detect unchanged files')
         common_group.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT, metavar='M',
                                   help='set umask to M (local and remote, default: %(default)04o)')
-        common_group.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
-                                  help='set remote path to executable (default: "%(default)s")')
+        common_group.add_argument('--remote-path', dest='remote_path', metavar='PATH',
+                                  help='set remote path to executable (default: "borg")')
 
         parser = argparse.ArgumentParser(prog=prog, description='Borg - Deduplicated Backups')
         parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__,
@@ -1180,6 +1254,8 @@ class Archiver:
         subparser.set_defaults(func=self.do_serve)
         subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append',
                                metavar='PATH', help='restrict repository access to PATH')
+        subparser.add_argument('--append-only', dest='append_only', action='store_true',
+                               help='only allow appending to repository segment files')
         init_epilog = textwrap.dedent("""
         This command initializes an empty repository. A repository is a filesystem
         directory containing the deduplicated data from zero or more archives.
@@ -1262,9 +1338,12 @@ class Archiver:
         - Check if archive metadata chunk is present. if not, remove archive from
           manifest.
         - For all files (items) in the archive, for all chunks referenced by these
-          files, check if chunk is present (if not and we are in repair mode, replace
-          it with a same-size chunk of zeros). This requires reading of archive and
-          file metadata, but not data.
+          files, check if chunk is present.
+          If a chunk is not present and we are in repair mode, replace it with a same-size
+          replacement chunk of zeros.
+          If a previously lost chunk reappears (e.g. via a later backup) and we are in
+          repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+          This requires reading of archive and file metadata, but not data.
         - If we are in repair mode and we checked all the archives: delete orphaned
           chunks from the repo.
         - if you use a remote repo server via ssh:, the archive check is executed on
@@ -1314,7 +1393,7 @@ class Archiver:
         subparser.add_argument('--last', dest='last',
                                type=int, default=None, metavar='N',
                                help='only check last N archives (Default: all)')
-        subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
+        subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
                                help='only consider archive names starting with this prefix')
         subparser.add_argument('-p', '--progress', dest='progress',
                                action='store_true', default=False,
@@ -1370,7 +1449,7 @@ class Archiver:
         checkpoints and treated in special ways.
 
         In the archive name, you may use the following format tags:
-        {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
+        {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4}
 
         To speed up pulling backups over sshfs and similar network file systems which do
         not provide correct inode information the --ignore-inode flag can be used. This
@@ -1378,6 +1457,7 @@ class Archiver:
         all files on these file systems.
 
         See the output of the "borg help patterns" command for more help on exclude patterns.
+        See the output of the "borg help placeholders" command for more help on placeholders.
         """)
 
         subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False,
@@ -1435,7 +1515,8 @@ class Archiver:
                               help='ignore inode data in the file metadata cache used to detect unchanged files.')
         fs_group.add_argument('--read-special', dest='read_special',
                               action='store_true', default=False,
-                              help='open and read special files as if they were regular files')
+                              help='open and read block and char device files as well as FIFOs as if they were '
+                                   'regular files. Also follows symlinks pointing to these kinds of files.')
 
         archive_group = subparser.add_argument_group('Archive options')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='',
@@ -1446,8 +1527,8 @@ class Archiver:
                                    help='manually specify the archive creation date/time (UTC). '
                                         'alternatively, give a reference file/directory.')
         archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
-                                   type=int, default=300, metavar='SECONDS',
-                                   help='write checkpoint every SECONDS seconds (Default: 300)')
+                                   type=int, default=1800, metavar='SECONDS',
+                                   help='write checkpoint every SECONDS seconds (Default: 1800)')
         archive_group.add_argument('--chunker-params', dest='chunker_params',
                                    type=ChunkerParams, default=CHUNKER_PARAMS,
                                    metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
@@ -1606,6 +1687,9 @@ class Archiver:
         subparser.add_argument('-c', '--cache-only', dest='cache_only',
                                action='store_true', default=False,
                                help='delete only the local cache for the given repository')
+        subparser.add_argument('--force', dest='forced',
+                               action='store_true', default=False,
+                               help='force deletion of corrupted archives')
         subparser.add_argument('--save-space', dest='save_space', action='store_true',
                                default=False,
                                help='work slower, but using less space')
@@ -1618,8 +1702,13 @@ class Archiver:
 
         See the "borg help patterns" command for more help on exclude patterns.
 
-        The following keys are available for --format when listing files:
+        The following keys are available for --format:
+        """) + BaseFormatter.keys_help() + textwrap.dedent("""
+
+        -- Keys for listing repository archives:
+        """) + ArchiveFormatter.keys_help() + textwrap.dedent("""
 
+        -- Keys for listing archive files:
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser], add_help=False,
                                           description=self.do_list.__doc__,
@@ -1633,7 +1722,7 @@ class Archiver:
         subparser.add_argument('--format', '--list-format', dest='format', type=str,
                                help="""specify format for file listing
                                 (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
-        subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
+        subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
                                help='only consider archive names starting with this prefix')
         subparser.add_argument('-e', '--exclude', dest='excludes',
                                type=parse_pattern, action='append',
@@ -1660,6 +1749,13 @@ class Archiver:
         To allow a regular user to use fstab entries, add the ``user`` option:
         ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
 
+        For mount options, see the fuse(8) manual page. Additional mount options
+        supported by borg:
+
+        - allow_damaged_files: by default damaged files (where missing chunks were
+          replaced with runs of zeros by borg check --repair) are not readable and
+          return EIO (I/O error). Set this option to read such files.
+
         The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
         to tweak the performance. It sets the number of cached data chunks; additional
         memory usage can be up to ~8 MiB times this number. The default is the number
@@ -1714,7 +1810,7 @@ class Archiver:
                                help='repository for which to break the locks')
 
         prune_epilog = textwrap.dedent("""
-        The prune command prunes a repository by deleting archives not matching
+        The prune command prunes a repository by deleting all archives not matching
         any of the specified retention options. This command is normally used by
         automated backup scripts wanting to keep a certain number of historic backups.
 
@@ -1743,7 +1839,7 @@ class Archiver:
         up to 7 most recent days with backups (days without backups do not count).
         The rules are applied from secondly to yearly, and backups selected by previous
         rules do not count towards those of later rules. The time that each backup
-        completes is used for pruning purposes. Dates and times are interpreted in
+        starts is used for pruning purposes. Dates and times are interpreted in
         the local timezone, and weeks go from Monday to Sunday. Specifying a
         negative number of archives to keep means that there is no limit.
 
@@ -1760,6 +1856,9 @@ class Archiver:
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                default=False, action='store_true',
                                help='do not change repository')
+        subparser.add_argument('--force', dest='forced',
+                               action='store_true', default=False,
+                               help='force pruning of corrupted archives')
         subparser.add_argument('-s', '--stats', dest='stats',
                                action='store_true', default=False,
                                help='print statistics for the deleted archive')
@@ -1782,7 +1881,7 @@ class Archiver:
                                help='number of monthly archives to keep')
         subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
                                help='number of yearly archives to keep')
-        subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
+        subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
                                help='only consider archive names starting with this prefix')
         subparser.add_argument('--save-space', dest='save_space', action='store_true',
                                default=False,
@@ -2068,8 +2167,9 @@ class Archiver:
             if result.func != forced_result.func:
                 # someone is trying to execute a different borg subcommand, don't do that!
                 return forced_result
-            # the only thing we take from the forced "borg serve" ssh command is --restrict-to-path
+            # we only take specific options from the forced "borg serve" command:
             result.restrict_to_paths = forced_result.restrict_to_paths
+            result.append_only = forced_result.append_only
         return result
 
     def parse_args(self, args=None):
@@ -2126,7 +2226,7 @@ def sig_info_handler(signum, stack):  # pragma: no cover
             logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
             break
         if func in ('extract_item', ):  # extract op
-            path = loc['item'][b'path']
+            path = loc['item'].path
             try:
                 pos = loc['fd'].tell()
             except Exception:
@@ -2159,14 +2259,22 @@ def main():  # pragma: no cover
     if os.path.basename(sys.argv[0]) == "borgfs":
         sys.argv.insert(1, "mount")
 
-    # Make sure stdout and stderr have errors='replace') to avoid unicode
+    # Make sure stdout and stderr have errors='replace' to avoid unicode
     # issues when print()-ing unicode file names
     sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)
     sys.stderr = io.TextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True)
     setup_signal_handlers()
     archiver = Archiver()
     msg = None
-    args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND'))
+    try:
+        args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND'))
+    except Error as e:
+        msg = e.get_message()
+        if e.traceback:
+            msg += "\n%s\n%s" % (traceback.format_exc(), sysinfo())
+        # we might not have logging setup yet, so get out quickly
+        print(msg, file=sys.stderr)
+        sys.exit(e.exit_code)
     try:
         exit_code = archiver.run(args)
     except Error as e:

+ 4 - 2
src/borg/cache.py

@@ -16,6 +16,7 @@ from .helpers import get_cache_dir
 from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
+from .item import Item
 from .key import PlaintextKey
 from .locking import UpgradableLock
 from .remote import cache_if_remote
@@ -298,8 +299,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if not isinstance(item, dict):
                         logger.error('Error: Did not get expected metadata dict - archive corrupted!')
                         continue
-                    if b'chunks' in item:
-                        for chunk_id, size, csize in item[b'chunks']:
+                    item = Item(internal_dict=item)
+                    if 'chunks' in item:
+                        for chunk_id, size, csize in item.chunks:
                             chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)

+ 14 - 4
src/borg/constants.py

@@ -1,10 +1,20 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', b'win_dacl'])
+ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', 'win_dacl'])
+
+# this is the set of keys that are always present in items:
+REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ])
+
+# this set must be kept complete, otherwise rebuild_manifest might malfunction:
+ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
+                          'comment', 'chunker_params',
+                          'recreate_cmdline', 'recreate_source_id', 'recreate_args'])
+
+# this is the set of keys that are always present in archives:
+REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
 
 ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
-ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')
 
 # default umask, overriden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077
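
A tiny sketch of how such key sets can be used to sanity-check an unpacked metadata dict. This is illustrative only; the real consumers are the RobustUnpacker and the manifest rebuild mentioned in the comments above:

    def looks_valid(unpacked, valid_keys, required_keys):
        """Return True if the dict has all required keys and only known keys (sketch)."""
        keys = {k.decode() if isinstance(k, bytes) else k for k in unpacked}
        return required_keys <= keys <= valid_keys

    # e.g. looks_valid(item_dict, ITEM_KEYS, REQUIRED_ITEM_KEYS)
    # or   looks_valid(archive_dict, ARCHIVE_KEYS, REQUIRED_ARCHIVE_KEYS)
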

+ 54 - 19
src/borg/crypto.pyx

@@ -12,13 +12,12 @@ cdef extern from "openssl/evp.h":
     ctypedef struct EVP_CIPHER:
         pass
     ctypedef struct EVP_CIPHER_CTX:
-        unsigned char *iv
         pass
     ctypedef struct ENGINE:
         pass
     const EVP_CIPHER *EVP_aes_256_ctr()
-    void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
-    void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
+    EVP_CIPHER_CTX *EVP_CIPHER_CTX_new()
+    void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *a)
 
     int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
                            const unsigned char *key, const unsigned char *iv)
@@ -44,16 +43,38 @@ import struct
 
 _int = struct.Struct('>I')
 _long = struct.Struct('>Q')
+_2long = struct.Struct('>QQ')
 
 bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
 bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
 long_to_bytes = lambda x: _long.pack(x)
 
 
-cdef Py_buffer ro_buffer(object data) except *:
-    cdef Py_buffer view
-    PyObject_GetBuffer(data, &view, PyBUF_SIMPLE)
-    return view
+def bytes16_to_int(b, offset=0):
+    h, l = _2long.unpack_from(b, offset)
+    return (h << 64) + l
+
+
+def int_to_bytes16(i):
+    max_uint64 = 0xffffffffffffffff
+    l = i & max_uint64
+    h = (i >> 64) & max_uint64
+    return _2long.pack(h, l)
+
+
+def increment_iv(iv, amount=1):
+    """
+    Increment the IV by the given amount (default 1).
+
+    :param iv: input IV, 16 bytes (128 bit)
+    :param amount: increment value
+    :return: input_IV + amount, 16 bytes (128 bit)
+    """
+    assert len(iv) == 16
+    iv = bytes16_to_int(iv)
+    iv += amount
+    iv = int_to_bytes16(iv)
+    return iv
 
 
 def num_aes_blocks(int length):
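
The three helpers added above (bytes16_to_int, int_to_bytes16, increment_iv) treat the 16-byte IV as one big-endian 128-bit integer. A pure-Python equivalent using int.from_bytes/int.to_bytes, illustrative rather than the module code, plus a small worked example:

    def increment_iv_py(iv, amount=1):
        """Equivalent of increment_iv() above, wrapping at 2**128 like int_to_bytes16()."""
        assert len(iv) == 16
        value = (int.from_bytes(iv, 'big') + amount) % (1 << 128)
        return value.to_bytes(16, 'big')

    assert increment_iv_py(b'\x00' * 15 + b'\xff') == b'\x00' * 14 + b'\x01\x00'
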
@@ -63,27 +84,35 @@ def num_aes_blocks(int length):
     return (length + 15) // 16
 
 
+cdef Py_buffer ro_buffer(object data) except *:
+    cdef Py_buffer view
+    PyObject_GetBuffer(data, &view, PyBUF_SIMPLE)
+    return view
+
+
 cdef class AES:
     """A thin wrapper around the OpenSSL EVP cipher API
     """
-    cdef EVP_CIPHER_CTX ctx
+    cdef EVP_CIPHER_CTX *ctx
     cdef int is_encrypt
+    cdef unsigned char iv_orig[16]
+    cdef long long blocks
 
     def __cinit__(self, is_encrypt, key, iv=None):
-        EVP_CIPHER_CTX_init(&self.ctx)
+        self.ctx = EVP_CIPHER_CTX_new()
         self.is_encrypt = is_encrypt
         # Set cipher type and mode
         cipher_mode = EVP_aes_256_ctr()
         if self.is_encrypt:
-            if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
+            if not EVP_EncryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL):
                 raise Exception('EVP_EncryptInit_ex failed')
         else:  # decrypt
-            if not EVP_DecryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
+            if not EVP_DecryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL):
                 raise Exception('EVP_DecryptInit_ex failed')
         self.reset(key, iv)
 
     def __dealloc__(self):
-        EVP_CIPHER_CTX_cleanup(&self.ctx)
+        EVP_CIPHER_CTX_free(self.ctx)
 
     def reset(self, key=None, iv=None):
         cdef const unsigned char *key2 = NULL
@@ -92,17 +121,21 @@ cdef class AES:
             key2 = key
         if iv:
             iv2 = iv
+            assert isinstance(iv, bytes) and len(iv) == 16
+            for i in range(16):
+                self.iv_orig[i] = iv[i]
+            self.blocks = 0  # number of AES blocks encrypted starting with iv_orig
         # Initialise key and IV
         if self.is_encrypt:
-            if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
+            if not EVP_EncryptInit_ex(self.ctx, NULL, NULL, key2, iv2):
                 raise Exception('EVP_EncryptInit_ex failed')
         else:  # decrypt
-            if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
+            if not EVP_DecryptInit_ex(self.ctx, NULL, NULL, key2, iv2):
                 raise Exception('EVP_DecryptInit_ex failed')
 
     @property
     def iv(self):
-        return self.ctx.iv[:16]
+        return increment_iv(self.iv_orig[:16], self.blocks)
 
     def encrypt(self, data):
         cdef Py_buffer data_buf = ro_buffer(data)
@@ -114,12 +147,13 @@ cdef class AES:
         if not out:
             raise MemoryError
         try:
-            if not EVP_EncryptUpdate(&self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
+            if not EVP_EncryptUpdate(self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
                 raise Exception('EVP_EncryptUpdate failed')
             ctl = outl
-            if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
+            if not EVP_EncryptFinal_ex(self.ctx, out+ctl, &outl):
                 raise Exception('EVP_EncryptFinal failed')
             ctl += outl
+            self.blocks += num_aes_blocks(ctl)
             return out[:ctl]
         finally:
             free(out)
@@ -137,15 +171,16 @@ cdef class AES:
         if not out:
             raise MemoryError
         try:
-            if not EVP_DecryptUpdate(&self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
+            if not EVP_DecryptUpdate(self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
                 raise Exception('EVP_DecryptUpdate failed')
             ptl = outl
-            if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
+            if EVP_DecryptFinal_ex(self.ctx, out+ptl, &outl) <= 0:
                 # this error check is very important for modes with padding or
                 # authentication. for them, a failure here means corrupted data.
                 # CTR mode does not use padding nor authentication.
                 raise Exception('EVP_DecryptFinal failed')
             ptl += outl
+            self.blocks += num_aes_blocks(inl)
             return out[:ptl]
         finally:
             free(out)

+ 79 - 62
src/borg/fuse.py

@@ -14,8 +14,8 @@ from .logger import create_logger
 logger = create_logger()
 
 from .archive import Archive
-from .helpers import daemonize
-from .helpers import bigint_to_int
+from .helpers import daemonize, safe_encode
+from .item import Item
 from .lrucache import LRUCache
 
 # Does this version of llfuse support ns precision?
@@ -38,17 +38,21 @@ class ItemCache:
 
     def add(self, item):
         pos = self.fd.seek(0, io.SEEK_END)
-        self.fd.write(msgpack.packb(item))
+        self.fd.write(msgpack.packb(item.as_dict()))
         return pos + self.offset
 
     def get(self, inode):
         self.fd.seek(inode - self.offset, io.SEEK_SET)
-        return next(msgpack.Unpacker(self.fd, read_size=1024))
+        item = next(msgpack.Unpacker(self.fd, read_size=1024))
+        return Item(internal_dict=item)
 
 
 class FuseOperations(llfuse.Operations):
     """Export archive as a fuse filesystem
     """
+
+    allow_damaged_files = False
+
     def __init__(self, key, repository, manifest, archive, cached_repo):
         super().__init__()
         self._inode_count = 0
@@ -57,7 +61,7 @@ class FuseOperations(llfuse.Operations):
         self.items = {}
         self.parent = {}
         self.contents = defaultdict(dict)
-        self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
+        self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
         self.accounted_chunks = {}
         self.cache = ItemCache()
@@ -78,6 +82,32 @@ class FuseOperations(llfuse.Operations):
                 self.contents[1][os.fsencode(archive_name)] = archive_inode
                 self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
 
+    def mount(self, mountpoint, mount_options, foreground=False):
+        """Mount filesystem on *mountpoint* with *mount_options*."""
+        options = ['fsname=borgfs', 'ro']
+        if mount_options:
+            options.extend(mount_options.split(','))
+        try:
+            options.remove('allow_damaged_files')
+            self.allow_damaged_files = True
+        except ValueError:
+            pass
+        llfuse.init(self, mountpoint, options)
+        if not foreground:
+            daemonize()
+
+        # If the file system crashes, we do not want to umount because in that
+        # case the mountpoint suddenly appears to become empty. This can have
+        # nasty consequences, imagine the user has e.g. an active rsync mirror
+        # job - seeing the mountpoint empty, rsync would delete everything in the
+        # mirror.
+        umount = False
+        try:
+            signal = fuse_main()
+            umount = (signal is None)  # no crash and no signal -> umount request
+        finally:
+            llfuse.close(umount)
+
     def process_archive(self, archive, prefix=[]):
         """Build fuse inode hierarchy from archive metadata
         """
@@ -86,8 +116,19 @@ class FuseOperations(llfuse.Operations):
             _, data = self.key.decrypt(key, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/')
-                del item[b'path']
+                item = Item(internal_dict=item)
+                try:
+                    # This can happen if an archive was created with a command line like
+                    # $ borg create ... dir1/file dir1
+                    # In this case the code below will have created a default_dir inode for dir1 already.
+                    inode = self._find_inode(safe_encode(item.path), prefix)
+                except KeyError:
+                    pass
+                else:
+                    self.items[inode] = item
+                    continue
+                segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
+                del item.path
                 num_segments = len(segments)
                 parent = 1
                 for i, segment in enumerate(segments, 1):
@@ -98,10 +139,10 @@ class FuseOperations(llfuse.Operations):
                         self.parent[archive_inode] = parent
                     # Leaf segment?
                     if i == num_segments:
-                        if b'source' in item and stat.S_ISREG(item[b'mode']):
-                            inode = self._find_inode(item[b'source'], prefix)
+                        if 'source' in item and stat.S_ISREG(item.mode):
+                            inode = self._find_inode(item.source, prefix)
                             item = self.cache.get(inode)
-                            item[b'nlink'] = item.get(b'nlink', 1) + 1
+                            item.nlink = item.get('nlink', 1) + 1
                             self.items[inode] = item
                         else:
                             inode = self.cache.add(item)
@@ -151,60 +192,47 @@ class FuseOperations(llfuse.Operations):
         item = self.get_item(inode)
         size = 0
         dsize = 0
-        try:
-            for key, chunksize, _ in item[b'chunks']:
+        if 'chunks' in item:
+            for key, chunksize, _ in item.chunks:
                 size += chunksize
                 if self.accounted_chunks.get(key, inode) == inode:
                     self.accounted_chunks[key] = inode
                     dsize += chunksize
-        except KeyError:
-            pass
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
         entry.entry_timeout = 300
         entry.attr_timeout = 300
-        entry.st_mode = item[b'mode']
-        entry.st_nlink = item.get(b'nlink', 1)
-        entry.st_uid = item[b'uid']
-        entry.st_gid = item[b'gid']
-        entry.st_rdev = item.get(b'rdev', 0)
+        entry.st_mode = item.mode
+        entry.st_nlink = item.get('nlink', 1)
+        entry.st_uid = item.uid
+        entry.st_gid = item.gid
+        entry.st_rdev = item.get('rdev', 0)
         entry.st_size = size
         entry.st_blksize = 512
         entry.st_blocks = dsize / 512
         # note: older archives only have mtime (not atime nor ctime)
+        mtime_ns = item.mtime
         if have_fuse_xtime_ns:
-            entry.st_mtime_ns = bigint_to_int(item[b'mtime'])
-            if b'atime' in item:
-                entry.st_atime_ns = bigint_to_int(item[b'atime'])
-            else:
-                entry.st_atime_ns = bigint_to_int(item[b'mtime'])
-            if b'ctime' in item:
-                entry.st_ctime_ns = bigint_to_int(item[b'ctime'])
-            else:
-                entry.st_ctime_ns = bigint_to_int(item[b'mtime'])
+            entry.st_mtime_ns = mtime_ns
+            entry.st_atime_ns = item.get('atime', mtime_ns)
+            entry.st_ctime_ns = item.get('ctime', mtime_ns)
         else:
-            entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'atime' in item:
-                entry.st_atime = bigint_to_int(item[b'atime']) / 1e9
-            else:
-                entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'ctime' in item:
-                entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9
-            else:
-                entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9
+            entry.st_mtime = mtime_ns / 1e9
+            entry.st_atime = item.get('atime', mtime_ns) / 1e9
+            entry.st_ctime = item.get('ctime', mtime_ns) / 1e9
         return entry
 
     def listxattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        return item.get(b'xattrs', {}).keys()
+        return item.get('xattrs', {}).keys()
 
     def getxattr(self, inode, name, ctx=None):
         item = self.get_item(inode)
         try:
-            return item.get(b'xattrs', {})[name]
+            return item.get('xattrs', {})[name]
         except KeyError:
-            raise llfuse.FUSEError(errno.ENODATA) from None
+            raise llfuse.FUSEError(llfuse.ENOATTR) from None
 
     def _load_pending_archive(self, inode):
         # Check if this is an archive we need to load
@@ -225,6 +253,15 @@ class FuseOperations(llfuse.Operations):
         return self.getattr(inode)
 
     def open(self, inode, flags, ctx=None):
+        if not self.allow_damaged_files:
+            item = self.get_item(inode)
+            if 'chunks_healthy' in item:
+                # Processed archive items don't carry the path anymore; for converting the inode
+                # to the path we'd either have to store the inverse of the current structure,
+                # or search the entire archive. So we just don't print it. It's easy to correlate anyway.
+                logger.warning('File has damaged (all-zero) chunks. Try running borg check --repair. '
+                               'Mount with allow_damaged_files to read damaged files.')
+                raise llfuse.FUSEError(errno.EIO)
         return inode
 
     def opendir(self, inode, ctx=None):
@@ -234,7 +271,7 @@ class FuseOperations(llfuse.Operations):
     def read(self, fh, offset, size):
         parts = []
         item = self.get_item(fh)
-        for id, s, csize in item[b'chunks']:
+        for id, s, csize in item.chunks:
             if s < offset:
                 offset -= s
                 continue
@@ -264,24 +301,4 @@ class FuseOperations(llfuse.Operations):
 
     def readlink(self, inode, ctx=None):
         item = self.get_item(inode)
-        return os.fsencode(item[b'source'])
-
-    def mount(self, mountpoint, extra_options, foreground=False):
-        options = ['fsname=borgfs', 'ro']
-        if extra_options:
-            options.extend(extra_options.split(','))
-        llfuse.init(self, mountpoint, options)
-        if not foreground:
-            daemonize()
-
-        # If the file system crashes, we do not want to umount because in that
-        # case the mountpoint suddenly appears to become empty. This can have
-        # nasty consequences, imagine the user has e.g. an active rsync mirror
-        # job - seeing the mountpoint empty, rsync would delete everything in the
-        # mirror.
-        umount = False
-        try:
-            signal = fuse_main()
-            umount = (signal is None)  # no crash and no signal -> umount request
-        finally:
-            llfuse.close(umount)
+        return os.fsencode(item.source)

+ 2 - 3
src/borg/hashindex.pyx

@@ -18,8 +18,6 @@ cdef extern from "_hashindex.c":
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
-    void hashindex_merge(HashIndex *index, HashIndex *other)
-    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -323,7 +321,8 @@ cdef class ChunkIndex(IndexBase):
             values[1] = data[1]
             values[2] = data[2]
         else:
-            hashindex_set(self.index, key, data)
+            if not hashindex_set(self.index, key, data):
+                raise Exception('hashindex_set failed')
 
     def merge(self, ChunkIndex other):
         cdef void *key = NULL

+ 123 - 78
src/borg/helpers.py

@@ -5,14 +5,17 @@ import os
 import os.path
 import platform
 import re
+import signal
 import socket
 import sys
 import stat
 import textwrap
 import time
 import unicodedata
+import uuid
 from binascii import hexlify
 from collections import namedtuple, deque
+from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from functools import wraps, partial
@@ -68,18 +71,6 @@ class ErrorWithTraceback(Error):
     traceback = True
 
 
-class InternalOSError(Error):
-    """Error while accessing repository: [Errno {}] {}: {}"""
-
-    def __init__(self, os_error):
-        self.errno = os_error.errno
-        self.strerror = os_error.strerror
-        self.filename = os_error.filename
-
-    def get_message(self):
-        return self.__doc__.format(self.errno, self.strerror, self.filename)
-
-
 class IntegrityError(ErrorWithTraceback):
     """Data integrity error"""
 
@@ -88,6 +79,14 @@ class ExtensionModuleError(Error):
     """The Borg binary extension modules do not seem to be properly installed"""
 
 
+class NoManifestError(Error):
+    """Repository has no manifest."""
+
+
+class PlaceholderError(Error):
+    """Formatting Error: "{}".format({}): {}({})"""
+
+
 def check_extension_modules():
     from . import platform
     if hashindex.API_VERSION != 2:
@@ -104,11 +103,12 @@ class Manifest:
 
     MANIFEST_ID = b'\0' * 32
 
-    def __init__(self, key, repository):
+    def __init__(self, key, repository, item_keys=None):
         self.archives = {}
         self.config = {}
         self.key = key
         self.repository = repository
+        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
 
     @property
     def id_str(self):
@@ -117,7 +117,11 @@ class Manifest:
     @classmethod
     def load(cls, repository, key=None):
         from .key import key_factory
-        cdata = repository.get(cls.MANIFEST_ID)
+        from .repository import Repository
+        try:
+            cdata = repository.get(cls.MANIFEST_ID)
+        except Repository.ObjectNotFound:
+            raise NoManifestError
         if not key:
             key = key_factory(repository, cdata)
         manifest = cls(key, repository)
@@ -131,6 +135,8 @@ class Manifest:
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
         manifest.config = m[b'config']
+        # valid item keys are whatever is known in the repo or every key we know
+        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', []))
         return manifest, key
 
     def write(self):
@@ -140,6 +146,7 @@ class Manifest:
             'archives': self.archives,
             'timestamp': self.timestamp,
             'config': self.config,
+            'item_keys': tuple(self.item_keys),
         }))
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
@@ -516,6 +523,10 @@ def CompressionSpec(s):
     raise ValueError
 
 
+def PrefixSpec(s):
+    return replace_placeholders(s)
+
+
 def dir_is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
@@ -567,18 +578,25 @@ def partial_format(format, mapping):
 
 
 def format_line(format, data):
-    # TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
-
     try:
         return format.format(**data)
-    except (KeyError, ValueError) as e:
-        # this should catch format errors
-        print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
     except Exception as e:
-        # something unexpected, print error and raise exception
-        print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
-        raise
-    return ''
+        raise PlaceholderError(format, data, e.__class__.__name__, str(e))
+
+
+def replace_placeholders(text):
+    """Replace placeholders in text with their values."""
+    current_time = datetime.now()
+    data = {
+        'pid': os.getpid(),
+        'fqdn': socket.getfqdn(),
+        'hostname': socket.gethostname(),
+        'now': current_time.now(),
+        'utcnow': current_time.utcnow(),
+        'user': uid2user(os.getuid(), os.getuid()),
+        'uuid4': str(uuid.uuid4()),
+    }
+    return format_line(text, data)
 
 
 def safe_timestamp(item_timestamp_ns):
@@ -777,21 +795,8 @@ class Location:
         if not self.parse(self.orig):
             raise ValueError
 
-    def preformat_text(self, text):
-        """Format repository and archive path with common tags"""
-        current_time = datetime.now()
-        data = {
-            'pid': os.getpid(),
-            'fqdn': socket.getfqdn(),
-            'hostname': socket.gethostname(),
-            'now': current_time.now(),
-            'utcnow': current_time.utcnow(),
-            'user': uid2user(getuid(), getuid())
-            }
-        return format_line(text, data)
-
     def parse(self, text):
-        text = self.preformat_text(text)
+        text = replace_placeholders(text)
         valid = self._parse(text)
         if valid:
             return True
@@ -995,8 +1000,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
         retry_msg=None, invalid_msg=None, env_msg=None,
         falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
         default=False, retry=True, env_var_override=None, ofile=None, input=input):
-    """
-    Output <msg> (usually a question) and let user input an answer.
+    """Output <msg> (usually a question) and let user input an answer.
     Qualifies the answer according to falsish, truish and defaultish as True, False or <default>.
     If it didn't qualify and retry_msg is None (no retries wanted),
     return the default [which defaults to False]. Otherwise let user retry
@@ -1180,7 +1184,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
     """
     log multiple lines of text, each line by a separate logging call for cosmetic reasons
 
-    each positional argument may be a single or multiple lines (separated by \n) of text.
+    each positional argument may be one line or multiple lines (separated by newlines) of text.
     """
     lines = []
     for msg in msgs:
@@ -1189,7 +1193,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
         logger.log(level, line)
 
 
-class ItemFormatter:
+class BaseFormatter:
     FIXED_KEYS = {
         # Formatting aids
         'LF': '\n',
@@ -1200,19 +1204,54 @@ class ItemFormatter:
         'NEWLINE': os.linesep,
         'NL': os.linesep,
     }
+
+    def get_item_data(self, item):
+        raise NotImplementedError
+
+    def format_item(self, item):
+        return self.format.format_map(self.get_item_data(item))
+
+    @staticmethod
+    def keys_help():
+        return " - NEWLINE: OS dependent line separator\n" \
+               " - NL: alias of NEWLINE\n" \
+               " - NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \
+               " - SPACE\n" \
+               " - TAB\n" \
+               " - CR\n" \
+               " - LF"
+
+
+class ArchiveFormatter(BaseFormatter):
+
+    def __init__(self, format):
+        self.format = partial_format(format, self.FIXED_KEYS)
+
+    def get_item_data(self, archive):
+        return {
+            'barchive': archive.name,
+            'archive': remove_surrogates(archive.name),
+            'id': bin_to_hex(archive.id),
+            'time': format_time(to_localtime(archive.ts)),
+        }
+
+    @staticmethod
+    def keys_help():
+        return " - archive: archive name interpreted as text (might be missing non-text characters, see barchive)\n" \
+               " - barchive: verbatim archive name, can contain any character except NUL\n" \
+               " - time: time of creation of the archive\n" \
+               " - id: internal ID of the archive"
+
+
+class ItemFormatter(BaseFormatter):
     KEY_DESCRIPTIONS = {
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
         'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
-
         'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
-
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
     }
     KEY_GROUPS = (
         ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
@@ -1220,7 +1259,6 @@ class ItemFormatter:
         ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
         tuple(sorted(hashlib.algorithms_guaranteed)),
         ('archiveid', 'archivename', 'extra'),
-        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
     )
 
     @classmethod
@@ -1228,10 +1266,8 @@ class ItemFormatter:
         class FakeArchive:
             fpr = name = ""
 
-        fake_item = {
-            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
-            b'uid': 0, b'gid': 0,
-        }
+        from .item import Item
+        fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
         formatter = cls(FakeArchive, "")
         keys = []
         keys.extend(formatter.call_keys.keys())
@@ -1242,6 +1278,9 @@ class ItemFormatter:
     def keys_help(cls):
         help = []
         keys = cls.available_keys()
+        for key in cls.FIXED_KEYS:
+            keys.remove(key)
+
         for group in cls.KEY_GROUPS:
             for key in group:
                 keys.remove(key)
@@ -1267,12 +1306,12 @@ class ItemFormatter:
             'csize': self.calculate_csize,
             'num_chunks': self.calculate_num_chunks,
             'unique_chunks': self.calculate_unique_chunks,
-            'isomtime': partial(self.format_time, b'mtime'),
-            'isoctime': partial(self.format_time, b'ctime'),
-            'isoatime': partial(self.format_time, b'atime'),
-            'mtime': partial(self.time, b'mtime'),
-            'ctime': partial(self.time, b'ctime'),
-            'atime': partial(self.time, b'atime'),
+            'isomtime': partial(self.format_time, 'mtime'),
+            'isoctime': partial(self.format_time, 'ctime'),
+            'isoatime': partial(self.format_time, 'atime'),
+            'mtime': partial(self.time, 'mtime'),
+            'ctime': partial(self.time, 'ctime'),
+            'atime': partial(self.time, 'atime'),
         }
         for hash_function in hashlib.algorithms_guaranteed:
             self.add_key(hash_function, partial(self.hash_item, hash_function))
@@ -1284,11 +1323,11 @@ class ItemFormatter:
         self.used_call_keys = set(self.call_keys) & self.format_keys
 
     def get_item_data(self, item):
-        mode = stat.filemode(item[b'mode'])
+        mode = stat.filemode(item.mode)
         item_type = mode[0]
         item_data = self.item_data
 
-        source = item.get(b'source', '')
+        source = item.get('source', '')
         extra = ''
         if source:
             source = remove_surrogates(source)
@@ -1299,49 +1338,46 @@ class ItemFormatter:
                 extra = ' link to %s' % source
         item_data['type'] = item_type
         item_data['mode'] = mode
-        item_data['user'] = item[b'user'] or item[b'uid']
-        item_data['group'] = item[b'group'] or item[b'gid']
-        item_data['uid'] = item[b'uid']
-        item_data['gid'] = item[b'gid']
-        item_data['path'] = remove_surrogates(item[b'path'])
-        item_data['bpath'] = item[b'path']
+        item_data['user'] = item.user or item.uid
+        item_data['group'] = item.group or item.gid
+        item_data['uid'] = item.uid
+        item_data['gid'] = item.gid
+        item_data['path'] = remove_surrogates(item.path)
+        item_data['bpath'] = item.path
         item_data['source'] = source
         item_data['linktarget'] = source
         item_data['extra'] = extra
-        item_data['flags'] = item.get(b'bsdflags')
+        item_data['flags'] = item.get('bsdflags')
         for key in self.used_call_keys:
             item_data[key] = self.call_keys[key](item)
         return item_data
 
-    def format_item(self, item):
-        return self.format.format_map(self.get_item_data(item))
-
     def calculate_num_chunks(self, item):
-        return len(item.get(b'chunks', []))
+        return len(item.get('chunks', []))
 
     def calculate_unique_chunks(self, item):
         chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
+        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
 
     def calculate_size(self, item):
-        return sum(c.size for c in item.get(b'chunks', []))
+        return sum(c.size for c in item.get('chunks', []))
 
     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get(b'chunks', []))
+        return sum(c.csize for c in item.get('chunks', []))
 
     def hash_item(self, hash_function, item):
-        if b'chunks' not in item:
+        if 'chunks' not in item:
             return ""
         hash = hashlib.new(hash_function)
-        for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
+        for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
             hash.update(data)
         return hash.hexdigest()
 
     def format_time(self, key, item):
-        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+        return format_time(safe_timestamp(item.get(key) or item.mtime))
 
     def time(self, key, item):
-        return safe_timestamp(item.get(key) or item[b'mtime'])
+        return safe_timestamp(item.get(key) or item.mtime)
 
 
 class ChunkIteratorFileWrapper:
@@ -1385,7 +1421,7 @@ class ChunkIteratorFileWrapper:
 
 def open_item(archive, item):
     """Return file-like object for archived item (with chunks)."""
-    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
+    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
     return ChunkIteratorFileWrapper(chunk_iterator)
 
 
@@ -1572,3 +1608,12 @@ class CompressionDecider2:
         compr_args.update(compr_spec)
         logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec)
         return compr_args, Chunk(data, **meta)
+
+
+@contextmanager
+def signal_handler(signo, handler):
+    old_signal_handler = signal.signal(signo, handler)
+    try:
+        yield
+    finally:
+        signal.signal(signo, old_signal_handler)
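
A hypothetical use of the new signal_handler() context manager (this assumes borg is importable; the handler name and the work inside the block are made up):

    import signal
    from borg.helpers import signal_handler

    def request_shutdown(signum, stack):
        print('got signal %d, finishing the current checkpoint first' % signum)

    with signal_handler(signal.SIGTERM, request_shutdown):
        pass  # ... long-running work ...
    # the previous SIGTERM handler is restored here, even if an exception was raised
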

+ 79 - 18
src/borg/item.py

@@ -21,25 +21,34 @@ class PropDict:
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
-    def __init__(self, data_dict=None, **kw):
+    def __init__(self, data_dict=None, internal_dict=None, **kw):
         if data_dict is None:
             data = kw
         elif not isinstance(data_dict, dict):
             raise TypeError("data_dict must be dict")
         else:
             data = data_dict
-        # internally, we want an dict with only str-typed keys
-        _dict = {}
-        for k, v in data.items():
+        self._dict = {}
+        self.update_internal(internal_dict or {})
+        self.update(data)
+
+    def update(self, d):
+        for k, v in d.items():
             if isinstance(k, bytes):
                 k = k.decode()
-            elif not isinstance(k, str):
-                raise TypeError("dict keys must be str or bytes, not %r" % k)
-            _dict[k] = v
-        unknown_keys = set(_dict) - self.VALID_KEYS
-        if unknown_keys:
-            raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys))
-        self._dict = _dict
+            setattr(self, self._check_key(k), v)
+
+    def update_internal(self, d):
+        for k, v in d.items():
+            if isinstance(k, bytes):
+                k = k.decode()
+            self._dict[k] = v
+
+    def __eq__(self, other):
+        return self.as_dict() == other.as_dict()
+
+    def __repr__(self):
+        return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict)
 
     def as_dict(self):
         """return the internal dictionary"""
@@ -110,7 +119,7 @@ class Item(PropDict):
     If an Item shall be serialized, give as_dict() method output to msgpack packer.
     """
 
-    VALID_KEYS = set(key.decode() for key in ITEM_KEYS)  # we want str-typed keys
+    VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
@@ -118,14 +127,14 @@ class Item(PropDict):
 
     path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-
     user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
     group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
 
+    acl_access = PropDict._make_property('acl_access', bytes)
+    acl_default = PropDict._make_property('acl_default', bytes)
+    acl_extended = PropDict._make_property('acl_extended', bytes)
+    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes)
+
     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)
     gid = PropDict._make_property('gid', int)
@@ -138,6 +147,58 @@ class Item(PropDict):
 
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
-    chunks = PropDict._make_property('chunks', list)
+    chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
+    chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None')
 
     xattrs = PropDict._make_property('xattrs', StableDict)
+
+    deleted = PropDict._make_property('deleted', bool)
+    nlink = PropDict._make_property('nlink', int)
+
+
+class EncryptedKey(PropDict):
+    """
+    EncryptedKey abstraction that deals with validation and the low-level details internally:
+
+    A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or
+    built step-by-step by setting attributes.
+
+    msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later.
+
+    If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer.
+    """
+
+    VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data'}  # str-typed keys
+
+    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
+
+    version = PropDict._make_property('version', int)
+    algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode)
+    iterations = PropDict._make_property('iterations', int)
+    salt = PropDict._make_property('salt', bytes)
+    hash = PropDict._make_property('hash', bytes)
+    data = PropDict._make_property('data', bytes)
+
+
+class Key(PropDict):
+    """
+    Key abstraction that deals with validation and the low-level details internally:
+
+    A Key is created either from msgpack unpacker output, from another dict, from kwargs or
+    built step-by-step by setting attributes.
+
+    msgpack gives us a dict with bytes-typed keys, just give it to Key(d) and use key.xxx later.
+
+    If a Key shall be serialized, give as_dict() method output to msgpack packer.
+    """
+
+    VALID_KEYS = {'version', 'repository_id', 'enc_key', 'enc_hmac_key', 'id_key', 'chunk_seed'}  # str-typed keys
+
+    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
+
+    version = PropDict._make_property('version', int)
+    repository_id = PropDict._make_property('repository_id', bytes)
+    enc_key = PropDict._make_property('enc_key', bytes)
+    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
+    id_key = PropDict._make_property('id_key', bytes)
+    chunk_seed = PropDict._make_property('chunk_seed', int)

+ 34 - 31
src/borg/key.py

@@ -21,6 +21,7 @@ from .helpers import yes
 from .helpers import get_keys_dir
 from .helpers import bin_to_hex
 from .helpers import CompressionDecider2, CompressionSpec
+from .item import Key, EncryptedKey
 
 
 PREFIX = b'\0' * 8
@@ -341,24 +342,26 @@ class KeyfileKeyBase(AESKeyBase):
         cdata = a2b_base64(key_data)
         data = self.decrypt_key_file(cdata, passphrase)
         if data:
-            key = msgpack.unpackb(data)
-            if key[b'version'] != 1:
+            data = msgpack.unpackb(data)
+            key = Key(internal_dict=data)
+            if key.version != 1:
                 raise IntegrityError('Invalid key file header')
-            self.repository_id = key[b'repository_id']
-            self.enc_key = key[b'enc_key']
-            self.enc_hmac_key = key[b'enc_hmac_key']
-            self.id_key = key[b'id_key']
-            self.chunk_seed = key[b'chunk_seed']
+            self.repository_id = key.repository_id
+            self.enc_key = key.enc_key
+            self.enc_hmac_key = key.enc_hmac_key
+            self.id_key = key.id_key
+            self.chunk_seed = key.chunk_seed
             return True
         return False
 
     def decrypt_key_file(self, data, passphrase):
-        d = msgpack.unpackb(data)
-        assert d[b'version'] == 1
-        assert d[b'algorithm'] == b'sha256'
-        key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32)
-        data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
-        if hmac_sha256(key, data) == d[b'hash']:
+        data = msgpack.unpackb(data)
+        enc_key = EncryptedKey(internal_dict=data)
+        assert enc_key.version == 1
+        assert enc_key.algorithm == 'sha256'
+        key = passphrase.kdf(enc_key.salt, enc_key.iterations, 32)
+        data = AES(is_encrypt=False, key=key).decrypt(enc_key.data)
+        if hmac_sha256(key, data) == enc_key.hash:
             return data
 
     def encrypt_key_file(self, data, passphrase):
@@ -367,26 +370,26 @@ class KeyfileKeyBase(AESKeyBase):
         key = passphrase.kdf(salt, iterations, 32)
         hash = hmac_sha256(key, data)
         cdata = AES(is_encrypt=True, key=key).encrypt(data)
-        d = {
-            'version': 1,
-            'salt': salt,
-            'iterations': iterations,
-            'algorithm': 'sha256',
-            'hash': hash,
-            'data': cdata,
-        }
-        return msgpack.packb(d)
+        enc_key = EncryptedKey(
+            version=1,
+            salt=salt,
+            iterations=iterations,
+            algorithm='sha256',
+            hash=hash,
+            data=cdata,
+        )
+        return msgpack.packb(enc_key.as_dict())
 
     def _save(self, passphrase):
-        key = {
-            'version': 1,
-            'repository_id': self.repository_id,
-            'enc_key': self.enc_key,
-            'enc_hmac_key': self.enc_hmac_key,
-            'id_key': self.id_key,
-            'chunk_seed': self.chunk_seed,
-        }
-        data = self.encrypt_key_file(msgpack.packb(key), passphrase)
+        key = Key(
+            version=1,
+            repository_id=self.repository_id,
+            enc_key=self.enc_key,
+            enc_hmac_key=self.enc_hmac_key,
+            id_key=self.id_key,
+            chunk_seed=self.chunk_seed,
+        )
+        data = self.encrypt_key_file(msgpack.packb(key.as_dict()), passphrase)
         key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))
         return key_data
 

+ 10 - 6
src/borg/locking.py

@@ -101,9 +101,11 @@ class NotMyLock(LockErrorT):
 class ExclusiveLock:
     """An exclusive Lock based on mkdir fs operation being atomic.
 
-    If possible, try to use the contextmanager here like:
-    with ExclusiveLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with ExclusiveLock(...) as lock:
+            ...
+
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     """
@@ -222,9 +224,11 @@ class UpgradableLock:
     noone is allowed reading) and read access to a resource needs a shared
     lock (multiple readers are allowed).
 
-    If possible, try to use the contextmanager here like:
-    with UpgradableLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with UpgradableLock(...) as lock:
+            ...
+
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     """

+ 7 - 1
src/borg/platform/base.py

@@ -1,3 +1,4 @@
+import errno
 import os
 
 """
@@ -52,6 +53,11 @@ def sync_dir(path):
     fd = os.open(path, os.O_RDONLY)
     try:
         os.fsync(fd)
+    except OSError as os_error:
+        # Some network filesystems don't support this and fail with EINVAL.
+        # Other error codes (e.g. EIO) shouldn't be silenced.
+        if os_error.errno != errno.EINVAL:
+            raise
     finally:
         os.close(fd)
 
@@ -75,7 +81,7 @@ class SyncFile:
     """
 
     def __init__(self, path):
-        self.fd = open(path, 'wb')
+        self.fd = open(path, 'xb')
         self.fileno = self.fd.fileno()
 
     def __enter__(self):

+ 12 - 14
src/borg/platform/darwin.pyx

@@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False):
         if text == NULL:
             return
         if numeric_owner:
-            item[b'acl_extended'] = _remove_non_numeric_identifier(text)
+            item['acl_extended'] = _remove_non_numeric_identifier(text)
         else:
-            item[b'acl_extended'] = text
+            item['acl_extended'] = text
     finally:
         acl_free(text)
         acl_free(acl)
@@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False):
 
 def acl_set(path, item, numeric_owner=False):
     cdef acl_t acl = NULL
-    try:
+    acl_text = item.get('acl_extended')
+    if acl_text is not None:
         try:
             if numeric_owner:
-                acl = acl_from_text(item[b'acl_extended'])
+                acl = acl_from_text(acl_text)
             else:
-                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(item[b'acl_extended']))
-        except KeyError:
-            return
-        if acl == NULL:
-            return
-        if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
-            return
-    finally:
-        acl_free(acl)
-
+                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(acl_text))
+            if acl == NULL:
+                return
+            if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
+                return
+        finally:
+            acl_free(acl)

+ 6 - 6
src/borg/platform/freebsd.pyx

@@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False):
         return
     flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0
     if ret > 0:
-        _get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags)
+        _get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags)
     else:
-        _get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags)
-        _get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags)
+        _get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags)
+        _get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags)
 
 
 cdef _set_acl(p, type, item, attribute, numeric_owner=False):
@@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False):
     of the user/group names
     """
     p = os.fsencode(path)
-    _set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner)
-    _set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner)
-    _set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner)
+    _set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner)
+    _set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner)
+    _set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner)

+ 5 - 5
src/borg/platform/linux.pyx

@@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False):
         if access_acl:
             access_text = acl_to_text(access_acl, NULL)
             if access_text:
-                item[b'acl_access'] = converter(access_text)
+                item['acl_access'] = converter(access_text)
         default_acl = acl_get_file(p, ACL_TYPE_DEFAULT)
         if default_acl:
             default_text = acl_to_text(default_acl, NULL)
             if default_text:
-                item[b'acl_default'] = converter(default_text)
+                item['acl_default'] = converter(default_text)
     finally:
         acl_free(default_text)
         acl_free(default_acl)
@@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False):
         converter = posix_acl_use_stored_uid_gid
     else:
         converter = acl_use_local_uid_gid
-    access_text = item.get(b'acl_access')
-    default_text = item.get(b'acl_default')
+    access_text = item.get('acl_access')
+    default_text = item.get('acl_default')
     if access_text:
         try:
             access_acl = acl_from_text(<bytes>converter(access_text))
@@ -214,7 +214,7 @@ cdef _sync_file_range(fd, offset, length, flags):
     assert offset & PAGE_MASK == 0, "offset %d not page-aligned" % offset
     assert length & PAGE_MASK == 0, "length %d not page-aligned" % length
     if sync_file_range(fd, offset, length, flags) != 0:
-        raise OSError(errno, os.strerror(errno))
+        raise OSError(errno.errno, os.strerror(errno.errno))
     os.posix_fadvise(fd, offset, length, os.POSIX_FADV_DONTNEED)
 
 cdef unsigned PAGE_MASK = resource.getpagesize() - 1

+ 35 - 22
src/borg/remote.py

@@ -60,9 +60,10 @@ class RepositoryServer:  # pragma: no cover
         'break_lock',
     )
 
-    def __init__(self, restrict_to_paths):
+    def __init__(self, restrict_to_paths, append_only):
         self.repository = None
         self.restrict_to_paths = restrict_to_paths
+        self.append_only = append_only
 
     def serve(self):
         stdin_fd = sys.stdin.fileno()
@@ -129,7 +130,7 @@ class RepositoryServer:  # pragma: no cover
                     break
             else:
                 raise PathNotAllowed(path)
-        self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock)
+        self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=self.append_only)
         self.repository.__enter__()  # clean exit handled by serve() method
         return self.repository.id
 
@@ -159,6 +160,7 @@ class RemoteRepository:
             # pyinstaller binary adds LD_LIBRARY_PATH=/tmp/_ME... but we do not want
             # that the system's ssh binary picks up (non-matching) libraries from there
             env.pop('LD_LIBRARY_PATH', None)
+        env.pop('BORG_PASSPHRASE', None)  # security: do not give secrets to subprocess
         self.p = Popen(borg_cmd, bufsize=0, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
         self.stdin_fd = self.p.stdin.fileno()
         self.stdout_fd = self.p.stdout.fileno()
@@ -194,9 +196,14 @@ class RemoteRepository:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        if exc_type is not None:
-            self.rollback()
-        self.close()
+        try:
+            if exc_type is not None:
+                self.rollback()
+        finally:
+            # in any case, we want to cleanly close the repo, even if the
+            # rollback can not succeed (e.g. because the connection was
+            # already closed) and raised another exception:
+            self.close()
 
     @property
     def id_str(self):
@@ -224,7 +231,8 @@ class RemoteRepository:
         if testing:
             return [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
         else:  # pragma: no cover
-            return [args.remote_path, 'serve'] + opts
+            remote_path = args.remote_path or os.environ.get('BORG_REMOTE_PATH', 'borg')
+            return [remote_path, 'serve'] + opts
 
     def ssh_cmd(self, location):
         """return a ssh command line that can be prefixed to a borg command line"""
@@ -251,6 +259,24 @@ class RemoteRepository:
                 del self.cache[args]
             return msgid
 
+        def handle_error(error, res):
+            if error == b'DoesNotExist':
+                raise Repository.DoesNotExist(self.location.orig)
+            elif error == b'AlreadyExists':
+                raise Repository.AlreadyExists(self.location.orig)
+            elif error == b'CheckNeeded':
+                raise Repository.CheckNeeded(self.location.orig)
+            elif error == b'IntegrityError':
+                raise IntegrityError(res)
+            elif error == b'PathNotAllowed':
+                raise PathNotAllowed(*res)
+            elif error == b'ObjectNotFound':
+                raise Repository.ObjectNotFound(res[0], self.location.orig)
+            elif error == b'InvalidRPCMethod':
+                raise InvalidRPCMethod(*res)
+            else:
+                raise self.RPCError(res.decode('utf-8'))
+
         calls = list(calls)
         waiting_for = []
         while wait or calls:
@@ -259,22 +285,7 @@ class RemoteRepository:
                     error, res = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
                     if error:
-                        if error == b'DoesNotExist':
-                            raise Repository.DoesNotExist(self.location.orig)
-                        elif error == b'AlreadyExists':
-                            raise Repository.AlreadyExists(self.location.orig)
-                        elif error == b'CheckNeeded':
-                            raise Repository.CheckNeeded(self.location.orig)
-                        elif error == b'IntegrityError':
-                            raise IntegrityError(res)
-                        elif error == b'PathNotAllowed':
-                            raise PathNotAllowed(*res)
-                        elif error == b'ObjectNotFound':
-                            raise Repository.ObjectNotFound(res[0], self.location.orig)
-                        elif error == b'InvalidRPCMethod':
-                            raise InvalidRPCMethod(*res)
-                        else:
-                            raise self.RPCError(res.decode('utf-8'))
+                        handle_error(error, res)
                     else:
                         yield res
                         if not waiting_for and not calls:
@@ -300,6 +311,8 @@ class RemoteRepository:
                         type, msgid, error, res = unpacked
                         if msgid in self.ignore_responses:
                             self.ignore_responses.remove(msgid)
+                            if error:
+                                handle_error(error, res)
                         else:
                             self.responses[msgid] = error, res
                 elif fd is self.stderr_fd:
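
Besides the refactored error handling, this hunk drops BORG_PASSPHRASE from the environment handed to the ssh subprocess and adds a fallback chain for the remote borg binary: an explicit --remote-path wins, then the BORG_REMOTE_PATH environment variable, then plain 'borg'. A standalone sketch of that precedence (the function name is illustrative only):

    import os

    def pick_remote_path(cli_value):
        # mirrors: args.remote_path or os.environ.get('BORG_REMOTE_PATH', 'borg')
        return cli_value or os.environ.get('BORG_REMOTE_PATH', 'borg')

    assert pick_remote_path('/opt/borg/borg') == '/opt/borg/borg'
    os.environ['BORG_REMOTE_PATH'] = '/usr/local/bin/borg'
    assert pick_remote_path(None) == '/usr/local/bin/borg'
    del os.environ['BORG_REMOTE_PATH']
    assert pick_remote_path(None) == 'borg'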

+ 16 - 12
src/borg/repository.py

@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
 
 from .constants import *  # NOQA
 from .hashindex import NSIndex
-from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError
+from .helpers import Error, ErrorWithTraceback, IntegrityError
 from .helpers import Location
 from .helpers import ProgressIndicatorPercent
 from .helpers import bin_to_hex
@@ -96,7 +96,7 @@ class Repository:
     class ObjectNotFound(ErrorWithTraceback):
         """Object with key {} not found in repository {}."""
 
-    def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True):
+    def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
         self.path = os.path.abspath(path)
         self._location = Location('file://%s' % self.path)
         self.io = None
@@ -107,6 +107,7 @@ class Repository:
         self.do_lock = lock
         self.do_create = create
         self.exclusive = exclusive
+        self.append_only = append_only
 
     def __del__(self):
         if self.lock:
@@ -125,6 +126,12 @@ class Repository:
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if exc_type is not None:
+            no_space_left_on_device = exc_type is OSError and exc_val.errno == errno.ENOSPC
+            # The ENOSPC could have originated somewhere else besides the Repository. The cleanup is always safe, unless
+            # EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
+            if self._active_txn and no_space_left_on_device:
+                logger.warning('No space left on device, cleaning up partial transaction to free space.')
+                self.io.cleanup(self.io.get_segments_transaction_id())
             self.rollback()
         self.close()
 
@@ -176,7 +183,9 @@ class Repository:
         shutil.rmtree(self.path)
 
     def get_index_transaction_id(self):
-        indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit()))
+        indices = sorted(int(fn[6:])
+                         for fn in os.listdir(self.path)
+                         if fn.startswith('index.') and fn[6:].isdigit() and os.stat(os.path.join(self.path, fn)).st_size != 0)
         if indices:
             return indices[-1]
         else:
@@ -217,7 +226,9 @@ class Repository:
             raise self.InvalidRepository(path)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
-        self.append_only = self.config.getboolean('repository', 'append_only', fallback=False)
+        # append_only can be set in the constructor
+        # it shouldn't be overridden (True -> False) here
+        self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
         self.id = unhexlify(self.config.get('repository', 'id').strip())
         self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
 
@@ -247,18 +258,13 @@ class Repository:
         except RuntimeError as error:
             assert str(error) == 'hashindex_read failed'  # everything else means we're in *deep* trouble
             logger.warning('Repository index missing or corrupted, trying to recover')
-            try:
-                os.unlink(index_path)
-            except OSError as e:
-                raise InternalOSError(e) from None
+            os.unlink(index_path)
             if not auto_recover:
                 raise
             self.prepare_txn(self.get_transaction_id())
             # don't leave an open transaction around
             self.commit()
             return self.open_index(self.get_transaction_id())
-        except OSError as e:
-            raise InternalOSError(e) from None
 
     def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
@@ -296,8 +302,6 @@ class Repository:
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 return
-            except OSError as os_error:
-                raise InternalOSError(os_error) from None
             if hints[b'version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
                 self.segments = hints[b'segments']
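
The append_only flag can now come from two places: the constructor argument (as passed through by RepositoryServer) and the repository config file. The comment in the hunk spells out the merge rule: either source can enable append-only mode, and the config cannot switch a constructor-enabled flag back off. A standalone sketch of that rule (names are illustrative):

    import configparser

    def effective_append_only(ctor_flag, config_text):
        cfg = configparser.ConfigParser()
        cfg.read_string(config_text)
        # constructor True stays True; otherwise the config decides
        return ctor_flag or cfg.getboolean('repository', 'append_only', fallback=False)

    assert effective_append_only(True, '[repository]\nappend_only = 0\n') is True
    assert effective_append_only(False, '[repository]\nappend_only = 1\n') is True
    assert effective_append_only(False, '[repository]\n') is False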

+ 2 - 2
src/borg/selftest.py

@@ -5,7 +5,7 @@ Self testing module
 The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues
 with the way Borg was compiled or packaged and also bugs in Borg itself.
 
-Theses tests are a subset of the borg/testsuite and are run with Pythons built-in unittest, hence none of
+These tests are a subset of the borg/testsuite and are run with Pythons built-in unittest, hence none of
 the tests used for this can or should be ported to py.test currently.
 
 To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT
@@ -30,7 +30,7 @@ SELFTEST_CASES = [
     ChunkerTestCase,
 ]
 
-SELFTEST_COUNT = 27
+SELFTEST_COUNT = 29
 
 
 class SelfTestResult(TestResult):

+ 1 - 1
src/borg/shellpattern.py

@@ -5,7 +5,7 @@ import re
 def translate(pat):
     """Translate a shell-style pattern to a regular expression.
 
-    The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
+    The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
     matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
     any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character
     "?").

+ 18 - 0
src/borg/testsuite/__init__.py

@@ -117,6 +117,24 @@ class BaseTestCase(unittest.TestCase):
         for sub_diff in diff.subdirs.values():
             self._assert_dirs_equal_cmp(sub_diff)
 
+    @contextmanager
+    def fuse_mount(self, location, mountpoint, mount_options=None):
+        os.mkdir(mountpoint)
+        args = ['mount', location, mountpoint]
+        if mount_options:
+            args += '-o', mount_options
+        self.cmd(*args, fork=True)
+        self.wait_for_mount(mountpoint)
+        yield
+        if sys.platform.startswith('linux'):
+            cmd = 'fusermount -u %s' % mountpoint
+        else:
+            cmd = 'umount %s' % mountpoint
+        os.system(cmd)
+        os.rmdir(mountpoint)
+        # Give the daemon some time to exit
+        time.sleep(.2)
+
     def wait_for_mount(self, path, timeout=5):
         """Wait until a filesystem is mounted on `path`
         """

+ 67 - 9
src/borg/testsuite/archive.py

@@ -6,7 +6,9 @@ from unittest.mock import Mock
 import pytest
 import msgpack
 
-from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
+from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
+from ..archive import BackupOSError, backup_io, backup_io_iter
+from ..item import Item
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
@@ -38,12 +40,12 @@ def tests_stats_progress(stats, columns=80):
 
     out = StringIO()
     stats.update(10**3, 0, unique=False)
-    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
     out = StringIO()
-    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
@@ -93,7 +95,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
 class ChunkBufferTestCase(BaseTestCase):
 
     def test(self):
-        data = [{b'foo': 1}, {b'bar': 2}]
+        data = [Item(path='p1'), Item(path='p2')]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -105,11 +107,11 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
     def test_partial(self):
-        big = b"0123456789" * 10000
-        data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}]
+        big = "0123456789" * 10000
+        data = [Item(path='full', source=big), Item(path='partial', source=big)]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -126,7 +128,7 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
 
 class RobustUnpackerTestCase(BaseTestCase):
@@ -138,7 +140,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
 
     def process(self, input):
-        unpacker = RobustUnpacker(validator=self._validator)
+        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         result = []
         for should_sync, chunks in input:
             if should_sync:
@@ -183,3 +185,59 @@ class RobustUnpackerTestCase(BaseTestCase):
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+
+
+@pytest.fixture
+def item_keys_serialized():
+    return [msgpack.packb(name) for name in ITEM_KEYS]
+
+
+@pytest.mark.parametrize('packed',
+    [b'', b'x', b'foobar', ] +
+    [msgpack.packb(o) for o in (
+        [None, 0, 0.0, False, '', {}, [], ()] +
+        [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
+    )])
+def test_invalid_msgpacked_item(packed, item_keys_serialized):
+    assert not valid_msgpacked_dict(packed, item_keys_serialized)
+
+
+@pytest.mark.parametrize('packed',
+    [msgpack.packb(o) for o in [
+        {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+        dict((k, b'') for k in ITEM_KEYS),  # as big (key count) as it gets
+        dict((k, b'x' * 1000) for k in ITEM_KEYS),  # as big (key count and volume) as it gets
+    ]])
+def test_valid_msgpacked_items(packed, item_keys_serialized):
+    assert valid_msgpacked_dict(packed, item_keys_serialized)
+
+
+def test_key_length_msgpacked_items():
+    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    data = {key: b''}
+    item_keys_serialized = [msgpack.packb(key), ]
+    assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
+
+
+def test_backup_io():
+    with pytest.raises(BackupOSError):
+        with backup_io():
+            raise OSError(123)
+
+
+def test_backup_io_iter():
+    class Iterator:
+        def __init__(self, exc):
+            self.exc = exc
+
+        def __next__(self):
+            raise self.exc()
+
+    oserror_iterator = Iterator(OSError)
+    with pytest.raises(BackupOSError):
+        for _ in backup_io_iter(oserror_iterator):
+            pass
+
+    normal_iterator = Iterator(StopIteration)
+    for _ in backup_io_iter(normal_iterator):
+        assert False, 'StopIteration handled incorrectly'
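
These two tests pin down the contract of the new backup_io helpers: an OSError raised while reading input files is translated into BackupOSError (a distinct type, so the create code path can tell source-file I/O problems from internal errors), and backup_io_iter applies the same translation around each step of an iterator. A minimal model of the context-manager half, assuming only what the tests show (the real helpers live in borg.archive):

    from contextlib import contextmanager

    class BackupOSErrorModel(Exception):
        """Stand-in for borg.archive.BackupOSError."""

    @contextmanager
    def backup_io_model():
        try:
            yield
        except OSError as os_error:
            raise BackupOSErrorModel(os_error) from os_error

    # mirrors test_backup_io above:
    try:
        with backup_io_model():
            raise OSError(123)
    except BackupOSErrorModel:
        pass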

+ 204 - 78
src/borg/testsuite/archiver.py

@@ -23,7 +23,7 @@ except ImportError:
     pass
 
 from .. import xattr, helpers, platform
-from ..archive import Archive, ChunkBuffer, ArchiveRecreater
+from ..archive import Archive, ChunkBuffer, ArchiveRecreater, flags_noatime, flags_normal
 from ..archiver import Archiver
 from ..cache import Cache
 from ..constants import *  # NOQA
@@ -225,7 +225,8 @@ class ArchiverTestCaseBase(BaseTestCase):
 
     def tearDown(self):
         os.chdir(self._old_wd)
-        shutil.rmtree(self.tmpdir)
+        # note: ignore_errors=True as workaround for issue #862
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
 
     def cmd(self, *args, **kw):
         exit_code = kw.pop('exit_code', 0)
@@ -241,6 +242,13 @@ class ArchiverTestCaseBase(BaseTestCase):
     def create_src_archive(self, name):
         self.cmd('create', self.repository_location + '::' + name, src_dir)
 
+    def open_archive(self, name):
+        repository = Repository(self.repository_path)
+        with repository:
+            manifest, key = Manifest.load(repository)
+            archive = Archive(repository, key, manifest, name)
+        return archive, repository
+
     def create_regular_file(self, name, size=0, contents=None):
         filename = os.path.join(self.input_path, name)
         if not os.path.exists(os.path.dirname(filename)):
@@ -294,10 +302,14 @@ class ArchiverTestCaseBase(BaseTestCase):
                 # File mode
                 os.chmod('input/dir2', 0o555)  # if we take away write perms, we need root to remove contents
                 # File owner
-                os.chown('input/file1', 100, 200)
+                os.chown('input/file1', 100, 200)  # raises OSError invalid argument on cygwin
                 have_root = True  # we have (fake)root
             except PermissionError:
                 have_root = False
+            except OSError as e:
+                if e.errno != errno.EINVAL:
+                    raise
+                have_root = False
             return have_root
         else:
             return False
@@ -389,8 +401,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert os.readlink('input/link1') == 'somewhere'
 
     def test_atime(self):
+        def has_noatime(some_file):
+            atime_before = os.stat(some_file).st_atime_ns
+            try:
+                os.close(os.open(some_file, flags_noatime))
+            except PermissionError:
+                return False
+            else:
+                atime_after = os.stat(some_file).st_atime_ns
+                noatime_used = flags_noatime != flags_normal
+                return noatime_used and atime_before == atime_after
+
         self.create_test_files()
         atime, mtime = 123456780, 234567890
+        have_noatime = has_noatime('input/file1')
         os.utime('input/file1', (atime, mtime))
         self.cmd('init', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
@@ -399,7 +423,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         sti = os.stat('input/file1')
         sto = os.stat('output/input/file1')
         assert sti.st_mtime_ns == sto.st_mtime_ns == mtime * 1e9
-        if hasattr(os, 'O_NOATIME'):
+        if have_noatime:
             assert sti.st_atime_ns == sto.st_atime_ns == atime * 1e9
         else:
             # it touched the input file's atime while backing it up
@@ -419,11 +443,30 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             return repository.id
 
     def test_sparse_file(self):
-        # no sparse file support on Mac OS X
-        sparse_support = sys.platform != 'darwin'
+        def is_sparse(fn, total_size, hole_size):
+            st = os.stat(fn)
+            assert st.st_size == total_size
+            sparse = True
+            if sparse and hasattr(st, 'st_blocks') and st.st_blocks * 512 >= st.st_size:
+                sparse = False
+            if sparse and hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'):
+                with open(fn, 'rb') as fd:
+                    # only check if the first hole is as expected, because the 2nd hole check
+                    # is problematic on xfs due to its "dynamic speculative EOF preallocation
+                    try:
+                        if fd.seek(0, os.SEEK_HOLE) != 0:
+                            sparse = False
+                        if fd.seek(0, os.SEEK_DATA) != hole_size:
+                            sparse = False
+                    except OSError:
+                        # OS/FS does not really support SEEK_HOLE/SEEK_DATA
+                        sparse = False
+            return sparse
+
         filename = os.path.join(self.input_path, 'sparse')
         content = b'foobar'
         hole_size = 5 * (1 << CHUNK_MAX_EXP)  # 5 full chunker buffers
+        total_size = hole_size + len(content) + hole_size
         with open(filename, 'wb') as fd:
             # create a file that has a hole at the beginning and end (if the
             # OS and filesystem supports sparse files)
@@ -432,26 +475,23 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             fd.seek(hole_size, 1)
             pos = fd.tell()
             fd.truncate(pos)
-        total_len = hole_size + len(content) + hole_size
-        st = os.stat(filename)
-        self.assert_equal(st.st_size, total_len)
-        if sparse_support and hasattr(st, 'st_blocks'):
-            self.assert_true(st.st_blocks * 512 < total_len / 9)  # is input sparse?
-        self.cmd('init', self.repository_location)
-        self.cmd('create', self.repository_location + '::test', 'input')
-        with changedir('output'):
-            self.cmd('extract', '--sparse', self.repository_location + '::test')
-        self.assert_dirs_equal('input', 'output/input')
-        filename = os.path.join(self.output_path, 'input', 'sparse')
-        with open(filename, 'rb') as fd:
-            # check if file contents are as expected
-            self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
-            self.assert_equal(fd.read(len(content)), content)
-            self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
-        st = os.stat(filename)
-        self.assert_equal(st.st_size, total_len)
-        if sparse_support and hasattr(st, 'st_blocks'):
-            self.assert_true(st.st_blocks * 512 < total_len / 9)  # is output sparse?
+        # we first check if we could create a sparse input file:
+        sparse_support = is_sparse(filename, total_size, hole_size)
+        if sparse_support:
+            # we could create a sparse input file, so creating a backup of it and
+            # extracting it again (as sparse) should also work:
+            self.cmd('init', self.repository_location)
+            self.cmd('create', self.repository_location + '::test', 'input')
+            with changedir(self.output_path):
+                self.cmd('extract', '--sparse', self.repository_location + '::test')
+            self.assert_dirs_equal('input', 'output/input')
+            filename = os.path.join(self.output_path, 'input', 'sparse')
+            with open(filename, 'rb') as fd:
+                # check if file contents are as expected
+                self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
+                self.assert_equal(fd.read(len(content)), content)
+                self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
+            self.assert_true(is_sparse(filename, total_size, hole_size))
 
     def test_unusual_filenames(self):
         filenames = ['normal', 'with some blanks', '(with_parens)', ]
@@ -1168,6 +1208,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assertEqual(output_1, output_2)
         self.assertNotEqual(output_1, output_3)
 
+    def test_list_repository_format(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test-1', src_dir)
+        self.cmd('create', self.repository_location + '::test-2', src_dir)
+        output_1 = self.cmd('list', self.repository_location)
+        output_2 = self.cmd('list', '--format', '{archive:<36} {time} [{id}]{NL}', self.repository_location)
+        self.assertEqual(output_1, output_2)
+        output_1 = self.cmd('list', '--short', self.repository_location)
+        self.assertEqual(output_1, 'test-1\ntest-2\n')
+        output_1 = self.cmd('list', '--format', '{barchive}/', self.repository_location)
+        self.assertEqual(output_1, 'test-1/test-2/')
+
     def test_list_hash(self):
         self.create_regular_file('empty_file', size=0)
         self.create_regular_file('amb', contents=b'a' * 1000000)
@@ -1278,52 +1330,96 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
 
     @unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed')
-    def test_fuse_mount_repository(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse(self):
         self.cmd('init', self.repository_location)
         self.create_test_files()
         self.cmd('create', self.repository_location + '::archive', 'input')
         self.cmd('create', self.repository_location + '::archive2', 'input')
-        try:
-            self.cmd('mount', self.repository_location, mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            os.remove(os.path.join('input', 'flagfile'))
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        # mount the whole repository, archive contents shall show up in archivename subdirs of mountpoint:
+        with self.fuse_mount(self.repository_location, mountpoint):
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        # mount only 1 archive, its contents shall show up directly in mountpoint:
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
+            # regular file
+            in_fn = 'input/file1'
+            out_fn = os.path.join(mountpoint, 'input', 'file1')
+            # stat
+            sti1 = os.stat(in_fn)
+            sto1 = os.stat(out_fn)
+            assert sti1.st_mode == sto1.st_mode
+            assert sti1.st_uid == sto1.st_uid
+            assert sti1.st_gid == sto1.st_gid
+            assert sti1.st_size == sto1.st_size
+            assert sti1.st_atime == sto1.st_atime
+            assert sti1.st_ctime == sto1.st_ctime
+            assert sti1.st_mtime == sto1.st_mtime
+            # note: there is another hardlink to this, see below
+            assert sti1.st_nlink == sto1.st_nlink == 2
+            # read
+            with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f:
+                assert in_f.read() == out_f.read()
+            # list/read xattrs
+            if xattr.is_enabled(self.input_path):
+                assert xattr.listxattr(out_fn) == ['user.foo', ]
+                assert xattr.getxattr(out_fn, 'user.foo') == b'bar'
             else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert xattr.listxattr(out_fn) == []
+                try:
+                    xattr.getxattr(out_fn, 'user.foo')
+                except OSError as e:
+                    assert e.errno == llfuse.ENOATTR
+                else:
+                    assert False, "expected OSError(ENOATTR), but no error was raised"
+            # hardlink (to 'input/file1')
+            in_fn = 'input/hardlink'
+            out_fn = os.path.join(mountpoint, 'input', 'hardlink')
+            sti2 = os.stat(in_fn)
+            sto2 = os.stat(out_fn)
+            assert sti2.st_nlink == sto2.st_nlink == 2
+            assert sto1.st_ino == sto2.st_ino
+            # symlink
+            in_fn = 'input/link1'
+            out_fn = os.path.join(mountpoint, 'input', 'link1')
+            sti = os.stat(in_fn, follow_symlinks=False)
+            sto = os.stat(out_fn, follow_symlinks=False)
+            assert stat.S_ISLNK(sti.st_mode)
+            assert stat.S_ISLNK(sto.st_mode)
+            assert os.readlink(in_fn) == os.readlink(out_fn)
+            # FIFO
+            out_fn = os.path.join(mountpoint, 'input', 'fifo1')
+            sto = os.stat(out_fn)
+            assert stat.S_ISFIFO(sto.st_mode)
 
     @unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed')
-    def test_fuse_mount_archive(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse_allow_damaged_files(self):
         self.cmd('init', self.repository_location)
-        self.create_test_files()
-        self.cmd('create', self.repository_location + '::archive', 'input')
-        try:
-            self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        self.create_src_archive('archive')
+        # Get rid of a chunk and repair it
+        archive, repository = self.open_archive('archive')
+        with repository:
+            for item in archive.iter_items():
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
+                    path = item.path  # store full path for later
+                    break
             else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert False  # missed the file
+            repository.commit()
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            with pytest.raises(OSError) as excinfo:
+                open(os.path.join(mountpoint, path))
+            assert excinfo.value.errno == errno.EIO
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint, 'allow_damaged_files'):
+            open(os.path.join(mountpoint, path)).close()
 
     def verify_aes_counter_uniqueness(self, method):
         seen = set()  # Chunks already seen
@@ -1628,6 +1724,14 @@ class ArchiverTestCaseBinary(ArchiverTestCase):
     def test_recreate_changed_source(self):
         pass
 
+    @unittest.skip('test_basic_functionality seems incompatible with fakeroot and/or the binary.')
+    def test_basic_functionality(self):
+        pass
+
+    @unittest.skip('test_overwrite seems incompatible with fakeroot and/or the binary.')
+    def test_overwrite(self):
+        pass
+
 
 class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
@@ -1638,13 +1742,6 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
             self.create_src_archive('archive1')
             self.create_src_archive('archive2')
 
-    def open_archive(self, name):
-        repository = Repository(self.repository_path)
-        with repository:
-            manifest, key = Manifest.load(repository)
-            archive = Archive(repository, key, manifest, name)
-        return archive, repository
-
     def test_check_usage(self):
         output = self.cmd('check', '-v', '--progress', self.repository_location, exit_code=0)
         self.assert_in('Starting repository check', output)
@@ -1666,13 +1763,46 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1].id)
+                if item.path.endswith('testsuite/archiver.py'):
+                    valid_chunks = item.chunks
+                    killed_chunk = valid_chunks[-1]
+                    repository.delete(killed_chunk.id)
                     break
+            else:
+                self.assert_true(False)  # should not happen
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
-        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('New missing file chunk detected', output)
         self.cmd('check', self.repository_location, exit_code=0)
+        # check that the file in the old archives has now a different chunk list without the killed chunk
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_not_equal(valid_chunks, item.chunks)
+                        self.assert_not_in(killed_chunk, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
+        # do a fresh backup (that will include the killed chunk)
+        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
+            self.create_src_archive('archive3')
+        # check should be able to heal the file now:
+        output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('Healed previously missing file chunk', output)
+        self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
+        # check that the file in the old archives has the correct chunks again
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_equal(valid_chunks, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
 
     def test_missing_archive_item_chunk(self):
         archive, repository = self.open_archive('archive1')
@@ -1721,8 +1851,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    chunk = item[b'chunks'][-1]
+                if item.path.endswith('testsuite/archiver.py'):
+                    chunk = item.chunks[-1]
                     data = repository.get(chunk.id) + b'1234'
                     repository.put(chunk.id, data)
                     break
@@ -1757,11 +1887,7 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     # this was introduced because some tests expect stderr contents to show up
     # in "output" also. Also, the non-forking exec_cmd catches both, too.
     @unittest.skip('deadlock issues')
-    def test_fuse_mount_repository(self):
-        pass
-
-    @unittest.skip('deadlock issues')
-    def test_fuse_mount_archive(self):
+    def test_fuse(self):
         pass
 
     @unittest.skip('only works locally')

+ 23 - 0
src/borg/testsuite/crypto.py

@@ -1,6 +1,8 @@
 from binascii import hexlify, unhexlify
 
 from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
+from ..crypto import increment_iv, bytes16_to_int, int_to_bytes16
+
 from . import BaseTestCase
 
 # Note: these tests are part of the self test, do not use or import py.test functionality here.
@@ -16,6 +18,27 @@ class CryptoTestCase(BaseTestCase):
         self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1)
         self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1')
 
+    def test_bytes16_to_int(self):
+        self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'), 1)
+        self.assert_equal(int_to_bytes16(1), b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1')
+        self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0'), 2 ** 64)
+        self.assert_equal(int_to_bytes16(2 ** 64), b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0')
+
+    def test_increment_iv(self):
+        iv0 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'
+        iv1 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'
+        iv2 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2'
+        self.assert_equal(increment_iv(iv0, 0), iv0)
+        self.assert_equal(increment_iv(iv0, 1), iv1)
+        self.assert_equal(increment_iv(iv0, 2), iv2)
+        iva = b'\0\0\0\0\0\0\0\0\xff\xff\xff\xff\xff\xff\xff\xff'
+        ivb = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x00'
+        ivc = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x01'
+        self.assert_equal(increment_iv(iva, 0), iva)
+        self.assert_equal(increment_iv(iva, 1), ivb)
+        self.assert_equal(increment_iv(iva, 2), ivc)
+        self.assert_equal(increment_iv(iv0, 2**64), ivb)
+
     def test_aes(self):
         key = b'X' * 32
         data = b'foo' * 10
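
The expected values in test_increment_iv amount to a simple specification: the 16-byte IV is treated as one big-endian 128-bit counter, so a carry out of the low 8 bytes propagates into the upper half. A pure-Python model of the same arithmetic (the real helpers live in the Cython module borg.crypto):

    def int_to_bytes16_model(i):
        return i.to_bytes(16, 'big')

    def increment_iv_model(iv, amount):
        return int_to_bytes16_model(int.from_bytes(iv, 'big') + amount)

    iva = b'\0\0\0\0\0\0\0\0\xff\xff\xff\xff\xff\xff\xff\xff'
    ivb = b'\0\0\0\0\0\0\0\1' + b'\0' * 8
    assert increment_iv_model(iva, 1) == ivb
    assert increment_iv_model(b'\0' * 16, 2 ** 64) == ivb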

+ 17 - 1
src/borg/testsuite/helpers.py

@@ -10,7 +10,7 @@ import msgpack
 import msgpack.fallback
 
 from ..helpers import Location
-from ..helpers import partial_format, format_file_size, format_timedelta
+from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError
 from ..helpers import make_path_safe, clean_lines
 from ..helpers import prune_within, prune_split
 from ..helpers import get_cache_dir, get_keys_dir
@@ -22,6 +22,7 @@ from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_excludes
 from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
+
 from . import BaseTestCase, environment_variable, FakeInputs
 
 if sys.platform == 'win32':
@@ -958,3 +959,18 @@ def test_compression_decider2():
     assert compr_spec['name'] == 'zlib'
     compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma')))
     assert compr_spec['name'] == 'lzma'
+
+
+def test_format_line():
+    data = dict(foo='bar baz')
+    assert format_line('', data) == ''
+    assert format_line('{foo}', data) == 'bar baz'
+    assert format_line('foo{foo}foo', data) == 'foobar bazfoo'
+
+
+def test_format_line_erroneous():
+    data = dict()
+    with pytest.raises(PlaceholderError):
+        assert format_line('{invalid}', data)
+    with pytest.raises(PlaceholderError):
+        assert format_line('{}', data)
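
The two new tests describe format_line's contract rather than its implementation: placeholders are filled from a dict, and a bad placeholder surfaces as PlaceholderError instead of a raw KeyError or IndexError. A minimal model that satisfies exactly these tests (an assumption; the real function in borg.helpers may catch a different set of errors):

    class PlaceholderErrorModel(Exception):
        """Stand-in for borg.helpers.PlaceholderError."""

    def format_line_model(fmt, data):
        try:
            return fmt.format(**data)
        except (KeyError, IndexError, ValueError) as err:
            raise PlaceholderErrorModel(fmt) from err

    assert format_line_model('foo{foo}foo', {'foo': 'bar baz'}) == 'foobar bazfoo'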

+ 4 - 4
src/borg/testsuite/item.py

@@ -35,13 +35,13 @@ def test_item_empty():
 
 def test_item_from_dict():
     # does not matter whether we get str or bytes keys
-    item = Item({b'path': b'/a/b/c', b'mode': 0o666})
+    item = Item({b'path': '/a/b/c', b'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'path' in item
 
     # does not matter whether we get str or bytes keys
-    item = Item({'path': b'/a/b/c', 'mode': 0o666})
+    item = Item({'path': '/a/b/c', 'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'mode' in item
@@ -60,7 +60,7 @@ def test_item_from_dict():
 
 
 def test_item_from_kw():
-    item = Item(path=b'/a/b/c', mode=0o666)
+    item = Item(path='/a/b/c', mode=0o666)
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
 
@@ -107,7 +107,7 @@ def test_item_se_str_property():
         item.path = 42
 
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
-    item = Item({'path': b'/a/\xfc/c'})
+    item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
     assert item.as_dict() == {'path': b'/a/\xfc/c'}
     del item.path
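
These adjustments follow the same theme as the platform and archiver changes earlier in the commit: metadata is handled through Item objects with plain-str keys and attributes instead of raw dicts with b'...' keys. The minimal usage surface the tests rely on:

    from borg.item import Item

    item = Item(path='/a/b/c', mode=0o666)
    assert item.path == '/a/b/c'
    assert item.mode == 0o666
    assert 'path' in item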

+ 17 - 17
src/borg/testsuite/platform.py

@@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, access=None, default=None, numeric_owner=False):
-        item = {b'acl_access': access, b'acl_default': default}
+        item = {'acl_access': access, 'acl_default': default}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
         file = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False)
-        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access'])
+        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access'])
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True)
-        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
-        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
+        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
+        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
 
     def test_default_acl(self):
         self.assert_equal(self.get_acl(self.tmpdir), {})
         self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL)
 
     def test_non_ascii_acl(self):
         # Testing non-ascii ACL processing to see whether our code is robust.
@@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase):
         group_entry_numeric = 'group:666:rw-:666'.encode('ascii')
         acl = b'\n'.join([nothing_special, user_entry, group_entry])
         self.set_acl(file.name, access=acl, numeric_owner=False)
-        acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=acl, numeric_owner=True)
-        acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
 
@@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, acl, numeric_owner=False):
-        item = {b'acl_extended': acl}
+        item = {'acl_extended': acl}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
@@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase):
         file2 = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended'])
-        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended'])
+        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended'])
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended'])
 
 
 @unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')

+ 7 - 4
src/borg/testsuite/repository.py

@@ -8,7 +8,7 @@ from unittest.mock import patch
 
 from ..hashindex import NSIndex
 from ..helpers import Location
-from ..helpers import IntegrityError, InternalOSError
+from ..helpers import IntegrityError
 from ..locking import UpgradableLock, LockFailed
 from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
 from ..repository import Repository, LoggedIO, MAGIC
@@ -244,11 +244,14 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
 
 
 class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
+    def open(self, create=False):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create, append_only=True)
+
     def test_destroy_append_only(self):
         # Can't destroy append only repo (via the API)
-        self.repository.append_only = True
         with self.assert_raises(ValueError):
             self.repository.destroy()
+        assert self.repository.append_only
 
     def test_append_only(self):
         def segments_in_repository():
@@ -300,7 +303,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
         hints = os.path.join(self.repository.path, 'hints.1')
         os.unlink(hints)
         os.mkdir(hints)
-        with self.assert_raises(InternalOSError):
+        with self.assert_raises(OSError):
             self.do_commit()
 
     def test_index(self):
@@ -318,7 +321,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
         index = os.path.join(self.repository.path, 'index.1')
         os.unlink(index)
         os.mkdir(index)
-        with self.assert_raises(InternalOSError):
+        with self.assert_raises(OSError):
             self.do_commit()
 
 

+ 14 - 11
src/borg/xattr.py

@@ -2,6 +2,7 @@
 """
 import errno
 import os
+import re
 import subprocess
 import sys
 import tempfile
@@ -52,23 +53,25 @@ if libc_name is None:
 # the 'test_extract_capabilities' test, but also allows xattrs to work with fakeroot on Linux in normal use.
 # TODO: Check whether fakeroot supports xattrs on all platforms supported below.
 # TODO: If that's the case then we can make Borg fakeroot-xattr-compatible on these as well.
-LD_PRELOAD = os.environ.get('LD_PRELOAD', '')
 XATTR_FAKEROOT = False
-if sys.platform.startswith('linux') and 'fakeroot' in LD_PRELOAD:
-    fakeroot_version = LooseVersion(subprocess.check_output(['fakeroot', '-v']).decode('ascii').split()[-1])
-    if fakeroot_version >= LooseVersion("1.20.2"):
-        # 1.20.2 has been confirmed to have xattr support
-        # 1.18.2 has been confirmed not to have xattr support
-        # Versions in-between are unknown
-        libc_name = LD_PRELOAD
-        XATTR_FAKEROOT = True
-
+if sys.platform.startswith('linux'):
+    LD_PRELOAD = os.environ.get('LD_PRELOAD', '')
+    preloads = re.split("[ :]", LD_PRELOAD)
+    for preload in preloads:
+        if preload.startswith("libfakeroot"):
+            fakeroot_version = LooseVersion(subprocess.check_output(['fakeroot', '-v']).decode('ascii').split()[-1])
+            if fakeroot_version >= LooseVersion("1.20.2"):
+                # 1.20.2 has been confirmed to have xattr support
+                # 1.18.2 has been confirmed not to have xattr support
+                # Versions in-between are unknown
+                libc_name = preload
+                XATTR_FAKEROOT = True
+            break
 
 try:
     libc = CDLL(libc_name, use_errno=True)
 except OSError as e:
     msg = "Can't find C library [%s]. Try installing ldconfig, gcc/cc or objdump." % e
-    logger.error(msg)
     raise Exception(msg)
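
The rewritten detection no longer assumes LD_PRELOAD contains nothing but the fakeroot library: the variable may list several entries separated by spaces or colons, and only an entry starting with "libfakeroot" is considered. In isolation, the selection logic looks like this (the function name is illustrative):

    import re

    def find_fakeroot_preload(ld_preload):
        for preload in re.split('[ :]', ld_preload):
            if preload.startswith('libfakeroot'):
                return preload
        return None

    assert find_fakeroot_preload('libfakeroot-sysv.so:libsomething.so') == 'libfakeroot-sysv.so'
    assert find_fakeroot_preload('libsomething.so') is None
    assert find_fakeroot_preload('') is None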
 
 

+ 1 - 1
tox.ini

@@ -7,7 +7,7 @@ envlist = py{34,35,36},flake8
 [testenv]
 deps =
      -rrequirements.d/development.txt
-     attic
+     -rrequirements.d/attic.txt
 commands = py.test --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite}
 # fakeroot -u needs some env vars:
 passenv = *