
Merge branch 'master' into lrucache

Alan Jenkins 9 years ago
parent
commit
5e0013c5db
84 changed files with 5595 additions and 2515 deletions
  1. .coveragerc (+17 -0)
  2. .gitattributes (+1 -1)
  3. .gitignore (+10 -0)
  4. .travis.yml (+45 -10)
  5. .travis/install.sh (+43 -0)
  6. .travis/run.sh (+23 -0)
  7. .travis/upload_coverage.sh (+13 -0)
  8. AUTHORS (+9 -3)
  9. CHANGES (+0 -163)
  10. CHANGES.rst (+359 -0)
  11. LICENSE (+1 -0)
  12. MANIFEST.in (+3 -2)
  13. README.rst (+115 -43)
  14. attic/_version.py (+0 -197)
  15. attic/crypto.pyx (+0 -125)
  16. attic/testsuite/archiver.py (+0 -528)
  17. attic/testsuite/chunker.py (+0 -29)
  18. attic/testsuite/mock.py (+0 -5)
  19. attic/testsuite/run.py (+0 -10)
  20. borg/__init__.py (+0 -0)
  21. borg/__main__.py (+3 -0)
  22. borg/_chunker.c (+45 -15)
  23. borg/_hashindex.c (+58 -25)
  24. borg/_version.py (+239 -0)
  25. borg/archive.py (+191 -91)
  26. borg/archiver.py (+273 -90)
  27. borg/cache.py (+161 -31)
  28. borg/chunker.pyx (+17 -7)
  29. borg/crypto.pyx (+173 -0)
  30. borg/fuse.py (+17 -13)
  31. borg/hashindex.pyx (+18 -7)
  32. borg/helpers.py (+83 -67)
  33. borg/key.py (+197 -103)
  34. borg/locking.py (+286 -0)
  35. borg/lrucache.py (+0 -0)
  36. borg/platform.py (+4 -3)
  37. borg/platform_darwin.pyx (+1 -1)
  38. borg/platform_freebsd.pyx (+1 -1)
  39. borg/platform_linux.pyx (+1 -1)
  40. borg/remote.py (+76 -51)
  41. borg/repository.py (+82 -31)
  42. borg/testsuite/__init__.py (+6 -33)
  43. borg/testsuite/archive.py (+32 -6)
  44. borg/testsuite/archiver.py (+675 -0)
  45. borg/testsuite/chunker.py (+31 -0)
  46. borg/testsuite/crypto.py (+15 -10)
  47. borg/testsuite/hashindex.py (+30 -5)
  48. borg/testsuite/helpers.py (+31 -48)
  49. borg/testsuite/key.py (+15 -14)
  50. borg/testsuite/locking.py (+121 -0)
  51. borg/testsuite/lrucache.py (+3 -3)
  52. borg/testsuite/platform.py (+5 -5)
  53. borg/testsuite/repository.py (+22 -12)
  54. borg/testsuite/xattr.py (+5 -3)
  55. borg/xattr.py (+2 -2)
  56. docs/Makefile (+11 -15)
  57. docs/_themes/attic/sidebarlogo.html (+0 -7)
  58. docs/_themes/attic/sidebarusefullinks.html (+0 -10)
  59. docs/_themes/local/sidebarlogo.html (+5 -0)
  60. docs/_themes/local/sidebarusefullinks.html (+20 -0)
  61. docs/_themes/local/static/local.css_t (+65 -31)
  62. docs/_themes/local/theme.conf (+1 -1)
  63. docs/changes.rst (+4 -0)
  64. docs/conf.py (+20 -20)
  65. docs/development.rst (+67 -0)
  66. docs/faq.rst (+100 -8)
  67. docs/foreword.rst (+0 -62)
  68. docs/global.rst.inc (+11 -10)
  69. docs/index.rst (+7 -59)
  70. docs/installation.rst (+144 -36)
  71. docs/internals.rst (+391 -0)
  72. docs/intro.rst (+7 -0)
  73. docs/misc/create_chunker-params.txt (+116 -0)
  74. docs/misc/create_compression.txt (+130 -0)
  75. docs/quickstart.rst (+33 -29)
  76. docs/support.rst (+34 -0)
  77. docs/update_usage.sh (+6 -6)
  78. docs/usage.rst (+170 -29)
  79. requirements.d/development.txt (+5 -0)
  80. scripts/attic (+0 -4)
  81. setup.cfg (+8 -0)
  82. setup.py (+54 -38)
  83. tox.ini (+10 -6)
  84. versioneer.py (+618 -350)

+ 17 - 0
.coveragerc

@@ -0,0 +1,17 @@
+[run]
+branch = True
+source = borg
+omit =
+    borg/__init__.py
+    borg/__main__.py
+    borg/_version.py
+
+[report]
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
+ignore_errors = True
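
The ``.coveragerc`` above is read by coverage.py, which the Travis setup
below drives through pytest-cov and codecov. A minimal sketch of where the
``[run]`` and ``[report]`` settings take effect, using the coverage.py API
directly (illustrative only, not how the test suite actually invokes it)::

    import coverage

    cov = coverage.Coverage(config_file='.coveragerc')  # [run]: branch, source, omit
    cov.start()
    import borg.helpers     # anything under "source = borg" is measured
    cov.stop()
    cov.save()
    cov.report()            # [report]: exclude_lines, ignore_errors apply here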

+ 1 - 1
.gitattributes

@@ -1 +1 @@
-attic/_version.py export-subst
+borg/_version.py export-subst

+ 10 - 0
.gitignore

@@ -6,8 +6,18 @@ env
 .tox
 hashindex.c
 chunker.c
+crypto.c
+platform_darwin.c
+platform_freebsd.c
+platform_linux.c
 *.egg-info
 *.pyc
 *.pyo
 *.so
 docs/usage/*.inc
+.idea/
+.cache/
+borg.build/
+borg.dist/
+borg.exe
+.coverage

+ 45 - 10
.travis.yml

@@ -1,12 +1,47 @@
+sudo: required
+
 language: python
-python:
-  - "3.2"
-  - "3.3"
-  - "3.4"
-# command to install dependencies
+
+cache:
+    directories:
+        - $HOME/.cache/pip
+
+matrix:
+    include:
+        - python: 3.2
+          os: linux
+          env: TOXENV=py32
+        - python: 3.3
+          os: linux
+          env: TOXENV=py33
+        - python: 3.4
+          os: linux
+          env: TOXENV=py34
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py32
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py33
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py34
+
 install:
-  - "sudo apt-get install -y libacl1-dev"
-  - "pip install --use-mirrors Cython"
-  - "pip install -e ."
-# command to run tests
-script: fakeroot -u python -m attic.testsuite.run -vb
+    - ./.travis/install.sh
+
+script:
+    - ./.travis/run.sh
+
+after_success:
+    - ./.travis/upload_coverage.sh
+
+notifications:
+    irc:
+        channels:
+            - "irc.freenode.org#borgbackup"
+        use_notice: true
+        skip_join: true

+ 43 - 0
.travis/install.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -e
+set -x
+
+if [[ "$(uname -s)" == 'Darwin' ]]; then
+    brew update || brew update
+
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        brew outdated openssl || brew upgrade openssl
+    fi
+
+    if which pyenv > /dev/null; then
+        eval "$(pyenv init -)"
+    fi
+
+    brew outdated pyenv || brew upgrade pyenv
+
+    case "${TOXENV}" in
+        py32)
+            pyenv install 3.2.6
+            pyenv global 3.2.6
+            ;;
+        py33)
+            pyenv install 3.3.6
+            pyenv global 3.3.6
+            ;;
+        py34)
+            pyenv install 3.4.3
+            pyenv global 3.4.3
+            ;;
+    esac
+    pyenv rehash
+    python -m pip install --user virtualenv
+else
+    pip install virtualenv
+    sudo apt-get install -y libacl1-dev
+fi
+
+python -m virtualenv ~/.venv
+source ~/.venv/bin/activate
+pip install tox pytest pytest-cov codecov Cython
+pip install -e .

+ 23 - 0
.travis/run.sh

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+set -x
+
+if [[ "$(uname -s)" == "Darwin" ]]; then
+    eval "$(pyenv init -)"
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        # set our flags to use homebrew openssl
+        export ARCHFLAGS="-arch x86_64"
+        export LDFLAGS="-L/usr/local/opt/openssl/lib"
+        export CFLAGS="-I/usr/local/opt/openssl/include"
+    fi
+fi
+
+source ~/.venv/bin/activate
+
+if [[ "$(uname -s)" == "Darwin" ]]; then
+    # no fakeroot on OS X
+    sudo tox -e $TOXENV
+else
+    fakeroot -u tox
+fi

+ 13 - 0
.travis/upload_coverage.sh

@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+set -x
+
+NO_COVERAGE_TOXENVS=(pep8)
+if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then
+    source ~/.venv/bin/activate
+    ln .tox/.coverage .coverage
+    # on osx, tests run as root, need access to .coverage
+    sudo chmod 666 .coverage
+    codecov -e TRAVIS_OS_NAME TOXENV
+fi

+ 9 - 3
AUTHORS

@@ -1,9 +1,15 @@
-Attic is written and maintained by Jonas Borgström and
-various contributors:
+Borg Developers / Contributors ("The Borg Collective")
+``````````````````````````````````````````````````````
+- Thomas Waldmann <tw@waldmann-edv.de>
+- Antoine Beaupré
+- Radek Podgorny <radek@podgorny.cz>
+- Yuri D'Elia
+
+Borg is a fork of Attic. Attic is written and maintained
+by Jonas Borgström and various contributors:
 
 Development Lead
 ````````````````
-
 - Jonas Borgström <jonas@borgstrom.se>
 
 Patches and Suggestions

+ 0 - 163
CHANGES

@@ -1,163 +0,0 @@
-Attic Changelog
-===============
-
-Here you can see the full list of changes between each Attic release.
-
-Version 0.17
-------------
-
-(bugfix release, released on X)
-- Fix hashindex ARM memory alignment issue (#309)
-- Improve hashindex error messages (#298)
-
-Version 0.16
-------------
-
-(bugfix release, released on May 16, 2015)
-- Fix typo preventing the security confirmation prompt from working (#303)
-- Improve handling of systems with improperly configured file system encoding (#289)
-- Fix "All archives" output for attic info. (#183)
-- More user friendly error message when repository key file is not found (#236)
-- Fix parsing of iso 8601 timestamps with zero microseconds (#282)
-
-Version 0.15
-------------
-
-(bugfix release, released on Apr 15, 2015)
-- xattr: Be less strict about unknown/unsupported platforms (#239)
-- Reduce repository listing memory usage (#163).
-- Fix BrokenPipeError for remote repositories (#233)
-- Fix incorrect behavior with two character directory names (#265, #268)
-- Require approval before accessing relocated/moved repository (#271)
-- Require approval before accessing previously unknown unencrypted repositories (#271)
-- Fix issue with hash index files larger than 2GB.
-- Fix Python 3.2 compatibility issue with noatime open() (#164)
-- Include missing pyx files in dist files (#168)
-
-Version 0.14
-------------
-
-(feature release, released on Dec 17, 2014)
-- Added support for stripping leading path segments (#95)
-  "attic extract --strip-segments X"
-- Add workaround for old Linux systems without acl_extended_file_no_follow (#96)
-- Add MacPorts' path to the default openssl search path (#101)
-- HashIndex improvements, eliminates unnecessary IO on low memory systems.
-- Fix "Number of files" output for attic info. (#124)
-- limit create file permissions so files aren't read while restoring
-- Fix issue with empty xattr values (#106)
-
-Version 0.13
-------------
-
-(feature release, released on Jun 29, 2014)
-
-- Fix sporadic "Resource temporarily unavailable" when using remote repositories
-- Reduce file cache memory usage (#90)
-- Faster AES encryption (utilizing AES-NI when available)
-- Experimental Linux, OS X and FreeBSD ACL support (#66)
-- Added support for backup and restore of BSDFlags (OSX, FreeBSD) (#56)
-- Fix bug where xattrs on symlinks were not correctly restored
-- Added cachedir support. CACHEDIR.TAG compatible cache directories
-  can now be excluded using ``--exclude-caches`` (#74)
-- Fix crash on extreme mtime timestamps (year 2400+) (#81)
-- Fix Python 3.2 specific lockf issue (EDEADLK)
-
-Version 0.12
-------------
-
-(feature release, released on April 7, 2014)
-
-- Python 3.4 support (#62)
-- Various documentation improvements a new style
-- ``attic mount`` now supports mounting an entire repository not only
-  individual archives (#59)
-- Added option to restrict remote repository access to specific path(s):
-  ``attic serve --restrict-to-path X`` (#51)
-- Include "all archives" size information in "--stats" output. (#54)
-- Added ``--stats`` option to ``attic delete`` and ``attic prune``
-- Fixed bug where ``attic prune`` used UTC instead of the local time zone
-  when determining which archives to keep.
-- Switch to SI units (Power of 1000 instead 1024) when printing file sizes
-
-Version 0.11
-------------
-
-(feature release, released on March 7, 2014)
-
-- New "check" command for repository consistency checking (#24)
-- Documentation improvements
-- Fix exception during "attic create" with repeated files (#39)
-- New "--exclude-from" option for attic create/extract/verify.
-- Improved archive metadata deduplication.
-- "attic verify" has been deprecated. Use "attic extract --dry-run" instead.
-- "attic prune --hourly|daily|..." has been deprecated.
-  Use "attic prune --keep-hourly|daily|..." instead.
-- Ignore xattr errors during "extract" if not supported by the filesystem. (#46)
-
-Version 0.10
-------------
-
-(bugfix release, released on Jan 30, 2014)
-
-- Fix deadlock when extracting 0 sized files from remote repositories
-- "--exclude" wildcard patterns are now properly applied to the full path
-  not just the file name part (#5).
-- Make source code endianness agnostic (#1)
-
-Version 0.9
------------
-
-(feature release, released on Jan 23, 2014)
-
-- Remote repository speed and reliability improvements.
-- Fix sorting of segment names to ignore NFS left over files. (#17)
-- Fix incorrect display of time (#13)
-- Improved error handling / reporting. (#12)
-- Use fcntl() instead of flock() when locking repository/cache. (#15)
-- Let ssh figure out port/user if not specified so we don't override .ssh/config (#9)
-- Improved libcrypto path detection (#23).
-
-Version 0.8.1
--------------
-
-(bugfix release, released on Oct 4, 2013)
-
-- Fix segmentation fault issue.
-
-Version 0.8
------------
-
-(feature release, released on Oct 3, 2013)
-
-- Fix xattr issue when backing up sshfs filesystems (#4)
-- Fix issue with excessive index file size (#6)
-- Support access of read only repositories.
-- New syntax to enable repository encryption:
-    attic init --encryption="none|passphrase|keyfile".
-- Detect and abort if repository is older than the cache.
-
-
-Version 0.7
------------
-
-(feature release, released on Aug 5, 2013)
-
-- Ported to FreeBSD
-- Improved documentation
-- Experimental: Archives mountable as fuse filesystems.
-- The "user." prefix is no longer stripped from xattrs on Linux
-
-
-Version 0.6.1
--------------
-
-(bugfix release, released on July 19, 2013)
-
-- Fixed an issue where mtime was not always correctly restored.
-
-
-Version 0.6
------------
-
-First public release on July 9, 2013

+ 359 - 0
CHANGES.rst

@@ -0,0 +1,359 @@
+Borg Changelog
+==============
+
+
+Version 0.25.0 (not released yet)
+---------------------------------
+
+Incompatible changes (compared to 0.24):
+
+- none yet
+
+Deprecations:
+
+- none yet
+
+New features:
+
+- honor the nodump flag (UF_NODUMP) and do not backup such items
+
+Bug fixes:
+
+- close fds of segments we delete (during compaction)
+
+Other changes:
+
+- none yet
+
+
+Version 0.24.0
+--------------
+
+Incompatible changes (compared to 0.23):
+
+- borg now always issues --umask NNN option when invoking another borg via ssh
+  on the repository server. By that, it's making sure it uses the same umask
+  for remote repos as for local ones. Because of this, you must upgrade both
+  server and client(s) to 0.24.
+- the default umask is 077 now (if you do not specify via --umask) which might
+  be a different one as you used previously. The default umask avoids that
+  you accidentally give access permissions for group and/or others to files
+  created by borg (e.g. the repository).
+
+Deprecations:
+
+- "--encryption passphrase" mode is deprecated, see #85 and #97.
+  See the new "--encryption repokey" mode for a replacement.
+
+New features:
+
+- borg create --chunker-params ... to configure the chunker, fixes #16
+  (attic #302, attic #300, and somehow also #41).
+  This can be used to reduce memory usage caused by chunk management overhead,
+  so borg does not create a huge chunks index/repo index and eats all your RAM
+  if you back up lots of data in huge files (like VM disk images).
+  See docs/misc/create_chunker-params.txt for more information.
+- borg info now reports chunk counts in the chunk index.
+- borg create --compression 0..9 to select zlib compression level, fixes #66
+  (attic #295).
+- borg init --encryption repokey (to store the encryption key into the repo),
+  fixes #85
+- improve at-end error logging, always log exceptions and set exit_code=1
+- LoggedIO: better error checks / exceptions / exception handling
+- implement --remote-path to allow non-default-path borg locations, #125
+- implement --umask M and use 077 as default umask for better security, #117
+- borg check: give a named single archive to it, fixes #139
+- cache sync: show progress indication
+- cache sync: reimplement the chunk index merging in C
+
+Bug fixes:
+
+- fix segfault that happened for unreadable files (chunker: n needs to be a
+  signed size_t), #116
+- fix the repair mode, #144
+- repo delete: add destroy to allowed rpc methods, fixes issue #114
+- more compatible repository locking code (based on mkdir), maybe fixes #92
+  (attic #317, attic #201).
+- better Exception msg if no Borg is installed on the remote repo server, #56
+- create a RepositoryCache implementation that can cope with >2GiB,
+  fixes attic #326.
+- fix Traceback when running check --repair, attic #232
+- clarify help text, fixes #73.
+- add help string for --no-files-cache, fixes #140
+
+Other changes:
+
+- improved docs:
+
+  - added docs/misc directory for misc. writeups that won't be included
+    "as is" into the html docs.
+  - document environment variables and return codes (attic #324, attic #52)
+  - web site: add related projects, fix web site url, IRC #borgbackup
+  - Fedora/Fedora-based install instructions added to docs
+  - Cygwin-based install instructions added to docs
+  - updated AUTHORS
+  - add FAQ entries about redundancy / integrity
+  - clarify that borg extract uses the cwd as extraction target
+  - update internals doc about chunker params, memory usage and compression
+  - added docs about development
+  - add some words about resource usage in general
+  - document how to backup a raw disk
+  - add note about how to run borg from virtual env
+  - add solutions for (ll)fuse installation problems
+  - document what borg check does, fixes #138
+  - reorganize borgbackup.github.io sidebar, prev/next at top
+  - deduplicate and refactor the docs / README.rst
+
+- use borg-tmp as prefix for temporary files / directories
+- short prune options without "keep-" are deprecated, do not suggest them
+- improved tox configuration
+- remove usage of unittest.mock, always use mock from pypi
+- use entrypoints instead of scripts, for better use of the wheel format and
+  modern installs
+- add requirements.d/development.txt and modify tox.ini
+- use travis-ci for testing based on Linux and (new) OS X
+- use coverage.py, pytest-cov and codecov.io for test coverage support
+
+I forgot to list some stuff already implemented in 0.23.0, here they are:
+
+New features:
+
+- efficient archive list from manifest, meaning a big speedup for slow
+  repo connections and "list <repo>", "delete <repo>", "prune" (attic #242,
+  attic #167)
+- big speedup for chunks cache sync (esp. for slow repo connections), fixes #18
+- hashindex: improve error messages
+
+Other changes:
+
+- explicitly specify binary mode to open binary files
+- some easy micro optimizations
+
+
+Version 0.23.0
+--------------
+
+Incompatible changes (compared to attic, fork related):
+
+- changed sw name and cli command to "borg", updated docs
+- package name (and name in urls) uses "borgbackup" to have less collisions
+- changed repo / cache internal magic strings from ATTIC* to BORG*,
+  changed cache location to .cache/borg/ - this means that it currently won't
+  accept attic repos (see issue #21 about improving that)
+
+Bug fixes:
+
+- avoid defect python-msgpack releases, fixes attic #171, fixes attic #185
+- fix traceback when trying to do unsupported passphrase change, fixes attic #189
+- datetime does not like the year 10.000, fixes attic #139
+- fix "info" all archives stats, fixes attic #183
+- fix parsing with missing microseconds, fixes attic #282
+- fix misleading hint the fuse ImportError handler gave, fixes attic #237
+- check unpacked data from RPC for tuple type and correct length, fixes attic #127
+- fix Repository._active_txn state when lock upgrade fails
+- give specific path to xattr.is_enabled(), disable symlink setattr call that
+  always fails
+- fix test setup for 32bit platforms, partial fix for attic #196
+- upgraded versioneer, PEP440 compliance, fixes attic #257
+
+New features:
+
+- less memory usage: add global option --no-cache-files
+- check --last N (only check the last N archives)
+- check: sort archives in reverse time order
+- rename repo::oldname newname (rename repository)
+- create -v output more informative
+- create --progress (backup progress indicator)
+- create --timestamp (utc string or reference file/dir)
+- create: if "-" is given as path, read binary from stdin
+- extract: if --stdout is given, write all extracted binary data to stdout
+- extract --sparse (simple sparse file support)
+- extra debug information for 'fread failed'
+- delete <repo> (deletes whole repo + local cache)
+- FUSE: reflect deduplication in allocated blocks
+- only allow whitelisted RPC calls in server mode
+- normalize source/exclude paths before matching
+- use posix_fadvise to not spoil the OS cache, fixes attic #252
+- toplevel error handler: show tracebacks for better error analysis
+- sigusr1 / sigint handler to print current file infos - attic PR #286
+- RPCError: include the exception args we get from remote
+
+Other changes:
+
+- source: misc. cleanups, pep8, style
+- docs and faq improvements, fixes, updates
+- cleanup crypto.pyx, make it easier to adapt to other AES modes
+- do os.fsync like recommended in the python docs
+- source: Let chunker optionally work with os-level file descriptor.
+- source: Linux: remove duplicate os.fsencode calls
+- source: refactor _open_rb code a bit, so it is more consistent / regular
+- source: refactor indicator (status) and item processing
+- source: use py.test for better testing, flake8 for code style checks
+- source: fix tox >=2.0 compatibility (test runner)
+- pypi package: add python version classifiers, add FreeBSD to platforms
+
+
+Attic Changelog
+===============
+
+Here you can see the full list of changes between each Attic release until Borg
+forked from Attic:
+
+Version 0.17
+------------
+
+(bugfix release, released on X)
+- Fix hashindex ARM memory alignment issue (#309)
+- Improve hashindex error messages (#298)
+
+Version 0.16
+------------
+
+(bugfix release, released on May 16, 2015)
+- Fix typo preventing the security confirmation prompt from working (#303)
+- Improve handling of systems with improperly configured file system encoding (#289)
+- Fix "All archives" output for attic info. (#183)
+- More user friendly error message when repository key file is not found (#236)
+- Fix parsing of iso 8601 timestamps with zero microseconds (#282)
+
+Version 0.15
+------------
+
+(bugfix release, released on Apr 15, 2015)
+- xattr: Be less strict about unknown/unsupported platforms (#239)
+- Reduce repository listing memory usage (#163).
+- Fix BrokenPipeError for remote repositories (#233)
+- Fix incorrect behavior with two character directory names (#265, #268)
+- Require approval before accessing relocated/moved repository (#271)
+- Require approval before accessing previously unknown unencrypted repositories (#271)
+- Fix issue with hash index files larger than 2GB.
+- Fix Python 3.2 compatibility issue with noatime open() (#164)
+- Include missing pyx files in dist files (#168)
+
+Version 0.14
+------------
+
+(feature release, released on Dec 17, 2014)
+- Added support for stripping leading path segments (#95)
+  "attic extract --strip-segments X"
+- Add workaround for old Linux systems without acl_extended_file_no_follow (#96)
+- Add MacPorts' path to the default openssl search path (#101)
+- HashIndex improvements, eliminates unnecessary IO on low memory systems.
+- Fix "Number of files" output for attic info. (#124)
+- limit create file permissions so files aren't read while restoring
+- Fix issue with empty xattr values (#106)
+
+Version 0.13
+------------
+
+(feature release, released on Jun 29, 2014)
+
+- Fix sporadic "Resource temporarily unavailable" when using remote repositories
+- Reduce file cache memory usage (#90)
+- Faster AES encryption (utilizing AES-NI when available)
+- Experimental Linux, OS X and FreeBSD ACL support (#66)
+- Added support for backup and restore of BSDFlags (OSX, FreeBSD) (#56)
+- Fix bug where xattrs on symlinks were not correctly restored
+- Added cachedir support. CACHEDIR.TAG compatible cache directories
+  can now be excluded using ``--exclude-caches`` (#74)
+- Fix crash on extreme mtime timestamps (year 2400+) (#81)
+- Fix Python 3.2 specific lockf issue (EDEADLK)
+
+Version 0.12
+------------
+
+(feature release, released on April 7, 2014)
+
+- Python 3.4 support (#62)
+- Various documentation improvements a new style
+- ``attic mount`` now supports mounting an entire repository not only
+  individual archives (#59)
+- Added option to restrict remote repository access to specific path(s):
+  ``attic serve --restrict-to-path X`` (#51)
+- Include "all archives" size information in "--stats" output. (#54)
+- Added ``--stats`` option to ``attic delete`` and ``attic prune``
+- Fixed bug where ``attic prune`` used UTC instead of the local time zone
+  when determining which archives to keep.
+- Switch to SI units (Power of 1000 instead 1024) when printing file sizes
+
+Version 0.11
+------------
+
+(feature release, released on March 7, 2014)
+
+- New "check" command for repository consistency checking (#24)
+- Documentation improvements
+- Fix exception during "attic create" with repeated files (#39)
+- New "--exclude-from" option for attic create/extract/verify.
+- Improved archive metadata deduplication.
+- "attic verify" has been deprecated. Use "attic extract --dry-run" instead.
+- "attic prune --hourly|daily|..." has been deprecated.
+  Use "attic prune --keep-hourly|daily|..." instead.
+- Ignore xattr errors during "extract" if not supported by the filesystem. (#46)
+
+Version 0.10
+------------
+
+(bugfix release, released on Jan 30, 2014)
+
+- Fix deadlock when extracting 0 sized files from remote repositories
+- "--exclude" wildcard patterns are now properly applied to the full path
+  not just the file name part (#5).
+- Make source code endianness agnostic (#1)
+
+Version 0.9
+-----------
+
+(feature release, released on Jan 23, 2014)
+
+- Remote repository speed and reliability improvements.
+- Fix sorting of segment names to ignore NFS left over files. (#17)
+- Fix incorrect display of time (#13)
+- Improved error handling / reporting. (#12)
+- Use fcntl() instead of flock() when locking repository/cache. (#15)
+- Let ssh figure out port/user if not specified so we don't override .ssh/config (#9)
+- Improved libcrypto path detection (#23).
+
+Version 0.8.1
+-------------
+
+(bugfix release, released on Oct 4, 2013)
+
+- Fix segmentation fault issue.
+
+Version 0.8
+-----------
+
+(feature release, released on Oct 3, 2013)
+
+- Fix xattr issue when backing up sshfs filesystems (#4)
+- Fix issue with excessive index file size (#6)
+- Support access of read only repositories.
+- New syntax to enable repository encryption:
+    attic init --encryption="none|passphrase|keyfile".
+- Detect and abort if repository is older than the cache.
+
+
+Version 0.7
+-----------
+
+(feature release, released on Aug 5, 2013)
+
+- Ported to FreeBSD
+- Improved documentation
+- Experimental: Archives mountable as fuse filesystems.
+- The "user." prefix is no longer stripped from xattrs on Linux
+
+
+Version 0.6.1
+-------------
+
+(bugfix release, released on July 19, 2013)
+
+- Fixed an issue where mtime was not always correctly restored.
+
+
+Version 0.6
+-----------
+
+First public release on July 9, 2013
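
A note on the 0.24 umask change described above: the umask is plain POSIX
behaviour and masks permission bits off every file the process creates, so
the new 077 default keeps repository files private to the owner. A minimal
illustration in Python (not borg code)::

    import os, stat

    os.umask(0o077)                # borg's new default (same as --umask 077)
    with open('demo.txt', 'w'):    # open() requests mode 0o666; the umask
        pass                       # strips the group/other bits at creation
    mode = stat.S_IMODE(os.stat('demo.txt').st_mode)
    assert mode == 0o600           # only the owner can read/write
    os.remove('demo.txt')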

+ 1 - 0
LICENSE

@@ -1,3 +1,4 @@
+Copyright (C) 2015 The Borg Collective (see AUTHORS file)
 Copyright (C) 2010-2014 Jonas Borgström <jonas@borgstrom.se>
 All rights reserved.
 

+ 3 - 2
MANIFEST.in

@@ -1,6 +1,7 @@
-include README.rst LICENSE CHANGES MANIFEST.in versioneer.py
-recursive-include attic *.pyx
+include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py
+recursive-include borg *.pyx
 recursive-include docs *
 recursive-exclude docs *.pyc
 recursive-exclude docs *.pyo
 prune docs/_build
+include borg/_version.py

+ 115 - 43
README.rst

@@ -1,57 +1,129 @@
-What is Attic?
---------------
-Attic is a deduplicating backup program. The main goal of Attic is to provide
-an efficient and secure way to backup data. The data deduplication
-technique used makes Attic suitable for daily backups since only changes
-are stored.
+What is BorgBackup?
+-------------------
+BorgBackup (short: Borg) is a deduplicating backup program.
+Optionally, it supports compression and authenticated encryption.
 
-Easy to use
-~~~~~~~~~~~
-Initialize backup repository and create a backup archive::
+The main goal of Borg is to provide an efficient and secure way to backup data.
+The data deduplication technique used makes Borg suitable for daily backups
+since only changes are stored.
+The authenticated encryption technique makes it suitable for backups to not
+fully trusted targets.
+
+`Borg Installation docs <http://borgbackup.github.io/borgbackup/installation.html>`_
 
-    $ attic init /usbdrive/my-backup.attic
-    $ attic create -v /usbdrive/my-backup.attic::documents ~/Documents
 
 Main features
 ~~~~~~~~~~~~~
-Space efficient storage
-  Variable block size deduplication is used to reduce the number of bytes 
-  stored by detecting redundant data. Each file is split into a number of
-  variable length chunks and only chunks that have never been seen before are
-  compressed and added to the repository.
+**Space efficient storage**
+  Deduplication based on content-defined chunking is used to reduce the number
+  of bytes stored: each file is split into a number of variable length chunks
+  and only chunks that have never been seen before are added to the repository.
 
-Optional data encryption
-    All data can be protected using 256-bit AES encryption and data integrity
-    and authenticity is verified using HMAC-SHA256.
+  To deduplicate, all the chunks in the same repository are considered, no
+  matter whether they come from different machines, from previous backups,
+  from the same backup or even from the same single file.
 
-Off-site backups
-    Attic can store data on any remote host accessible over SSH.  This is
-    most efficient if Attic is also installed on the remote host.
+  Compared to other deduplication approaches, this method does NOT depend on:
 
-Backups mountable as filesystems
-    Backup archives are mountable as userspace filesystems for easy backup
-    verification and restores.
+  * file/directory names staying the same
 
-What do I need?
----------------
-Attic requires Python 3.2 or above to work. Besides Python, Attic also requires 
-msgpack-python and sufficiently recent OpenSSL (>= 1.0.0).
-In order to mount archives as filesystems, llfuse is required.
+    So you can move your stuff around without killing the deduplication,
+    even between machines sharing a repo.
 
-How do I install it?
---------------------
-::
+  * complete files or time stamps staying the same
 
-  $ pip install Attic
+    If a big file changes a little, only a few new chunks will be stored -
+    this is great for VMs or raw disks.
 
-Where are the docs?
--------------------
-Go to https://attic-backup.org/ for a prebuilt version of the documentation.
-You can also build it yourself from the docs folder.
+  * the absolute position of a data chunk inside a file
+
+    Stuff may get shifted and will still be found by the deduplication
+    algorithm.
+
+**Speed**
+  * performance critical code (chunking, compression, encryption) is
+    implemented in C/Cython
+  * local caching of files/chunks index data
+  * quick detection of unmodified files
+
+**Data encryption**
+    All data can be protected using 256-bit AES encryption, data integrity and
+    authenticity is verified using HMAC-SHA256.
+
+**Compression**
+    All data can be compressed by zlib, level 0-9.
+
+**Off-site backups**
+    Borg can store data on any remote host accessible over SSH.  If Borg is
+    installed on the remote host, big performance gains can be achieved
+    compared to using a network filesystem (sshfs, nfs, ...).
+
+**Backups mountable as filesystems**
+    Backup archives are mountable as userspace filesystems for easy interactive
+    backup examination and restores (e.g. by using a regular file manager).
+
+**Platforms Borg works on**
+  * Linux
+  * FreeBSD
+  * Mac OS X
+  * Cygwin (unsupported)
+
+**Free and Open Source Software**
+  * security and functionality can be audited independently
+  * licensed under the BSD (3-clause) license
+
+
+Easy to use
+~~~~~~~~~~~
+Initialize a new backup repository and create a backup archive::
+
+    $ borg init /mnt/backup
+    $ borg create /mnt/backup::Monday ~/Documents
+
+Now doing another backup, just to show off the great deduplication::
+
+    $ borg create --stats /mnt/backup::Tuesday ~/Documents
+
+    Archive name: Tuesday
+    Archive fingerprint: 387a5e3f9b0e792e91c...
+    Start time: Tue Mar 25 12:00:10 2014
+    End time:   Tue Mar 25 12:00:10 2014
+    Duration: 0.08 seconds
+    Number of files: 358
+                      Original size    Compressed size    Deduplicated size
+    This archive:          57.16 MB           46.78 MB            151.67 kB  <--- !
+    All archives:         114.02 MB           93.46 MB             44.81 MB
+
+For a graphical frontend refer to our complementary project
+`BorgWeb <https://github.com/borgbackup/borgweb>`_.
+
+
+Notes
+-----
+
+Borg is a fork of `Attic <https://github.com/jborg/attic>`_ and maintained by
+"`The Borg Collective <https://github.com/borgbackup/borg/blob/master/AUTHORS>`_".
+
+Read `issue #1 <https://github.com/borgbackup/borg/issues/1>`_ about the initial
+considerations regarding project goals and policy of the Borg project.
+
+BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC.
+EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER
+CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document.
+
+NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES.
+
+THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
+
+For more information, please also see the
+`LICENSE  <https://github.com/borgbackup/borg/blob/master/LICENSE>`_.
+
+|build| |coverage|
 
-Where are the tests?
---------------------
-The tests are in the attic/testsuite package. To run the test suite use the
-following command::
+.. |build| image:: https://travis-ci.org/borgbackup/borg.svg
+        :alt: Build Status
+        :target: https://travis-ci.org/borgbackup/borg
 
-  $ fakeroot -u python -m attic.testsuite.run
+.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master
+        :alt: Test Coverage
+        :target: http://codecov.io/github/borgbackup/borg?branch=master

+ 0 - 197
attic/_version.py

@@ -1,197 +0,0 @@
-
-IN_LONG_VERSION_PY = True
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (build by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain. Generated by
-# versioneer-0.7+ (https://github.com/warner/python-versioneer)
-
-# these strings will be replaced by git during git-archive
-git_refnames = "$Format:%d$"
-git_full = "$Format:%H$"
-
-
-import subprocess
-import sys
-
-def run_command(args, cwd=None, verbose=False):
-    try:
-        # remember shell=False, so use git.cmd on windows, not just git
-        p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
-    except EnvironmentError:
-        e = sys.exc_info()[1]
-        if verbose:
-            print("unable to run %s" % args[0])
-            print(e)
-        return None
-    stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
-        stdout = stdout.decode()
-    if p.returncode != 0:
-        if verbose:
-            print("unable to run %s (error)" % args[0])
-        return None
-    return stdout
-
-
-import sys
-import re
-import os.path
-
-def get_expanded_variables(versionfile_source):
-    # the code embedded in _version.py can just fetch the value of these
-    # variables. When used from setup.py, we don't want to import
-    # _version.py, so we do it with a regexp instead. This function is not
-    # used from _version.py.
-    variables = {}
-    try:
-        for line in open(versionfile_source,"r").readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    variables["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    variables["full"] = mo.group(1)
-    except EnvironmentError:
-        pass
-    return variables
-
-def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
-    refnames = variables["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("variables are unexpanded, not using")
-        return {} # unexpanded, so not in an unpacked git-archive tarball
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    for ref in list(refs):
-        if not re.search(r'\d', ref):
-            if verbose:
-                print("discarding '%s', no digits" % ref)
-            refs.discard(ref)
-            # Assume all version tags have a digit. git's %d expansion
-            # behaves like git log --decorate=short and strips out the
-            # refs/heads/ and refs/tags/ prefixes that would let us
-            # distinguish between branches and tags. By ignoring refnames
-            # without digits, we filter out many common branch names like
-            # "release" and "stabilization", as well as "HEAD" and "master".
-    if verbose:
-        print("remaining refs: %s" % ",".join(sorted(refs)))
-    for ref in sorted(refs):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            if verbose:
-                print("picking %s" % r)
-            return { "version": r,
-                     "full": variables["full"].strip() }
-    # no suitable tags, so we use the full revision id
-    if verbose:
-        print("no suitable tags, using full revision id")
-    return { "version": variables["full"].strip(),
-             "full": variables["full"].strip() }
-
-def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
-    # this runs 'git' from the root of the source tree. That either means
-    # someone ran a setup.py command (and this code is in versioneer.py, so
-    # IN_LONG_VERSION_PY=False, thus the containing directory is the root of
-    # the source tree), or someone ran a project-specific entry point (and
-    # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the
-    # containing directory is somewhere deeper in the source tree). This only
-    # gets called if the git-archive 'subst' variables were *not* expanded,
-    # and _version.py hasn't already been rewritten with a short version
-    # string, meaning we're inside a checked out source tree.
-
-    try:
-        here = os.path.abspath(__file__)
-    except NameError:
-        # some py2exe/bbfreeze/non-CPython implementations don't do __file__
-        return {} # not always correct
-
-    # versionfile_source is the relative path from the top of the source tree
-    # (where the .git directory might live) to this file. Invert this to find
-    # the root from __file__.
-    root = here
-    if IN_LONG_VERSION_PY:
-        for i in range(len(versionfile_source.split("/"))):
-            root = os.path.dirname(root)
-    else:
-        root = os.path.dirname(here)
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %s" % root)
-        return {}
-
-    GIT = "git"
-    if sys.platform == "win32":
-        GIT = "git.cmd"
-    stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],
-                         cwd=root)
-    if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
-        if verbose:
-            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
-    stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
-
-
-def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
-    if IN_LONG_VERSION_PY:
-        # We're running from _version.py. If it's from a source tree
-        # (execute-in-place), we can work upwards to find the root of the
-        # tree, and then check the parent directory for a version string. If
-        # it's in an installed application, there's no hope.
-        try:
-            here = os.path.abspath(__file__)
-        except NameError:
-            # py2exe/bbfreeze/non-CPython don't have __file__
-            return {} # without __file__, we have no hope
-        # versionfile_source is the relative path from the top of the source
-        # tree to _version.py. Invert this to find the root from __file__.
-        root = here
-        for i in range(len(versionfile_source.split("/"))):
-            root = os.path.dirname(root)
-    else:
-        # we're running from versioneer.py, which means we're running from
-        # the setup.py in a source tree. sys.argv[0] is setup.py in the root.
-        here = os.path.abspath(sys.argv[0])
-        root = os.path.dirname(here)
-
-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
-                  (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
-
-tag_prefix = ""
-parentdir_prefix = "Attic-"
-versionfile_source = "attic/_version.py"
-
-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
-    variables = { "refnames": git_refnames, "full": git_full }
-    ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
-    if not ver:
-        ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
-    if not ver:
-        ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
-                                      verbose)
-    if not ver:
-        ver = default
-    return ver
-

+ 0 - 125
attic/crypto.pyx

@@ -1,125 +0,0 @@
-"""A thin OpenSSL wrapper
-
-This could be replaced by PyCrypto or something similar when the performance
-of their PBKDF2 implementation is comparable to the OpenSSL version.
-"""
-from libc.stdlib cimport malloc, free
-
-API_VERSION = 2
-
-cdef extern from "openssl/rand.h":
-    int  RAND_bytes(unsigned char *buf,int num)
-
-
-cdef extern from "openssl/evp.h":
-    ctypedef struct EVP_MD:
-        pass
-    ctypedef struct EVP_CIPHER:
-        pass
-    ctypedef struct EVP_CIPHER_CTX:
-        unsigned char *iv
-        pass
-    ctypedef struct ENGINE:
-        pass
-    const EVP_MD *EVP_sha256()
-    const EVP_CIPHER *EVP_aes_256_ctr()
-    void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
-    void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
-
-    int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl,
-                           const unsigned char *key, const unsigned char *iv)
-    int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out,
-                          int *outl, const unsigned char *in_, int inl)
-
-    int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
-                          const unsigned char *salt, int saltlen, int iter,
-                          const EVP_MD *digest,
-                          int keylen, unsigned char *out)
-
-import struct
-
-_int = struct.Struct('>I')
-_long = struct.Struct('>Q')
-
-bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
-bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
-long_to_bytes = lambda x: _long.pack(x)
-
-
-def num_aes_blocks(length):
-    """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data
-    """
-    return (length + 15) // 16
-
-
-def pbkdf2_sha256(password, salt, iterations, size):
-    """Password based key derivation function 2 (RFC2898)
-    """
-    cdef unsigned char *key = <unsigned char *>malloc(size)
-    if not key:
-        raise MemoryError
-    try:
-        rv = PKCS5_PBKDF2_HMAC(password, len(password), salt, len(salt), iterations, EVP_sha256(), size, key)
-        if not rv:
-            raise Exception('PKCS5_PBKDF2_HMAC failed')
-        return key[:size]
-    finally:
-        free(key)
-
-
-def get_random_bytes(n):
-    """Return n cryptographically strong pseudo-random bytes
-    """
-    cdef unsigned char *buf = <unsigned char *>malloc(n)
-    if not buf:
-        raise MemoryError
-    try:
-        if RAND_bytes(buf, n) < 1:
-            raise Exception('RAND_bytes failed')
-        return buf[:n]
-    finally:
-        free(buf)
-
-
-cdef class AES:
-    """A thin wrapper around the OpenSSL EVP cipher API
-    """
-    cdef EVP_CIPHER_CTX ctx
-
-    def __cinit__(self, key, iv=None):
-        EVP_CIPHER_CTX_init(&self.ctx)
-        if not EVP_EncryptInit_ex(&self.ctx, EVP_aes_256_ctr(), NULL, NULL, NULL):
-            raise Exception('EVP_EncryptInit_ex failed')
-        self.reset(key, iv)
-
-    def __dealloc__(self):
-        EVP_CIPHER_CTX_cleanup(&self.ctx)
-
-    def reset(self, key=None, iv=None):
-        cdef const unsigned char *key2 = NULL
-        cdef const unsigned char *iv2 = NULL
-        if key:
-            key2 = key
-        if iv:
-            iv2 = iv
-        if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
-            raise Exception('EVP_EncryptInit_ex failed')
-
-    @property
-    def iv(self):
-        return self.ctx.iv[:16]
-
-    def encrypt(self, data):
-        cdef int inl = len(data)
-        cdef int outl
-        cdef unsigned char *out = <unsigned char *>malloc(inl)
-        if not out:
-            raise MemoryError
-        try:
-            if not EVP_EncryptUpdate(&self.ctx, out, &outl, data, inl):
-                raise Exception('EVP_EncryptUpdate failed')
-            return out[:inl]
-        finally:
-            free(out)
-    decrypt = encrypt
-
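
Most helpers the removed module exposed have rough Python stdlib equivalents,
sketched below for reference (illustrative only: the replacement
borg/crypto.pyx stays on OpenSSL via Cython for speed and for AES-256-CTR,
which the stdlib does not provide, and hashlib.pbkdf2_hmac requires
Python >= 3.4)::

    import os, struct, hashlib

    def pbkdf2_sha256(password, salt, iterations, size):
        # the same KDF as the PKCS5_PBKDF2_HMAC call above
        return hashlib.pbkdf2_hmac('sha256', password, salt, iterations, size)

    def get_random_bytes(n):
        return os.urandom(n)           # kernel CSPRNG instead of RAND_bytes()

    def num_aes_blocks(length):
        return (length + 15) // 16     # AES block size is 16 bytes

    bytes_to_long = lambda x, offset=0: struct.unpack_from('>Q', x, offset)[0]
    long_to_bytes = lambda x: struct.pack('>Q', x)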

+ 0 - 528
attic/testsuite/archiver.py

@@ -1,528 +0,0 @@
-from binascii import hexlify
-from configparser import RawConfigParser
-import os
-from io import StringIO
-import stat
-import subprocess
-import sys
-import shutil
-import tempfile
-import time
-import unittest
-from hashlib import sha256
-from attic import xattr
-from attic.archive import Archive, ChunkBuffer
-from attic.archiver import Archiver
-from attic.cache import Cache
-from attic.crypto import bytes_to_long, num_aes_blocks
-from attic.helpers import Manifest
-from attic.remote import RemoteRepository, PathNotAllowed
-from attic.repository import Repository
-from attic.testsuite import AtticTestCase
-from attic.testsuite.mock import patch
-
-try:
-    import llfuse
-    has_llfuse = True
-except ImportError:
-    has_llfuse = False
-
-has_lchflags = hasattr(os, 'lchflags')
-
-src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')
-
-
-class changedir:
-    def __init__(self, dir):
-        self.dir = dir
-
-    def __enter__(self):
-        self.old = os.getcwd()
-        os.chdir(self.dir)
-
-    def __exit__(self, *args, **kw):
-        os.chdir(self.old)
-
-
-class environment_variable:
-    def __init__(self, **values):
-        self.values = values
-        self.old_values = {}
-
-    def __enter__(self):
-        for k, v in self.values.items():
-            self.old_values[k] = os.environ.get(k)
-            os.environ[k] = v
-
-    def __exit__(self, *args, **kw):
-        for k, v in self.old_values.items():
-            if v is not None:
-                os.environ[k] = v
-
-
-class ArchiverTestCaseBase(AtticTestCase):
-
-    prefix = ''
-
-    def setUp(self):
-        os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1'
-        self.archiver = Archiver()
-        self.tmpdir = tempfile.mkdtemp()
-        self.repository_path = os.path.join(self.tmpdir, 'repository')
-        self.repository_location = self.prefix + self.repository_path
-        self.input_path = os.path.join(self.tmpdir, 'input')
-        self.output_path = os.path.join(self.tmpdir, 'output')
-        self.keys_path = os.path.join(self.tmpdir, 'keys')
-        self.cache_path = os.path.join(self.tmpdir, 'cache')
-        self.exclude_file_path = os.path.join(self.tmpdir, 'excludes')
-        os.environ['ATTIC_KEYS_DIR'] = self.keys_path
-        os.environ['ATTIC_CACHE_DIR'] = self.cache_path
-        os.mkdir(self.input_path)
-        os.mkdir(self.output_path)
-        os.mkdir(self.keys_path)
-        os.mkdir(self.cache_path)
-        with open(self.exclude_file_path, 'wb') as fd:
-            fd.write(b'input/file2\n# A commment line, then a blank line\n\n')
-        self._old_wd = os.getcwd()
-        os.chdir(self.tmpdir)
-
-    def tearDown(self):
-        shutil.rmtree(self.tmpdir)
-        os.chdir(self._old_wd)
-
-    def attic(self, *args, **kw):
-        exit_code = kw.get('exit_code', 0)
-        fork = kw.get('fork', False)
-        if fork:
-            try:
-                output = subprocess.check_output((sys.executable, '-m', 'attic.archiver') + args)
-                ret = 0
-            except subprocess.CalledProcessError as e:
-                output = e.output
-                ret = e.returncode
-            output = os.fsdecode(output)
-            if ret != exit_code:
-                print(output)
-            self.assert_equal(exit_code, ret)
-            return output
-        args = list(args)
-        stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
-        try:
-            sys.stdin = StringIO()
-            output = StringIO()
-            sys.stdout = sys.stderr = output
-            ret = self.archiver.run(args)
-            sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
-            if ret != exit_code:
-                print(output.getvalue())
-            self.assert_equal(exit_code, ret)
-            return output.getvalue()
-        finally:
-            sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
-
-    def create_src_archive(self, name):
-        self.attic('create', self.repository_location + '::' + name, src_dir)
-
-
-class ArchiverTestCase(ArchiverTestCaseBase):
-
-    def create_regular_file(self, name, size=0, contents=None):
-        filename = os.path.join(self.input_path, name)
-        if not os.path.exists(os.path.dirname(filename)):
-            os.makedirs(os.path.dirname(filename))
-        with open(filename, 'wb') as fd:
-            if contents is None:
-                contents = b'X' * size
-            fd.write(contents)
-
-    def create_test_files(self):
-        """Create a minimal test case including all supported file types
-        """
-        # File
-        self.create_regular_file('empty', size=0)
-        # 2600-01-01 > 2**64 ns
-        os.utime('input/empty', (19880895600, 19880895600))
-        self.create_regular_file('file1', size=1024 * 80)
-        self.create_regular_file('flagfile', size=1024)
-        # Directory
-        self.create_regular_file('dir2/file2', size=1024 * 80)
-        # File owner
-        os.chown('input/file1', 100, 200)
-        # File mode
-        os.chmod('input/file1', 0o7755)
-        os.chmod('input/dir2', 0o555)
-        # Block device
-        os.mknod('input/bdev', 0o600 | stat.S_IFBLK,  os.makedev(10, 20))
-        # Char device
-        os.mknod('input/cdev', 0o600 | stat.S_IFCHR,  os.makedev(30, 40))
-        # Hard link
-        os.link(os.path.join(self.input_path, 'file1'),
-                os.path.join(self.input_path, 'hardlink'))
-        # Symlink
-        os.symlink('somewhere', os.path.join(self.input_path, 'link1'))
-        if xattr.is_enabled():
-            xattr.setxattr(os.path.join(self.input_path, 'file1'), 'user.foo', b'bar')
-            xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
-        # FIFO node
-        os.mkfifo(os.path.join(self.input_path, 'fifo1'))
-        if has_lchflags:
-            os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
-
-    def test_basic_functionality(self):
-        self.create_test_files()
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test', 'input')
-        self.attic('create', self.repository_location + '::test.2', 'input')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test')
-        self.assert_equal(len(self.attic('list', self.repository_location).splitlines()), 2)
-        self.assert_equal(len(self.attic('list', self.repository_location + '::test').splitlines()), 11)
-        self.assert_dirs_equal('input', 'output/input')
-        info_output = self.attic('info', self.repository_location + '::test')
-        self.assert_in('Number of files: 4', info_output)
-        shutil.rmtree(self.cache_path)
-        with environment_variable(ATTIC_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'):
-            info_output2 = self.attic('info', self.repository_location + '::test')
-        # info_output2 starts with some "initializing cache" text but should
-        # end the same way as info_output
-        assert info_output2.endswith(info_output)
-
-    def _extract_repository_id(self, path):
-        return Repository(self.repository_path).id
-
-    def _set_repository_id(self, path, id):
-        config = RawConfigParser()
-        config.read(os.path.join(path, 'config'))
-        config.set('repository', 'id', hexlify(id).decode('ascii'))
-        with open(os.path.join(path, 'config'), 'w') as fd:
-            config.write(fd)
-        return Repository(self.repository_path).id
-
-    def test_repository_swap_detection(self):
-        self.create_test_files()
-        os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
-        self.attic('init', '--encryption=passphrase', self.repository_location)
-        repository_id = self._extract_repository_id(self.repository_path)
-        self.attic('create', self.repository_location + '::test', 'input')
-        shutil.rmtree(self.repository_path)
-        self.attic('init', '--encryption=none', self.repository_location)
-        self._set_repository_id(self.repository_path, repository_id)
-        self.assert_equal(repository_id, self._extract_repository_id(self.repository_path))
-        self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.attic('create', self.repository_location + '::test.2', 'input'))
-
-    def test_repository_swap_detection2(self):
-        self.create_test_files()
-        self.attic('init', '--encryption=none', self.repository_location + '_unencrypted')
-        os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
-        self.attic('init', '--encryption=passphrase', self.repository_location + '_encrypted')
-        self.attic('create', self.repository_location + '_encrypted::test', 'input')
-        shutil.rmtree(self.repository_path + '_encrypted')
-        os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted')
-        self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.attic('create', self.repository_location + '_encrypted::test.2', 'input'))
-
-    def test_strip_components(self):
-        self.attic('init', self.repository_location)
-        self.create_regular_file('dir/file')
-        self.attic('create', self.repository_location + '::test', 'input')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test', '--strip-components', '3')
-            self.assert_true(not os.path.exists('file'))
-            with self.assert_creates_file('file'):
-                self.attic('extract', self.repository_location + '::test', '--strip-components', '2')
-            with self.assert_creates_file('dir/file'):
-                self.attic('extract', self.repository_location + '::test', '--strip-components', '1')
-            with self.assert_creates_file('input/dir/file'):
-                self.attic('extract', self.repository_location + '::test', '--strip-components', '0')
-
-    def test_extract_include_exclude(self):
-        self.attic('init', self.repository_location)
-        self.create_regular_file('file1', size=1024 * 80)
-        self.create_regular_file('file2', size=1024 * 80)
-        self.create_regular_file('file3', size=1024 * 80)
-        self.create_regular_file('file4', size=1024 * 80)
-        self.attic('create', '--exclude=input/file4', self.repository_location + '::test', 'input')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test', 'input/file1', )
-        self.assert_equal(sorted(os.listdir('output/input')), ['file1'])
-        with changedir('output'):
-            self.attic('extract', '--exclude=input/file2', self.repository_location + '::test')
-        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
-        with changedir('output'):
-            self.attic('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
-        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
-
-    def test_exclude_caches(self):
-        self.attic('init', self.repository_location)
-        self.create_regular_file('file1', size=1024 * 80)
-        self.create_regular_file('cache1/CACHEDIR.TAG', contents = b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
-        self.create_regular_file('cache2/CACHEDIR.TAG', contents = b'invalid signature')
-        self.attic('create', '--exclude-caches', self.repository_location + '::test', 'input')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test')
-        self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
-        self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
-
-    def test_path_normalization(self):
-        self.attic('init', self.repository_location)
-        self.create_regular_file('dir1/dir2/file', size=1024 * 80)
-        with changedir('input/dir1/dir2'):
-            self.attic('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..')
-        output = self.attic('list', self.repository_location + '::test')
-        self.assert_not_in('..', output)
-        self.assert_in(' input/dir1/dir2/file', output)
-
-    def test_repeated_files(self):
-        self.create_regular_file('file1', size=1024 * 80)
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test', 'input', 'input')
-
-    def test_overwrite(self):
-        self.create_regular_file('file1', size=1024 * 80)
-        self.create_regular_file('dir2/file2', size=1024 * 80)
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test', 'input')
-        # Overwriting regular files and directories should be supported
-        os.mkdir('output/input')
-        os.mkdir('output/input/file1')
-        os.mkdir('output/input/dir2')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test')
-        self.assert_dirs_equal('input', 'output/input')
-        # But non-empty dirs should fail
-        os.unlink('output/input/file1')
-        os.mkdir('output/input/file1')
-        os.mkdir('output/input/file1/dir')
-        with changedir('output'):
-            self.attic('extract', self.repository_location + '::test', exit_code=1)
-
-    def test_delete(self):
-        self.create_regular_file('file1', size=1024 * 80)
-        self.create_regular_file('dir2/file2', size=1024 * 80)
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test', 'input')
-        self.attic('create', self.repository_location + '::test.2', 'input')
-        self.attic('extract', '--dry-run', self.repository_location + '::test')
-        self.attic('extract', '--dry-run', self.repository_location + '::test.2')
-        self.attic('delete', self.repository_location + '::test')
-        self.attic('extract', '--dry-run', self.repository_location + '::test.2')
-        self.attic('delete', self.repository_location + '::test.2')
-        # Make sure all data except the manifest has been deleted
-        repository = Repository(self.repository_path)
-        self.assert_equal(len(repository), 1)
-
-    def test_corrupted_repository(self):
-        self.attic('init', self.repository_location)
-        self.create_src_archive('test')
-        self.attic('extract', '--dry-run', self.repository_location + '::test')
-        self.attic('check', self.repository_location)
-        name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
-        fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
-        fd.seek(100)
-        fd.write('XXXX')
-        fd.close()
-        self.attic('check', self.repository_location, exit_code=1)
-
-    def test_readonly_repository(self):
-        self.attic('init', self.repository_location)
-        self.create_src_archive('test')
-        os.system('chmod -R ugo-w ' + self.repository_path)
-        try:
-            self.attic('extract', '--dry-run', self.repository_location + '::test')
-        finally:
-            # Restore permissions so shutil.rmtree is able to delete it
-            os.system('chmod -R u+w ' + self.repository_path)
-
-    def test_cmdline_compatibility(self):
-        self.create_regular_file('file1', size=1024 * 80)
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test', 'input')
-        output = self.attic('verify', '-v', self.repository_location + '::test')
-        self.assert_in('"attic verify" has been deprecated', output)
-        output = self.attic('prune', self.repository_location, '--hourly=1')
-        self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output)
-
-    def test_prune_repository(self):
-        self.attic('init', self.repository_location)
-        self.attic('create', self.repository_location + '::test1', src_dir)
-        self.attic('create', self.repository_location + '::test2', src_dir)
-        output = self.attic('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
-        self.assert_in('Would prune:     test1', output)
-        output = self.attic('list', self.repository_location)
-        self.assert_in('test1', output)
-        self.assert_in('test2', output)
-        self.attic('prune', self.repository_location, '--keep-daily=2')
-        output = self.attic('list', self.repository_location)
-        self.assert_not_in('test1', output)
-        self.assert_in('test2', output)
-
-    def test_usage(self):
-        self.assert_raises(SystemExit, lambda: self.attic())
-        self.assert_raises(SystemExit, lambda: self.attic('-h'))
-
-    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_repository(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
-        self.attic('init', self.repository_location)
-        self.create_test_files()
-        self.attic('create', self.repository_location + '::archive', 'input')
-        self.attic('create', self.repository_location + '::archive2', 'input')
-        try:
-            self.attic('mount', self.repository_location, mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
-            else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
-
-    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_archive(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
-        self.attic('init', self.repository_location)
-        self.create_test_files()
-        self.attic('create', self.repository_location + '::archive', 'input')
-        try:
-            self.attic('mount', self.repository_location + '::archive', mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
-            else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
-
-    def verify_aes_counter_uniqueness(self, method):
-        seen = set()  # Chunks already seen
-        used = set()  # counter values already used
-
-        def verify_uniqueness():
-            repository = Repository(self.repository_path)
-            for key, _ in repository.open_index(repository.get_transaction_id()).iteritems():
-                data = repository.get(key)
-                hash = sha256(data).digest()
-                if not hash in seen:
-                    seen.add(hash)
-                    num_blocks = num_aes_blocks(len(data) - 41)
-                    nonce = bytes_to_long(data[33:41])
-                    for counter in range(nonce, nonce + num_blocks):
-                        self.assert_not_in(counter, used)
-                        used.add(counter)
-
-        self.create_test_files()
-        os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
-        self.attic('init', '--encryption=' + method, self.repository_location)
-        verify_uniqueness()
-        self.attic('create', self.repository_location + '::test', 'input')
-        verify_uniqueness()
-        self.attic('create', self.repository_location + '::test.2', 'input')
-        verify_uniqueness()
-        self.attic('delete', self.repository_location + '::test.2')
-        verify_uniqueness()
-        self.assert_equal(used, set(range(len(used))))
-
-    def test_aes_counter_uniqueness_keyfile(self):
-        self.verify_aes_counter_uniqueness('keyfile')
-
-    def test_aes_counter_uniqueness_passphrase(self):
-        self.verify_aes_counter_uniqueness('passphrase')
-
-
-class ArchiverCheckTestCase(ArchiverTestCaseBase):
-
-    def setUp(self):
-        super(ArchiverCheckTestCase, self).setUp()
-        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
-            self.attic('init', self.repository_location)
-            self.create_src_archive('archive1')
-            self.create_src_archive('archive2')
-
-    def open_archive(self, name):
-        repository = Repository(self.repository_path)
-        manifest, key = Manifest.load(repository)
-        archive = Archive(repository, key, manifest, name)
-        return archive, repository
-
-    def test_check_usage(self):
-        output = self.attic('check', self.repository_location, exit_code=0)
-        self.assert_in('Starting repository check', output)
-        self.assert_in('Starting archive consistency check', output)
-        output = self.attic('check', '--repository-only', self.repository_location, exit_code=0)
-        self.assert_in('Starting repository check', output)
-        self.assert_not_in('Starting archive consistency check', output)
-        output = self.attic('check', '--archives-only', self.repository_location, exit_code=0)
-        self.assert_not_in('Starting repository check', output)
-        self.assert_in('Starting archive consistency check', output)
-
-    def test_missing_file_chunk(self):
-        archive, repository = self.open_archive('archive1')
-        for item in archive.iter_items():
-            if item[b'path'].endswith('testsuite/archiver.py'):
-                repository.delete(item[b'chunks'][-1][0])
-                break
-        repository.commit()
-        self.attic('check', self.repository_location, exit_code=1)
-        self.attic('check', '--repair', self.repository_location, exit_code=0)
-        self.attic('check', self.repository_location, exit_code=0)
-
-    def test_missing_archive_item_chunk(self):
-        archive, repository = self.open_archive('archive1')
-        repository.delete(archive.metadata[b'items'][-5])
-        repository.commit()
-        self.attic('check', self.repository_location, exit_code=1)
-        self.attic('check', '--repair', self.repository_location, exit_code=0)
-        self.attic('check', self.repository_location, exit_code=0)
-
-    def test_missing_archive_metadata(self):
-        archive, repository = self.open_archive('archive1')
-        repository.delete(archive.id)
-        repository.commit()
-        self.attic('check', self.repository_location, exit_code=1)
-        self.attic('check', '--repair', self.repository_location, exit_code=0)
-        self.attic('check', self.repository_location, exit_code=0)
-
-    def test_missing_manifest(self):
-        archive, repository = self.open_archive('archive1')
-        repository.delete(Manifest.MANIFEST_ID)
-        repository.commit()
-        self.attic('check', self.repository_location, exit_code=1)
-        output = self.attic('check', '--repair', self.repository_location, exit_code=0)
-        self.assert_in('archive1', output)
-        self.assert_in('archive2', output)
-        self.attic('check', self.repository_location, exit_code=0)
-
-    def test_extra_chunks(self):
-        self.attic('check', self.repository_location, exit_code=0)
-        repository = Repository(self.repository_location)
-        repository.put(b'01234567890123456789012345678901', b'xxxx')
-        repository.commit()
-        repository.close()
-        self.attic('check', self.repository_location, exit_code=1)
-        self.attic('check', self.repository_location, exit_code=1)
-        self.attic('check', '--repair', self.repository_location, exit_code=0)
-        self.attic('check', self.repository_location, exit_code=0)
-        self.attic('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
-
-
-class RemoteArchiverTestCase(ArchiverTestCase):
-    prefix = '__testsuite__:'
-
-    def test_remote_repo_restrict_to_path(self):
-        self.attic('init', self.repository_location)
-        path_prefix = os.path.dirname(self.repository_path)
-        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']):
-            self.assert_raises(PathNotAllowed, lambda: self.attic('init', self.repository_location + '_1'))
-        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]):
-            self.attic('init', self.repository_location + '_2')
-        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]):
-            self.attic('init', self.repository_location + '_3')

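A note on the counter-uniqueness tests removed above: they walk every chunk in the repository, read the 8-byte big-endian AES-CTR nonce stored at bytes 33..41 of each encrypted chunk, and assert that no counter value is ever used twice. A minimal standalone sketch of that check (the offsets and the 16-byte AES block size are taken from the removed test; `chunks` is assumed to be any iterable of raw chunk bytes):

    def check_ctr_uniqueness(chunks):
        # Offsets follow the removed test: 8-byte big-endian nonce at bytes
        # 33..41, encrypted payload after the 41-byte header.
        used = set()
        for data in chunks:
            nonce = int.from_bytes(data[33:41], 'big')
            num_blocks = (len(data) - 41 + 15) // 16  # ceil: 16-byte AES blocks
            for counter in range(nonce, nonce + num_blocks):
                assert counter not in used, 'AES-CTR counter reused!'
                used.add(counter)
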
+ 0 - 29
attic/testsuite/chunker.py

@@ -1,29 +0,0 @@
-from attic.chunker import Chunker, buzhash, buzhash_update
-from attic.testsuite import AtticTestCase
-from io import BytesIO
-
-
-class ChunkerTestCase(AtticTestCase):
-
-    def test_chunkify(self):
-        data = b'0' * 1024 * 1024 * 15 + b'Y'
-        parts = [bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(data))]
-        self.assert_equal(len(parts), 2)
-        self.assert_equal(b''.join(parts), data)
-        self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b''))], [])
-        self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
-        self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
-
-    def test_buzhash(self):
-        self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769)
-        self.assert_equal(buzhash(b'abcdefghijklmnop', 1), 3795400502)
-        self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
-        # Test with more than 31 bytes to make sure our barrel_shift macro works correctly
-        self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248)

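The buzhash tests removed above pin down the rolling property: hashing a window shifted by one byte must equal updating the old hash with the byte that leaves and the byte that enters. A self-contained sketch of a cyclic-polynomial rolling hash illustrating that invariant (borg derives its 256-entry table from a seed; the random table here is a stand-in, not borg's implementation):

    import random

    def barrel_shift(v, n):
        # rotate a 32-bit value left by n bits
        n &= 31
        return ((v << n) | (v >> (32 - n))) & 0xffffffff

    def buzhash(data, table):
        h = 0
        for b in data:
            h = barrel_shift(h, 1) ^ table[b]
        return h

    def buzhash_update(h, leave, enter, window_size, table):
        # rotate, remove the leaving byte's rotated contribution, add the new byte
        return barrel_shift(h, 1) ^ barrel_shift(table[leave], window_size) ^ table[enter]

    random.seed(0)
    table = [random.getrandbits(32) for _ in range(256)]
    h = buzhash(b'Xabcdefghijklmno', table)
    assert buzhash_update(h, ord('X'), ord('p'), 16, table) == buzhash(b'abcdefghijklmnop', table)
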
+ 0 - 5
attic/testsuite/mock.py

@@ -1,5 +0,0 @@
-try:
-    # Only available in python 3.3+
-    from unittest.mock import *
-except ImportError:
-    from mock import *

+ 0 - 10
attic/testsuite/run.py

@@ -1,10 +0,0 @@
-import unittest
-from attic.testsuite import TestLoader
-
-
-def main():
-    unittest.main(testLoader=TestLoader(), defaultTest='')
-
-
-if __name__ == '__main__':
-    main()

+ 0 - 0
attic/__init__.py → borg/__init__.py


+ 3 - 0
borg/__main__.py

@@ -0,0 +1,3 @@
+from borg.archiver import main
+main()
+

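With borg/__main__.py in place, the package can be run directly through Python's -m switch (python -m borg ...); the module simply imports borg.archiver.main and calls it.
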
+ 45 - 15
attic/_chunker.c → borg/_chunker.c

@@ -1,4 +1,5 @@
 #include <Python.h>
+#include <fcntl.h>

 /* Cyclic polynomial / buzhash: https://en.wikipedia.org/wiki/Rolling_hash */

@@ -80,29 +81,31 @@ typedef struct {
     uint32_t *table;
     uint8_t *data;
     PyObject *fd;
+    int fh;
     int done, eof;
     size_t remaining, bytes_read, bytes_yielded, position, last;
 } Chunker;

 static Chunker *
-chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed)
+chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
 {
     Chunker *c = calloc(sizeof(Chunker), 1);
     c->window_size = window_size;
     c->chunk_mask = chunk_mask;
     c->min_size = min_size;
     c->table = buzhash_init_table(seed);
-    c->buf_size = 10 * 1024 * 1024;
+    c->buf_size = max_size;
     c->data = malloc(c->buf_size);
     return c;
 }

 static void
-chunker_set_fd(Chunker *c, PyObject *fd)
+chunker_set_fd(Chunker *c, PyObject *fd, int fh)
 {
     Py_XDECREF(c->fd);
     c->fd = fd;
     Py_INCREF(fd);
+    c->fh = fh;
     c->done = 0;
     c->remaining = 0;
     c->bytes_read = 0;
@@ -124,7 +127,7 @@ chunker_free(Chunker *c)
 static int
 chunker_fill(Chunker *c)
 {
-    size_t n;
+    ssize_t n;
     PyObject *data;
     memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
     c->position -= c->last;
@@ -133,20 +136,47 @@ chunker_fill(Chunker *c)
     if(c->eof || n == 0) {
         return 1;
     }
-    data = PyObject_CallMethod(c->fd, "read", "i", n);
-    if(!data) {
-        return 0;
-    }
-    n = PyBytes_Size(data);
-    if(n) {
-        memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
-        c->remaining += n;
-        c->bytes_read += n;
+    if(c->fh >= 0) {
+        // if we have a os-level file descriptor, use os-level API
+        n = read(c->fh, c->data + c->position + c->remaining, n);
+        if(n > 0) {
+            c->remaining += n;
+            c->bytes_read += n;
+        }
+        else
+        if(n == 0) {
+            c->eof = 1;
+        }
+        else {
+            // some error happened
+            return 0;
+        }
+        #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L )
+        // We tell the OS that we do not need the data of this file any more
+        // that it maybe has in the cache. This avoids that we spoil the
+        // complete cache with data that we only read once and (due to cache
+        // size limit) kick out data from the cache that might be still useful
+        // for the OS or other processes.
+        posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED);
+        #endif
     }
     else {
-        c->eof = 1;
+        // no os-level file descriptor, use Python file object API
+        data = PyObject_CallMethod(c->fd, "read", "i", n);
+        if(!data) {
+            return 0;
+        }
+        n = PyBytes_Size(data);
+        if(n) {
+            memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
+            c->remaining += n;
+            c->bytes_read += n;
+        }
+        else {
+            c->eof = 1;
+        }
+        Py_DECREF(data);
     }
-    Py_DECREF(data);
     return 1;
 }

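The new read path above prefers an OS-level file descriptor and, after each buffer fill, calls posix_fadvise(POSIX_FADV_DONTNEED) so data read once for a backup does not evict more useful pages from the OS cache. A rough Python equivalent of that pattern, for platforms where os.posix_fadvise is available:

    import os

    def read_and_drop(path, bufsize=8 * 1024 * 1024):
        # Stream a file through an os-level fd, advising the kernel after each
        # read that the cached pages will not be needed again.
        fd = os.open(path, os.O_RDONLY)
        try:
            while True:
                buf = os.read(fd, bufsize)
                if not buf:
                    break
                yield buf
                os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED)
        finally:
            os.close(fd)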

+ 58 - 25
attic/_hashindex.c → borg/_hashindex.c

@@ -18,8 +18,11 @@
 #error Unknown byte order
 #endif

+#define MAGIC "BORG_IDX"
+#define MAGIC_LEN 8
+
 typedef struct {
-    char magic[8];
+    char magic[MAGIC_LEN];
     int32_t num_entries;
     int32_t num_buckets;
     int8_t  key_size;
@@ -37,7 +40,6 @@ typedef struct {
     int upper_limit;
 } HashIndex;

-#define MAGIC "ATTICIDX"
 #define EMPTY _htole32(0xffffffff)
 #define DELETED _htole32(0xfffffffe)
 #define MAX_BUCKET_SIZE 512
@@ -132,20 +134,23 @@ static HashIndex *
 hashindex_read(const char *path)
 {
     FILE *fd;
-    off_t length, buckets_length;
+    off_t length, buckets_length, bytes_read;
     HashHeader header;
     HashIndex *index = NULL;

-    if((fd = fopen(path, "r")) == NULL) {
-        EPRINTF_PATH(path, "fopen failed");
+    if((fd = fopen(path, "rb")) == NULL) {
+        EPRINTF_PATH(path, "fopen for reading failed");
         return NULL;
     }
-    if(fread(&header, 1, sizeof(HashHeader), fd) != sizeof(HashHeader)) {
+    bytes_read = fread(&header, 1, sizeof(HashHeader), fd);
+    if(bytes_read != sizeof(HashHeader)) {
         if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread failed");
+            EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
         }
         else {
-            EPRINTF_MSG_PATH(path, "failed to read %ld bytes", sizeof(HashHeader));
+            EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)",
+                             (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
         }
         goto fail;
     }
@@ -161,31 +166,35 @@ hashindex_read(const char *path)
         EPRINTF_PATH(path, "fseek failed");
         EPRINTF_PATH(path, "fseek failed");
         goto fail;
         goto fail;
     }
     }
-    if(memcmp(header.magic, MAGIC, 8)) {
-        EPRINTF_MSG_PATH(path, "Unknown file header");
+    if(memcmp(header.magic, MAGIC, MAGIC_LEN)) {
+        EPRINTF_MSG_PATH(path, "Unknown MAGIC in header");
         goto fail;
         goto fail;
     }
     }
     buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size);
     buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size);
     if(length != sizeof(HashHeader) + buckets_length) {
     if(length != sizeof(HashHeader) + buckets_length) {
-        EPRINTF_MSG_PATH(path, "Incorrect file length");
+        EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length);
         goto fail;
         goto fail;
     }
     }
     if(!(index = malloc(sizeof(HashIndex)))) {
     if(!(index = malloc(sizeof(HashIndex)))) {
-        EPRINTF_PATH(path, "malloc failed");
+        EPRINTF_PATH(path, "malloc header failed");
         goto fail;
         goto fail;
     }
     }
     if(!(index->buckets = malloc(buckets_length))) {
     if(!(index->buckets = malloc(buckets_length))) {
-        EPRINTF_PATH(path, "malloc failed");
+        EPRINTF_PATH(path, "malloc buckets failed");
         free(index);
         free(index);
         index = NULL;
         index = NULL;
         goto fail;
         goto fail;
     }
     }
-    if(fread(index->buckets, 1, buckets_length, fd) != buckets_length) {
+    bytes_read = fread(index->buckets, 1, buckets_length, fd);
+    if(bytes_read != buckets_length) {
         if(ferror(fd)) {
         if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread failed");
+            EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                         (uintmax_t) buckets_length, (uintmax_t) bytes_read);
         }
         }
         else {
         else {
-            EPRINTF_MSG_PATH(path, "failed to read %ld bytes", length);
+            EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                             (uintmax_t) buckets_length, (uintmax_t) bytes_read);
         }
         }
         free(index->buckets);
         free(index->buckets);
         free(index);
         free(index);
@@ -215,12 +224,12 @@ hashindex_init(int capacity, int key_size, int value_size)
     capacity = MAX(MIN_BUCKETS, capacity);

     if(!(index = malloc(sizeof(HashIndex)))) {
-        EPRINTF("malloc failed");
+        EPRINTF("malloc header failed");
         return NULL;
     }
     buckets_length = (off_t)capacity * (key_size + value_size);
     if(!(index->buckets = calloc(buckets_length, 1))) {
-        EPRINTF("malloc failed");
+        EPRINTF("malloc buckets failed");
         free(index);
         return NULL;
     }
@@ -258,16 +267,16 @@ hashindex_write(HashIndex *index, const char *path)
     };
     int ret = 1;

-    if((fd = fopen(path, "w")) == NULL) {
-        EPRINTF_PATH(path, "open failed");
+    if((fd = fopen(path, "wb")) == NULL) {
+        EPRINTF_PATH(path, "fopen for writing failed");
         return 0;
     }
     if(fwrite(&header, 1, sizeof(header), fd) != sizeof(header)) {
-        EPRINTF_PATH(path, "fwrite failed");
+        EPRINTF_PATH(path, "fwrite header failed");
         ret = 0;
     }
     if(fwrite(index->buckets, 1, buckets_length, fd) != buckets_length) {
-        EPRINTF_PATH(path, "fwrite failed");
+        EPRINTF_PATH(path, "fwrite buckets failed");
         ret = 0;
     }
     if(fclose(fd) < 0) {
@@ -357,14 +366,18 @@ hashindex_get_size(HashIndex *index)
 }

 static void
-hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize)
+hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
+                    long long *total_unique_size, long long *total_unique_csize,
+                    long long *total_unique_chunks, long long *total_chunks)
 {
-    int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0;
+    int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0;
     const int32_t *values;
     void *key = NULL;

     while((key = hashindex_next_key(index, key))) {
-        values = key + 32;
+        values = key + index->key_size;
+        unique_chunks++;
+        chunks += values[0];
         unique_size += values[1];
         unique_csize += values[2];
         size += values[0] * values[1];
@@ -374,5 +387,25 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_csize = csize;
     *total_unique_size = unique_size;
     *total_unique_csize = unique_csize;
+    *total_unique_chunks = unique_chunks;
+    *total_chunks = chunks;
 }

+static void
+hashindex_merge(HashIndex *index, HashIndex *other)
+{
+    int32_t key_size = index->key_size;
+    const int32_t *other_values;
+    int32_t *my_values;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(other, key))) {
+        other_values = key + key_size;
+        my_values = (int32_t *)hashindex_get(index, key);
+        if(my_values == NULL) {
+            hashindex_set(index, key, other_values);
+        } else {
+            *my_values += *other_values;
+        }
+    }
+}

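hashindex_merge above folds one chunk index into another: for keys present in both, only the first value slot (the reference count) is summed; size and csize describe the same chunk and stay unchanged. The same logic modeled with plain dicts mapping chunk id to a (refcount, size, csize) triple (a sketch, not the C implementation):

    def merge_chunk_index(mine, other):
        # For shared keys, add reference counts only, as *my_values += *other_values does.
        for key, (count, size, csize) in other.items():
            if key in mine:
                mine[key] = (mine[key][0] + count, mine[key][1], mine[key][2])
            else:
                mine[key] = (count, size, csize)
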
+ 239 - 0
borg/_version.py

@@ -0,0 +1,239 @@
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+# these strings will be replaced by git during git-archive
+git_refnames = "$Format:%d$"
+git_full = "$Format:%H$"
+
+# these strings are filled in when 'setup.py versioneer' creates _version.py
+tag_prefix = ""
+parentdir_prefix = "borgbackup-"
+versionfile_source = "borg/_version.py"
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
+        try:
+            # remember shell=False, so use git.cmd on windows, not just git
+            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
+        except EnvironmentError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % args[0])
+                print(e)
+            return None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None
+    stdout = p.communicate()[0].strip()
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % args[0])
+        return None
+    return stdout
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose=False):
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
+        return None
+    return {"version": dirname[len(parentdir_prefix):], "full": ""}
+
+
+def git_get_keywords(versionfile_abs):
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["full"] = mo.group(1)
+        f.close()
+    except EnvironmentError:
+        pass
+    return keywords
+
+
+def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
+    if not keywords:
+        return {}  # keyword-finding function failed to find keywords
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        return {}  # unexpanded, so not in an unpacked git-archive tarball
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs-tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full": keywords["full"].strip()}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full": keywords["full"].strip()}
+
+
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
+    # can always test version.endswith(".dirty").
+    version = tag
+    if distance or dirty:
+        version += "+%d.g%s" % (distance, commit) + dirty_suffix
+
+    return version, dirty
+
+
+def git_versions_from_vcs(tag_prefix, root, verbose=False):
+    # this runs 'git' from the root of the source tree. This only gets called
+    # if the git-archive 'subst' keywords were *not* expanded, and
+    # _version.py hasn't already been rewritten with a short version string,
+    # meaning we're inside a checked out source tree.
+
+    if not os.path.exists(os.path.join(root, ".git")):
+        if verbose:
+            print("no .git in %s" % root)
+        return {}  # get_versions() will try next method
+
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
+    # if there are no tags, this yields HEX[-dirty] (no NUM)
+    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
+                                "--always", "--long"],
+                         cwd=root)
+    # --long was added in git-1.5.5
+    if stdout is None:
+        return {}  # try next method
+    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
+
+    # build "full", which is FULLHEX[.dirty]
+    stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if stdout is None:
+        return {}
+    full = stdout.strip()
+    if dirty:
+        full += ".dirty"
+
+    return {"version": version, "full": full}
+
+
+def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    keywords = {"refnames": git_refnames, "full": git_full}
+    ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
+    if ver:
+        return ver
+
+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
+        for i in versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return default
+
+    return (git_versions_from_vcs(tag_prefix, root, verbose)
+            or versions_from_parentdir(parentdir_prefix, root, verbose)
+            or default)

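The version computed by git_versions_from_vcs comes from git describe --tags --dirty --always --long, whose output git_parse_vcs_describe turns into a PEP 440-style string with a local version identifier. A condensed sketch of that transformation (same regex and suffix rules as the code above):

    import re

    def parse_describe(git_describe, tag_prefix=''):
        dirty = git_describe.endswith('-dirty')
        if dirty:
            git_describe = git_describe[:git_describe.rindex('-dirty')]
        suffix = '.dirty' if dirty else ''
        if '-' not in git_describe:  # bare HEX from --always
            return '0+untagged.g' + git_describe + suffix
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            return '0+unparseable' + suffix
        tag = mo.group(1)[len(tag_prefix):]
        distance, commit = int(mo.group(2)), mo.group(3)
        return tag + ('+%d.g%s' % (distance, commit) + suffix if distance or dirty else '')

    print(parse_describe('0.24.0-14-g1234abc-dirty'))  # -> 0.24.0+14.g1234abc.dirty
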
+ 191 - 91
attic/archive.py → borg/archive.py

@@ -2,10 +2,8 @@ from datetime import datetime
 from getpass import getuser
 from itertools import groupby
 import errno
-import shutil
-import tempfile
-from attic.key import key_factory
-from attic.remote import cache_if_remote
+from .key import key_factory
+from .remote import cache_if_remote
 import msgpack
 import os
 import socket
@@ -13,17 +11,22 @@ import stat
 import sys
 import time
 from io import BytesIO
-from attic import xattr
-from attic.platform import acl_get, acl_set
-from attic.chunker import Chunker
-from attic.hashindex import ChunkIndex
-from attic.helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, group2gid, \
+from . import xattr
+from .platform import acl_get, acl_set
+from .chunker import Chunker
+from .hashindex import ChunkIndex
+from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, group2gid, \
     Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int

 ITEMS_BUFFER = 1024 * 1024
-CHUNK_MIN = 1024
-WINDOW_SIZE = 0xfff
-CHUNK_MASK = 0xffff
+
+CHUNK_MIN_EXP = 10  # 2**10 == 1kiB
+CHUNK_MAX_EXP = 23  # 2**23 == 8MiB
+HASH_WINDOW_SIZE = 0xfff  # 4095B
+HASH_MASK_BITS = 16  # results in ~64kiB chunks statistically
+
+# defaults, use --chunker-params to override
+CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)

 utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
 utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {})
@@ -66,12 +69,12 @@ class DownloadPipeline:
 class ChunkBuffer:
     BUFFER_SIZE = 1 * 1024 * 1024

-    def __init__(self, key):
+    def __init__(self, key, chunker_params=CHUNKER_PARAMS):
         self.buffer = BytesIO()
         self.packer = msgpack.Packer(unicode_errors='surrogateescape')
         self.chunks = []
         self.key = key
-        self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
+        self.chunker = Chunker(self.key.chunk_seed, *chunker_params)

     def add(self, item):
         self.buffer.write(self.packer.pack(StableDict(item)))
@@ -101,8 +104,8 @@ class ChunkBuffer:

 class CacheChunkBuffer(ChunkBuffer):

-    def __init__(self, cache, key, stats):
-        super(CacheChunkBuffer, self).__init__(key)
+    def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
+        super().__init__(key, chunker_params)
         self.cache = cache
         self.stats = stats

@@ -122,9 +125,9 @@ class Archive:
     class IncompatibleFilesystemEncodingError(Error):
         """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable."""

-
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
-                 checkpoint_interval=300, numeric_owner=False):
+                 checkpoint_interval=300, numeric_owner=False, progress=False,
+                 chunker_params=CHUNKER_PARAMS):
         self.cwd = os.getcwd()
         self.key = key
         self.repository = repository
@@ -132,20 +135,22 @@ class Archive:
         self.manifest = manifest
         self.hard_links = {}
         self.stats = Statistics()
+        self.show_progress = progress
+        self.last_progress = time.time()
         self.name = name
         self.checkpoint_interval = checkpoint_interval
         self.numeric_owner = numeric_owner
         self.pipeline = DownloadPipeline(self.repository, self.key)
         if create:
-            self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
-            self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
+            self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params)
+            self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.time()
             i = 0
             while True:
                 self.checkpoint_name = '%s.checkpoint%s' % (name, i and ('.%d' % i) or '')
-                if not self.checkpoint_name in manifest.archives:
+                if self.checkpoint_name not in manifest.archives:
                     break
                 i += 1
         else:
@@ -153,13 +158,18 @@ class Archive:
                 raise self.DoesNotExist(name)
             info = self.manifest.archives[name]
             self.load(info[b'id'])
+            self.zeros = b'\0' * (1 << chunker_params[1])
+
+    def _load_meta(self, id):
+        data = self.key.decrypt(id, self.repository.get(id))
+        metadata = msgpack.unpackb(data)
+        if metadata[b'version'] != 1:
+            raise Exception('Unknown archive metadata version')
+        return metadata

     def load(self, id):
         self.id = id
-        data = self.key.decrypt(self.id, self.repository.get(self.id))
-        self.metadata = msgpack.unpackb(data)
-        if self.metadata[b'version'] != 1:
-            raise Exception('Unknown archive metadata version')
+        self.metadata = self._load_meta(self.id)
         decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time'))
         self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
         self.name = self.metadata[b'name']
@@ -177,6 +187,9 @@ class Archive:
             yield item

     def add_item(self, item):
+        if self.show_progress and time.time() - self.last_progress > 0.2:
+            self.stats.show_progress(item=item)
+            self.last_progress = time.time()
         self.items_buffer.add(item)
         if time.time() - self.last_checkpoint > self.checkpoint_interval:
             self.write_checkpoint()
@@ -187,11 +200,13 @@ class Archive:
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)

-    def save(self, name=None):
+    def save(self, name=None, timestamp=None):
         name = name or self.name
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
         self.items_buffer.flush(flush=True)
+        if timestamp is None:
+            timestamp = datetime.utcnow()
         metadata = StableDict({
             'version': 1,
             'name': name,
@@ -199,7 +214,7 @@ class Archive:
             'cmdline': sys.argv,
             'hostname': socket.gethostname(),
             'username': getuser(),
-            'time': datetime.utcnow().isoformat(),
+            'time': timestamp.isoformat(),
         })
         data = msgpack.packb(metadata, unicode_errors='surrogateescape')
         self.id = self.key.id_hash(data)
@@ -214,9 +229,11 @@ class Archive:
             count, size, csize = cache.chunks[id]
             stats.update(size, csize, count == 1)
             cache.chunks[id] = count - 1, size, csize
+
         def add_file_chunks(chunks):
             for id, _, _ in chunks:
                 add(id)
+
         # This function is a bit evil since it abuses the cache to calculate
         # the stats. The cache transaction must be rolled back afterwards
         unpacker = msgpack.Unpacker(use_list=False)
@@ -233,11 +250,14 @@ class Archive:
         cache.rollback()
         return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False):
-        if dry_run:
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+        if dry_run or stdout:
             if b'chunks' in item:
-                for _ in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
-                    pass
+                for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
+                    if stdout:
+                        sys.stdout.buffer.write(data)
+                if stdout:
+                    sys.stdout.buffer.flush()
             return

         dest = self.cwd
@@ -256,12 +276,7 @@ class Archive:
         except OSError:
             pass
         mode = item[b'mode']
-        if stat.S_ISDIR(mode):
-            if not os.path.exists(path):
-                os.makedirs(path)
-            if restore_attrs:
-                self.restore_attrs(path, item)
-        elif stat.S_ISREG(mode):
+        if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
@@ -274,14 +289,20 @@ class Archive:
                 with open(path, 'wb') as fd:
                     ids = [c[0] for c in item[b'chunks']]
                     for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        fd.write(data)
+                        if sparse and self.zeros.startswith(data):
+                            # all-zero chunk: create a hole in a sparse file
+                            fd.seek(len(data), 1)
+                        else:
+                            fd.write(data)
+                    pos = fd.tell()
+                    fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
-        elif stat.S_ISFIFO(mode):
-            if not os.path.exists(os.path.dirname(path)):
-                os.makedirs(os.path.dirname(path))
-            os.mkfifo(path)
-            self.restore_attrs(path, item)
+        elif stat.S_ISDIR(mode):
+            if not os.path.exists(path):
+                os.makedirs(path)
+            if restore_attrs:
+                self.restore_attrs(path, item)
         elif stat.S_ISLNK(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
@@ -290,6 +311,11 @@ class Archive:
                 os.unlink(path)
             os.symlink(source, path)
             self.restore_attrs(path, item, symlink=True)
+        elif stat.S_ISFIFO(mode):
+            if not os.path.exists(os.path.dirname(path)):
+                os.makedirs(os.path.dirname(path))
+            os.mkfifo(path)
+            self.restore_attrs(path, item)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
             os.mknod(path, item[b'mode'], item[b'rdev'])
             self.restore_attrs(path, item)
@@ -340,6 +366,18 @@ class Archive:
             except OSError:
                 pass

+    def rename(self, name):
+        if name in self.manifest.archives:
+            raise self.AlreadyExists(name)
+        metadata = StableDict(self._load_meta(self.id))
+        metadata[b'name'] = name
+        data = msgpack.packb(metadata, unicode_errors='surrogateescape')
+        new_id = self.key.id_hash(data)
+        self.cache.add_chunk(new_id, data, self.stats)
+        self.manifest.archives[name] = {'id': new_id, 'time': metadata[b'time']}
+        self.cache.chunk_decref(self.id, self.stats)
+        del self.manifest.archives[self.name]
+
     def delete(self, stats):
         unpacker = msgpack.Unpacker(use_list=False)
         for items_id, data in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])):
@@ -370,23 +408,53 @@ class Archive:
         acl_get(path, item, st, self.numeric_owner)
         return item

-    def process_item(self, path, st):
+    def process_dir(self, path, st):
         item = {b'path': make_path_safe(path)}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
+        return 'd'  # directory
+
+    def process_fifo(self, path, st):
+        item = {b'path': make_path_safe(path)}
+        item.update(self.stat_attrs(st, path))
+        self.add_item(item)
+        return 'f'  # fifo

     def process_dev(self, path, st):
         item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
+        if stat.S_ISCHR(st.st_mode):
+            return 'c'  # char device
+        elif stat.S_ISBLK(st.st_mode):
+            return 'b'  # block device

     def process_symlink(self, path, st):
         source = os.readlink(path)
         item = {b'path': make_path_safe(path), b'source': source}
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
+        return 's'  # symlink
+
+    def process_stdin(self, path, cache):
+        uid, gid = 0, 0
+        fd = sys.stdin.buffer  # binary
+        chunks = []
+        for chunk in self.chunker.chunkify(fd):
+            chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
+        self.stats.nfiles += 1
+        item = {
+            b'path': path,
+            b'chunks': chunks,
+            b'mode': 0o100660,  # regular file, ug=rw
+            b'uid': uid, b'user': uid2user(uid),
+            b'gid': gid, b'group': gid2group(gid),
+            b'mtime': int_to_bigint(int(time.time()) * 1000000000)
+        }
+        self.add_item(item)

     def process_file(self, path, st, cache):
+        status = None
         safe_path = make_path_safe(path)
         # Is it a hard link?
         if st.st_nlink > 1:
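
Note: process_stdin() above feeds sys.stdin.buffer through the chunker and stores each chunk under its id hash, so piped-in data deduplicates like any other file. A simplified sketch using fixed-size blocks instead of borg's rolling-hash chunker (helper names are hypothetical):

    import hashlib
    import sys

    def chunk_stream(fd, store, block_size=64 * 1024):
        ids = []
        while True:
            block = fd.read(block_size)
            if not block:
                break
            chunk_id = hashlib.sha256(block).hexdigest()
            store.setdefault(chunk_id, block)  # store each distinct chunk once
            ids.append(chunk_id)
        return ids

    # e.g. ids = chunk_stream(sys.stdin.buffer, {})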
@@ -395,7 +463,8 @@ class Archive:
                 item = self.stat_attrs(st, path)
                 item.update({b'path': safe_path, b'source': source})
                 self.add_item(item)
-                return
+                status = 'h'  # regular file, hardlink (to already seen inodes)
+                return status
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
         path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
@@ -408,70 +477,80 @@ class Archive:
                     break
             else:
                 chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
+                status = 'U'  # regular file, unchanged
+        else:
+            status = 'A'  # regular file, added
         # Only chunkify the file if needed
         if chunks is None:
-            with Archive._open_rb(path, st) as fd:
+            fh = Archive._open_rb(path, st)
+            with os.fdopen(fh, 'rb') as fd:
                 chunks = []
-                for chunk in self.chunker.chunkify(fd):
+                for chunk in self.chunker.chunkify(fd, fh):
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
             cache.memorize_file(path_hash, st, [c[0] for c in chunks])
+            status = status or 'M'  # regular file, modified (if not 'A' already)
         item = {b'path': safe_path, b'chunks': chunks}
         item.update(self.stat_attrs(st, path))
         self.stats.nfiles += 1
         self.add_item(item)
+        return status

     @staticmethod
     def list_archives(repository, key, manifest, cache=None):
+        # expensive! see also Manifest.list_archive_infos.
         for name, info in manifest.archives.items():
             yield Archive(repository, key, manifest, name, cache=cache)

     @staticmethod
     def _open_rb(path, st):
-        flags_noatime = None
+        flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
+        flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
         euid = None

         def open_simple(p, s):
-            return open(p, 'rb')
+            return os.open(p, flags_normal)
+
+        def open_noatime(p, s):
+            return os.open(p, flags_noatime)

         def open_noatime_if_owner(p, s):
-            if s.st_uid == euid:
-                return os.fdopen(os.open(p, flags_noatime), 'rb')
+            if euid == 0 or s.st_uid == euid:
+                # we are root or owner of file
+                return open_noatime(p, s)
             else:
-                return open(p, 'rb')
+                return open_simple(p, s)

-        def open_noatime(p, s):
+        def open_noatime_with_fallback(p, s):
             try:
                 fd = os.open(p, flags_noatime)
             except PermissionError:
                 # Was this EPERM due to the O_NOATIME flag?
-                fo = open(p, 'rb')
+                fd = os.open(p, flags_normal)
                 # Yes, it was -- otherwise the above line would have thrown
                 # another exception.
+                nonlocal euid
                 euid = os.geteuid()
                 # So in future, let's check whether the file is owned by us
                 # before attempting to use O_NOATIME.
                 Archive._open_rb = open_noatime_if_owner
-                return fo
-            return os.fdopen(fd, 'rb')
+            return fd

-        o_noatime = getattr(os, 'O_NOATIME', None)
-        if o_noatime is not None:
-            flags_noatime = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | o_noatime
+        if flags_noatime != flags_normal:
             # Always use O_NOATIME version.
-            Archive._open_rb = open_noatime
+            Archive._open_rb = open_noatime_with_fallback
         else:
             # Always use non-O_NOATIME version.
             Archive._open_rb = open_simple
         return Archive._open_rb(path, st)


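Note: the reworked _open_rb() prefers O_NOATIME (so backups do not disturb access times) and permanently falls back, per process, once EPERM shows the flag is only allowed for the file owner or root. A compact per-call variant without the memoization (a sketch; O_NOATIME is Linux-only and simply 0 elsewhere):

    import os

    def open_noatime_rb(path):
        flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
        try:
            fd = os.open(path, flags | getattr(os, 'O_NOATIME', 0))
        except PermissionError:
            # O_NOATIME is refused unless we own the file or are root;
            # retry without it rather than failing the read
            fd = os.open(path, flags)
        return os.fdopen(fd, 'rb')
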
-class RobustUnpacker():
+class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
     item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')]

     def __init__(self, validator):
-        super(RobustUnpacker, self).__init__()
+        super().__init__()
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -529,23 +608,21 @@ class ArchiveChecker:
     def __init__(self):
         self.error_found = False
         self.possibly_superseded = set()
-        self.tmpdir = tempfile.mkdtemp()
-
-    def __del__(self):
-        shutil.rmtree(self.tmpdir)

-    def check(self, repository, repair=False):
+    def check(self, repository, repair=False, archive=None, last=None):
         self.report_progress('Starting archive consistency check...')
+        self.check_all = archive is None and last is None
         self.repair = repair
         self.repository = repository
         self.init_chunks()
         self.key = self.identify_key(repository)
-        if not Manifest.MANIFEST_ID in self.chunks:
+        if Manifest.MANIFEST_ID not in self.chunks:
             self.manifest = self.rebuild_manifest()
         else:
             self.manifest, _ = Manifest.load(repository, key=self.key)
-        self.rebuild_refcounts()
-        self.verify_chunks()
+        self.rebuild_refcounts(archive=archive, last=last)
+        self.orphan_chunks_check()
+        self.finish()
         if not self.error_found:
             self.report_progress('Archive consistency check complete, no problems found.')
         return self.repair or not self.error_found
@@ -553,7 +630,7 @@ class ArchiveChecker:
     def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
-        # Explicity set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
         self.chunks = ChunkIndex(capacity)
@@ -588,11 +665,13 @@ class ArchiveChecker:
             # Some basic sanity checks of the payload before feeding it into msgpack
             if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
                 continue
-            if not b'cmdline' in data or not b'\xa7version\x01' in data:
+            if b'cmdline' not in data or b'\xa7version\x01' not in data:
                 continue
             try:
                 archive = msgpack.unpackb(data)
-            except:
+            # Ignore exceptions that might be raised when feeding
+            # msgpack with invalid data
+            except (TypeError, ValueError, StopIteration):
                 continue
             if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
                 self.report_progress('Found archive ' + archive[b'name'].decode('utf-8'), error=True)
@@ -600,7 +679,7 @@ class ArchiveChecker:
         self.report_progress('Manifest rebuild complete', error=True)
         return manifest

-    def rebuild_refcounts(self):
+    def rebuild_refcounts(self, archive=None, last=None):
         """Rebuild object reference counts by walking the metadata

         Missing and/or incorrect data is repaired when detected
@@ -637,7 +716,7 @@ class ArchiveChecker:
             offset = 0
             chunk_list = []
             for chunk_id, size, csize in item[b'chunks']:
-                if not chunk_id in self.chunks:
+                if chunk_id not in self.chunks:
                     # If a file chunk is missing, create an all-zero replacement chunk
                     self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True)
                     data = bytes(size)
@@ -658,11 +737,13 @@ class ArchiveChecker:
             """
             unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
             _state = 0
+
             def missing_chunk_detector(chunk_id):
                 nonlocal _state
-                if _state % 2 != int(not chunk_id in self.chunks):
+                if _state % 2 != int(chunk_id not in self.chunks):
                     _state += 1
                 return _state
+
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
                 if state % 2:
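
Note: missing_chunk_detector above is a two-phase counter: it bumps its state on every transition between present and missing chunks, so groupby() yields maximal runs, and odd states mark the runs of missing items. The same trick in isolation (a sketch with a generic predicate instead of the chunk-index lookup):

    from itertools import groupby

    def split_runs(items, predicate):
        state = 0
        def detector(item):
            nonlocal state
            if state % 2 != int(predicate(item)):
                state += 1
            return state
        for key, run in groupby(items, detector):
            yield key % 2 == 1, list(run)

    # split_runs([1, 2, -3, -4, 5], lambda x: x < 0)
    # -> (False, [1, 2]), (True, [-3, -4]), (False, [5])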
@@ -673,14 +754,28 @@ class ArchiveChecker:
                 for chunk_id, cdata in zip(items, repository.get_many(items)):
                     unpacker.feed(self.key.decrypt(chunk_id, cdata))
                     for item in unpacker:
+                        if not isinstance(item, dict):
+                            self.report_progress('Did not get expected metadata dict - archive corrupted!',
+                                                 error=True)
+                            continue
                         yield item

         repository = cache_if_remote(self.repository)
-        num_archives = len(self.manifest.archives)
-        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
-            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
+        if archive is None:
+            # we need last N or all archives
+            archive_items = sorted(self.manifest.archives.items(), reverse=True,
+                                   key=lambda name_info: name_info[1][b'time'])
+            num_archives = len(self.manifest.archives)
+            end = None if last is None else min(num_archives, last)
+        else:
+            # we only want one specific archive
+            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
+            num_archives = 1
+            end = 1
+        for i, (name, info) in enumerate(archive_items[:end]):
+            self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
             archive_id = info[b'id']
-            if not archive_id in self.chunks:
+            if archive_id not in self.chunks:
                 self.report_progress('Archive metadata block is missing', error=True)
                 del self.manifest.archives[name]
                 continue
@@ -707,17 +802,22 @@ class ArchiveChecker:
             add_reference(new_archive_id, len(data), len(cdata), cdata)
             info[b'id'] = new_archive_id

-    def verify_chunks(self):
-        unused = set()
-        for id_, (count, size, csize) in self.chunks.iteritems():
-            if count == 0:
-                unused.add(id_)
-        orphaned = unused - self.possibly_superseded
-        if orphaned:
-            self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+    def orphan_chunks_check(self):
+        if self.check_all:
+            unused = set()
+            for id_, (count, size, csize) in self.chunks.iteritems():
+                if count == 0:
+                    unused.add(id_)
+            orphaned = unused - self.possibly_superseded
+            if orphaned:
+                self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+            if self.repair:
+                for id_ in unused:
+                    self.repository.delete(id_)
+        else:
+            self.report_progress('Orphaned objects check skipped (needs all archives checked)')
+
+    def finish(self):
         if self.repair:
-            for id_ in unused:
-                self.repository.delete(id_)
             self.manifest.write()
             self.repository.commit()
-
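
Note: orphan_chunks_check() above only trusts a refcount of zero when every archive was walked; chunks that an interrupted operation may legitimately have superseded are excluded from the error report but still deleted in repair mode. The set arithmetic, reduced to its core (mapping shape assumed from ChunkIndex):

    def find_orphans(chunks, possibly_superseded):
        # chunks: chunk_id -> (refcount, size, csize)
        unused = {id_ for id_, (count, _, _) in chunks.items() if count == 0}
        return unused, unused - possibly_superseded

    # chunks = {'a': (2, 10, 8), 'b': (0, 5, 4), 'c': (0, 7, 6)}
    # find_orphans(chunks, {'b'}) -> ({'b', 'c'}, {'c'})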

+ 273 - 90
attic/archiver.py → borg/archiver.py

@@ -3,23 +3,28 @@ from binascii import hexlify
 from datetime import datetime
 from operator import attrgetter
 import functools
+import inspect
 import io
 import os
+import signal
 import stat
 import sys
 import textwrap
-
-from attic import __version__
-from attic.archive import Archive, ArchiveChecker
-from attic.repository import Repository
-from attic.cache import Cache
-from attic.key import key_creator
-from attic.helpers import Error, location_validator, format_time, \
-    format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
+import traceback
+
+from . import __version__
+from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
+from .repository import Repository
+from .cache import Cache
+from .key import key_creator
+from .helpers import Error, location_validator, format_time, format_file_size, \
+    format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int
-from attic.remote import RepositoryServer, RemoteRepository
+    is_cachedir, bigint_to_int, ChunkerParams
+from .remote import RepositoryServer, RemoteRepository
+
+has_lchflags = hasattr(os, 'lchflags')


 class Archiver:
@@ -38,7 +43,7 @@ class Archiver:
     def print_error(self, msg, *args):
         msg = args and msg % args or msg
         self.exit_code = 1
-        print('attic: ' + msg, file=sys.stderr)
+        print('borg: ' + msg, file=sys.stderr)

     def print_verbose(self, msg, *args, **kw):
         if self.verbose:
@@ -49,7 +54,7 @@ class Archiver:
                 print(msg, end=' ')

     def do_serve(self, args):
-        """Start Attic in server mode. This command is usually not used manually.
+        """Start in server mode. This command is usually not used manually.
         """
         return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve()

@@ -69,7 +74,7 @@ class Archiver:
         """Check repository consistency"""
         repository = self.open_repository(args.repository, exclusive=args.repair)
         if args.repair:
-            while not os.environ.get('ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+            while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                 self.print_error("""Warning: 'check --repair' is an experimental feature that might result
 in data loss.

@@ -82,8 +87,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 print('Repository check complete, no problems found.')
             else:
                 return 1
-        if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair):
-                return 1
+        if not args.repo_only and not ArchiveChecker().check(
+                repository, repair=args.repair, archive=args.repository.archive, last=args.last):
+            return 1
         return 0

     def do_change_passphrase(self, args):
@@ -98,11 +104,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         t0 = datetime.now()
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        key.compression_level = args.compression
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
-                          numeric_owner=args.numeric_owner)
-        # Add Attic cache dir to inode_skip list
+                          numeric_owner=args.numeric_owner, progress=args.progress,
+                          chunker_params=args.chunker_params)
+        # Add cache dir to inode_skip list
         skip_inodes = set()
         try:
             st = os.stat(get_cache_dir())
@@ -117,6 +125,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             except IOError:
                 pass
         for path in args.paths:
+            if path == '-':  # stdin
+                path = 'stdin'
+                self.print_verbose(path)
+                try:
+                    archive.process_stdin(path, cache)
+                except IOError as e:
+                    self.print_error('%s: %s', path, e)
+                continue
             path = os.path.normpath(path)
             if args.dontcross:
                 try:
@@ -127,7 +143,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             else:
                 restrict_dev = None
             self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
-        archive.save()
+        archive.save(timestamp=args.timestamp)
+        if args.progress:
+            archive.stats.show_progress(final=True)
         if args.stats:
             t = datetime.now()
             diff = t - t0
@@ -155,48 +173,67 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # Entering a new filesystem?
         if restrict_dev and st.st_dev != restrict_dev:
             return
-        # Ignore unix sockets
-        if stat.S_ISSOCK(st.st_mode):
+        status = None
+        # Ignore if nodump flag is set
+        if has_lchflags and (st.st_flags & stat.UF_NODUMP):
             return
-        self.print_verbose(remove_surrogates(path))
         if stat.S_ISREG(st.st_mode):
             try:
-                archive.process_file(path, st, cache)
+                status = archive.process_file(path, st, cache)
             except IOError as e:
                 self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
             if exclude_caches and is_cachedir(path):
                 return
-            archive.process_item(path, st)
+            status = archive.process_dir(path, st)
             try:
                 entries = os.listdir(path)
             except OSError as e:
                 self.print_error('%s: %s', path, e)
             else:
                 for filename in sorted(entries):
+                    entry_path = os.path.normpath(os.path.join(path, filename))
                     self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  os.path.join(path, filename), restrict_dev)
+                                  entry_path, restrict_dev)
         elif stat.S_ISLNK(st.st_mode):
-            archive.process_symlink(path, st)
+            status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
-            archive.process_item(path, st)
+            status = archive.process_fifo(path, st)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
-            archive.process_dev(path, st)
+            status = archive.process_dev(path, st)
+        elif stat.S_ISSOCK(st.st_mode):
+            # Ignore unix sockets
+            return
         else:
             self.print_error('Unknown file type: %s', path)
+            return
+        # Status output
+        # A lowercase character means a file type other than a regular file,
+        # borg usually just stores them. E.g. (d)irectory.
+        # Hardlinks to already seen content are indicated by (h).
+        # An uppercase character means a regular file that was (A)dded,
+        # (M)odified or was (U)nchanged.
+        # Note: A/M/U is relative to the "files" cache, not to the repo.
+        # This would be an issue if the files cache is not used.
+        if status is None:
+            status = '?'  # need to add a status code somewhere
+        # output ALL the stuff - it can be easily filtered using grep.
+        # even stuff considered unchanged might be interesting.
+        self.print_verbose("%1s %s", status, remove_surrogates(path))
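
Note: the status letters printed above make verbose output machine-filterable: one character per item, lowercase for non-regular files, uppercase for regular files relative to the files cache. A small legend plus filter, as one might post-process the log (a sketch; the letter meanings are taken from the comments above):

    STATUS = {
        'A': 'added', 'M': 'modified', 'U': 'unchanged', 'h': 'hardlink',
        'd': 'directory', 's': 'symlink', 'f': 'fifo',
        'b': 'block device', 'c': 'char device', '?': 'unknown',
    }

    def changed_only(lines):
        # keep everything except regular files reported as unchanged
        return [line for line in lines if not line.startswith('U ')]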

     def do_extract(self, args):
         """Extract archive contents"""
         # be restrictive when restoring files, restore permissions later
         if sys.getfilesystemencoding() == 'ascii':
             print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
-        os.umask(0o077)
         repository = self.open_repository(args.archive)
         manifest, key = Manifest.load(repository)
         archive = Archive(repository, key, manifest, args.archive.archive,
                           numeric_owner=args.numeric_owner)
         patterns = adjust_patterns(args.paths, args.excludes)
         dry_run = args.dry_run
+        stdout = args.stdout
+        sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
         for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True):
@@ -207,7 +244,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                     continue
             if not args.dry_run:
                 while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
-                    archive.extract_item(dirs.pop(-1))
+                    archive.extract_item(dirs.pop(-1), stdout=stdout)
             self.print_verbose(remove_surrogates(orig_path))
             try:
                 if dry_run:
@@ -217,7 +254,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
-                        archive.extract_item(item)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse)
             except IOError as e:
                 self.print_error('%s: %s', remove_surrogates(orig_path), e)

@@ -226,27 +263,51 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 archive.extract_item(dirs.pop(-1))
         return self.exit_code

-    def do_delete(self, args):
-        """Delete an existing archive"""
+    def do_rename(self, args):
+        """Rename an existing archive"""
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
         cache = Cache(repository, key, manifest)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
-        stats = Statistics()
-        archive.delete(stats)
+        archive.rename(args.name)
         manifest.write()
         repository.commit()
         cache.commit()
-        if args.stats:
-            stats.print_('Deleted data:', cache)
+        return self.exit_code
+
+    def do_delete(self, args):
+        """Delete an existing repository or archive"""
+        repository = self.open_repository(args.target, exclusive=True)
+        manifest, key = Manifest.load(repository)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
+        if args.target.archive:
+            archive = Archive(repository, key, manifest, args.target.archive, cache=cache)
+            stats = Statistics()
+            archive.delete(stats)
+            manifest.write()
+            repository.commit()
+            cache.commit()
+            if args.stats:
+                stats.print_('Deleted data:', cache)
+        else:
+            print("You requested to completely DELETE the repository *including* all archives it contains:")
+            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                print(format_archive(archive_info))
+            while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+                print("""Type "YES" if you understand this and want to continue.\n""")
+                if input('Do you want to continue? ') == 'YES':
+                    break
+            repository.destroy()
+            cache.destroy()
+            print("Repository and corresponding cache were deleted.")
         return self.exit_code

     def do_mount(self, args):
         """Mount archive or an entire repository as a FUSE filesystem"""
         try:
-            from attic.fuse import AtticOperations
-        except ImportError:
-            self.print_error('the "llfuse" module is required to use this feature')
+            from .fuse import FuseOperations
+        except ImportError as e:
+            self.print_error('loading fuse support failed [ImportError: %s]' % str(e))
             return self.exit_code

         if not os.path.isdir(args.mountpoint) or not os.access(args.mountpoint, os.R_OK | os.W_OK | os.X_OK):
@@ -259,7 +320,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             archive = Archive(repository, key, manifest, args.src.archive)
         else:
             archive = None
-        operations = AtticOperations(key, repository, manifest, archive)
+        operations = FuseOperations(key, repository, manifest, archive)
         self.print_verbose("Mounting filesystem")
         try:
             operations.mount(args.mountpoint, args.options, args.foreground)
@@ -284,7 +345,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                         size = sum(size for _, size, _ in item[b'chunks'])
                     except KeyError:
                         pass
-                mtime = format_time(datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9))
+                try:
+                    mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
+                except ValueError:
+                    # likely a broken mtime and datetime did not want to go beyond year 9999
+                    mtime = datetime(9999, 12, 31, 23, 59, 59)
                 if b'source' in item:
                     if type == 'l':
                         extra = ' -> %s' % item[b'source']
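
Note: the try/except added above keeps listing from crashing on damaged mtimes: datetime cannot represent years beyond 9999, so out-of-range nanosecond values are clamped. The same guard as a helper (a sketch, broadened to OverflowError/OSError, which some platforms raise instead of ValueError):

    from datetime import datetime

    def safe_mtime(mtime_ns):
        try:
            return datetime.fromtimestamp(mtime_ns / 1e9)
        except (ValueError, OverflowError, OSError):
            # clamp instead of crashing on a broken timestamp
            return datetime(9999, 12, 31, 23, 59, 59)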
@@ -293,19 +358,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                         extra = ' link to %s' % item[b'source']
                 else:
                     extra = ''
-                print('%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item[b'user'] or item[b'uid'],
-                                                  item[b'group'] or item[b'gid'], size, mtime,
-                                                  remove_surrogates(item[b'path']), extra))
+                print('%s%s %-6s %-6s %8d %s %s%s' % (
+                    type, mode, item[b'user'] or item[b'uid'],
+                    item[b'group'] or item[b'gid'], size, format_time(mtime),
+                    remove_surrogates(item[b'path']), extra))
         else:
-            for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
-                print(format_archive(archive))
+            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                print(format_archive(archive_info))
         return self.exit_code

     def do_info(self, args):
         """Show archive details such as disk space used"""
         repository = self.open_repository(args.archive)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
         stats = archive.calc_stats(cache)
         print('Name:', archive.name)
@@ -322,12 +388,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         """Prune repository archives according to specified rules"""
         repository = self.open_repository(args.repository, exclusive=True)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
-        archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
-                               key=attrgetter('ts'), reverse=True))
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
+        archives = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just an ArchiveInfo list
         if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
-            self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" '
-                             'settings must be specified')
+            self.print_error('At least one of the "within", "keep-hourly", "keep-daily", "keep-weekly", '
+                             '"keep-monthly" or "keep-yearly" settings must be specified')
             return 1
         if args.prefix:
             archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
@@ -355,7 +420,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 self.print_verbose('Would prune:     %s' % format_archive(archive))
             else:
                 self.print_verbose('Pruning archive: %s' % format_archive(archive))
-                archive.delete(stats)
+                Archive(repository, key, manifest, archive.name, cache).delete(stats)
         if to_delete and not args.dry_run:
             manifest.write()
             repository.commit()
@@ -381,17 +446,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         Examples:

         # Exclude '/home/user/file.o' but not '/home/user/file.odt':
-        $ attic create -e '*.o' repo.attic /
+        $ borg create -e '*.o' backup /

         # Exclude '/home/user/junk' and '/home/user/subdir/junk' but
         # not '/home/user/importantjunk' or '/etc/junk':
-        $ attic create -e '/home/*/junk' repo.attic /
+        $ borg create -e '/home/*/junk' backup /

         # Exclude the contents of '/home/user/cache' but not the directory itself:
-        $ attic create -e /home/user/cache/ repo.attic /
+        $ borg create -e /home/user/cache/ backup /

         # The file '/home/user/cache/important' is *not* backed up:
-        $ attic create -e /home/user/cache/ repo.attic / /home/user/cache/important
+        $ borg create -e /home/user/cache/ backup / /home/user/cache/important
         '''

     def do_help(self, parser, commands, args):
@@ -420,7 +485,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             ('--yearly', '--keep-yearly', 'Warning: "--yearly" has been deprecated. Use "--keep-yearly" instead.')
         ]
         if args and args[0] == 'verify':
-            print('Warning: "attic verify" has been deprecated. Use "attic extract --dry-run" instead.')
+            print('Warning: "borg verify" has been deprecated. Use "borg extract --dry-run" instead.')
             args = ['extract', '--dry-run'] + args[1:]
         for i, arg in enumerate(args[:]):
             for old_name, new_name, warning in deprecations:
@@ -442,24 +507,34 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             with open(os.path.join(cache_dir, 'CACHEDIR.TAG'), 'w') as fd:
                 fd.write(textwrap.dedent("""
                     Signature: 8a477f597d28d172789f06886806bc55
-                    # This file is a cache directory tag created by Attic.
+                    # This file is a cache directory tag created by Borg.
                     # For information about cache directory tags, see:
                     #       http://www.brynosaurus.com/cachedir/
                     """).lstrip())
         common_parser = argparse.ArgumentParser(add_help=False)
         common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
-                            default=False,
-                            help='verbose output')
+                                   default=False,
+                                   help='verbose output')
+        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
+                                   help='do not load/update the file metadata cache used to detect unchanged files')
+        common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M',
+                                   help='set umask to M (local and remote, default: 0o077)')
+        common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
+                                   help='set remote path to executable (default: "borg")')

         # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
         if args:
             args = self.preprocess_args(args)

-        parser = argparse.ArgumentParser(description='Attic %s - Deduplicated Backups' % __version__)
+        parser = argparse.ArgumentParser(description='Borg %s - Deduplicated Backups' % __version__)
         subparsers = parser.add_subparsers(title='Available commands')

+        serve_epilog = textwrap.dedent("""
+        This command starts a repository server process. This command is usually not used manually.
+        """)
         subparser = subparsers.add_parser('serve', parents=[common_parser],
-                                          description=self.do_serve.__doc__)
+                                          description=self.do_serve.__doc__, epilog=serve_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_serve)
         subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append',
                                metavar='PATH', help='restrict repository access to PATH')
@@ -467,6 +542,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         This command initializes an empty repository. A repository is a filesystem
         directory containing the deduplicated data from zero or more archives.
         Encryption can be enabled at repository init time.
+        Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it,
+        consider using 'repokey').
         """)
         subparser = subparsers.add_parser('init', parents=[common_parser],
                                           description=self.do_init.__doc__, epilog=init_epilog,
@@ -476,27 +553,51 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                type=location_validator(archive=False),
                                help='repository to create')
         subparser.add_argument('-e', '--encryption', dest='encryption',
-                               choices=('none', 'passphrase', 'keyfile'), default='none',
-                               help='select encryption method')
+                               choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none',
+                               help='select encryption key mode')

         check_epilog = textwrap.dedent("""
-        The check command verifies the consistency of a repository and the corresponding
-        archives. The underlying repository data files are first checked to detect bit rot
-        and other types of damage. After that the consistency and correctness of the archive
-        metadata is verified.
-
-        The archive metadata checks can be time consuming and requires access to the key
-        file and/or passphrase if encryption is enabled. These checks can be skipped using
-        the --repository-only option.
+        The check command verifies the consistency of a repository and the corresponding archives.
+
+        First, the underlying repository data files are checked:
+        - For all segments the segment magic (header) is checked
+        - For all objects stored in the segments, all metadata (e.g. crc and size) and
+          all data is read. The read data is checked by size and CRC. Bit rot and other
+          types of accidental damage can be detected this way.
+        - If we are in repair mode and an integrity error is detected for a segment,
+          we try to recover as many objects from the segment as possible.
+        - In repair mode, it makes sure that the index is consistent with the data
+          stored in the segments.
+        - If you use a remote repo server via ssh:, the repo check is executed on the
+          repo server without causing significant network traffic.
+        - The repository check can be skipped using the --archives-only option.
+
+        Second, the consistency and correctness of the archive metadata is verified:
+        - Is the repo manifest present? If not, it is rebuilt from archive metadata
+          chunks (this requires reading and decrypting of all metadata and data).
+        - Check if archive metadata chunk is present. If not, remove archive from
+          manifest.
+        - For all files (items) in the archive, for all chunks referenced by these
+          files, check if chunk is present (if not and we are in repair mode, replace
+          it with a same-size chunk of zeros). This requires reading of archive and
+          file metadata, but not data.
+        - If we are in repair mode and we checked all the archives: delete orphaned
+          chunks from the repo.
+        - If you use a remote repo server via ssh:, the archive check is executed on
+          the client machine (because if encryption is enabled, the checks will require
+          decryption and this is always done client-side, because key access will be
+          required).
+        - The archive checks can be time consuming; they can be skipped using the
+          --repository-only option.
         """)
         subparser = subparsers.add_parser('check', parents=[common_parser],
                                           description=self.do_check.__doc__,
                                           epilog=check_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_check)
-        subparser.add_argument('repository', metavar='REPOSITORY',
-                               type=location_validator(archive=False),
-                               help='repository to check consistency of')
+        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
+                               type=location_validator(),
+                               help='repository or archive to check consistency of')
         subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
                                default=False,
                                help='only perform repository checks')
@@ -506,6 +607,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--repair', dest='repair', action='store_true',
                                default=False,
                                help='attempt to repair any inconsistencies found')
+        subparser.add_argument('--last', dest='last',
+                               type=int, default=None, metavar='N',
+                               help='only check last N archives (Default: all)')
 
         change_passphrase_epilog = textwrap.dedent("""
         The key files used for repository encryption are optionally passphrase
@@ -524,7 +628,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         traversing all paths specified. The archive will consume almost no disk space for
         files or parts of files that have already been stored in other archives.
 
-        See "attic help patterns" for more help on exclude patterns.
+        See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
         """)
 
 
         subparser = subparsers.add_parser('create', parents=[common_parser],
         subparser = subparsers.add_parser('create', parents=[common_parser],
@@ -535,6 +639,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('-s', '--stats', dest='stats',
                                action='store_true', default=False,
                                help='print statistics for the created archive')
+        subparser.add_argument('-p', '--progress', dest='progress',
+                               action='store_true', default=False,
+                               help='print progress while creating the archive')
         subparser.add_argument('-e', '--exclude', dest='excludes',
                                type=ExcludePattern, action='append',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
@@ -553,6 +660,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--numeric-owner', dest='numeric_owner',
                                action='store_true', default=False,
                                help='only store numeric user and group identifiers')
+        subparser.add_argument('--timestamp', dest='timestamp',
+                               type=timestamp, default=None,
+                               metavar='yyyy-mm-ddThh:mm:ss',
+                               help='manually specify the archive creation date/time (UTC). '
+                                    'alternatively, give a reference file/directory.')
+        subparser.add_argument('--chunker-params', dest='chunker_params',
+                               type=ChunkerParams, default=CHUNKER_PARAMS,
+                               metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
+                               help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
+        subparser.add_argument('-C', '--compression', dest='compression',
+                               type=int, default=0, metavar='N',
+                               help='select compression algorithm and level. 0..9 is supported and means zlib '
+                                    'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
@@ -565,7 +685,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         by passing a list of ``PATHs`` as arguments. The file selection can further
         be restricted by using the ``--exclude`` option.
 
-        See "attic help patterns" for more help on exclude patterns.
+        See the output of the "borg help patterns" command for more help on exclude patterns.
         """)
         """)
         subparser = subparsers.add_parser('extract', parents=[common_parser],
         subparser = subparsers.add_parser('extract', parents=[common_parser],
                                           description=self.do_extract.__doc__,
                                           description=self.do_extract.__doc__,
@@ -587,15 +707,36 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--strip-components', dest='strip_components',
                                type=int, default=0, metavar='NUMBER',
                                help='Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped.')
+        subparser.add_argument('--stdout', dest='stdout',
+                               action='store_true', default=False,
+                               help='write all extracted data to stdout')
+        subparser.add_argument('--sparse', dest='sparse',
+                               action='store_true', default=False,
+                               help='create holes in output sparse file from all-zero chunks')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to extract')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                                help='paths to extract')
 
+        rename_epilog = textwrap.dedent("""
+        This command renames an archive in the repository.
+        """)
+        subparser = subparsers.add_parser('rename', parents=[common_parser],
+                                          description=self.do_rename.__doc__,
+                                          epilog=rename_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_rename)
+        subparser.add_argument('archive', metavar='ARCHIVE',
+                               type=location_validator(archive=True),
+                               help='archive to rename')
+        subparser.add_argument('name', metavar='NEWNAME', type=str,
+                               help='the new archive name to use')
+
         delete_epilog = textwrap.dedent("""
-        This command deletes an archive from the repository. Any disk space not
-        shared with any other existing archive is also reclaimed.
+        This command deletes an archive from the repository or the complete repository.
+        Disk space is reclaimed accordingly. If you delete the complete repository, the
+        local cache for it (if any) is also deleted.
         """)
         subparser = subparsers.add_parser('delete', parents=[common_parser],
                                           description=self.do_delete.__doc__,
@@ -605,9 +746,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('-s', '--stats', dest='stats',
                                action='store_true', default=False,
                                help='print statistics for the deleted archive')
-        subparser.add_argument('archive', metavar='ARCHIVE',
-                               type=location_validator(archive=True),
-                               help='archive to delete')
+        subparser.add_argument('target', metavar='TARGET',
+                               type=location_validator(),
+                               help='archive or repository to delete')
 
         list_epilog = textwrap.dedent("""
         This command lists the contents of a repository or an archive.
@@ -716,27 +857,69 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
 
         args = parser.parse_args(args or ['-h'])
         self.verbose = args.verbose
+        os.umask(args.umask)
+        RemoteRepository.remote_path = args.remote_path
+        RemoteRepository.umask = args.umask
         update_excludes(args)
         return args.func(args)
 
 
-def main():
+def sig_info_handler(signum, stack):  # pragma: no cover
+    """search the stack for infos about the currently processed file and print them"""
+    for frame in inspect.getouterframes(stack):
+        func, loc = frame[3], frame[0].f_locals
+        if func in ('process_file', '_process', ):  # create op
+            path = loc['path']
+            try:
+                pos = loc['fd'].tell()
+                total = loc['st'].st_size
+            except Exception:
+                pos, total = 0, 0
+            print("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
+            break
+        if func in ('extract_item', ):  # extract op
+            path = loc['item'][b'path']
+            try:
+                pos = loc['fd'].tell()
+            except Exception:
+                pos = 0
+            print("{0} {1}/???".format(path, format_file_size(pos)))
+            break
+
+
+def setup_signal_handlers():  # pragma: no cover
+    sigs = []
+    if hasattr(signal, 'SIGUSR1'):
+        sigs.append(signal.SIGUSR1)  # kill -USR1 pid
+    if hasattr(signal, 'SIGINFO'):
+        sigs.append(signal.SIGINFO)  # kill -INFO pid (or ctrl-t)
+    for sig in sigs:
+        signal.signal(sig, sig_info_handler)
+
+
+def main():  # pragma: no cover
     # Make sure stdout and stderr have errors='replace') to avoid unicode
     # issues when print()-ing unicode file names
     sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)
     sys.stderr = io.TextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True)
+    setup_signal_handlers()
     archiver = Archiver()
     try:
         exit_code = archiver.run(sys.argv[1:])
     except Error as e:
-        archiver.print_error(e.get_message())
+        archiver.print_error(e.get_message() + "\n%s" % traceback.format_exc())
         exit_code = e.exit_code
+    except RemoteRepository.RPCError as e:
+        archiver.print_error('Error: Remote Exception.\n%s' % str(e))
+        exit_code = 1
+    except Exception:
+        archiver.print_error('Error: Local Exception.\n%s' % traceback.format_exc())
+        exit_code = 1
     except KeyboardInterrupt:
-        archiver.print_error('Error: Keyboard interrupt')
+        archiver.print_error('Error: Keyboard interrupt.\n%s' % traceback.format_exc())
         exit_code = 1
-    else:
-        if exit_code:
-            archiver.print_error('Exiting with failure status due to previous errors')
+    if exit_code:
+        archiver.print_error('Exiting with failure status due to previous errors')
     sys.exit(exit_code)
 
 if __name__ == '__main__':

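The sig_info_handler() added above leans on a niche trick: a Python signal handler receives the interrupted frame, and inspect can walk outward from it to read a worker's locals. Below is a minimal self-contained sketch of the same idea, assuming a hypothetical worker named copy_file (this is not Borg's actual code):

    import inspect
    import os
    import signal
    import sys

    def sig_info_handler(signum, stack):
        # Walk outward from the interrupted frame, looking for a worker whose
        # locals reveal which file is being processed and how far along it is.
        for frame_info in inspect.getouterframes(stack):
            if frame_info[3] == 'copy_file':      # hypothetical worker function
                loc = frame_info[0].f_locals
                print('%s %d/%d bytes' % (loc['path'], loc['fd'].tell(), loc['total']),
                      file=sys.stderr)
                break

    def copy_file(path):
        total = os.path.getsize(path)
        with open(path, 'rb') as fd:
            while fd.read(65536):
                pass                              # real work would happen here

    if hasattr(signal, 'SIGUSR1'):                # kill -USR1 <pid> to trigger
        signal.signal(signal.SIGUSR1, sig_info_handler)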
+ 161 - 31
attic/cache.py → borg/cache.py

@@ -1,18 +1,22 @@
 from configparser import RawConfigParser
-from attic.remote import cache_if_remote
+from .remote import cache_if_remote
+import errno
 import msgpack
 import os
 import sys
 from binascii import hexlify
 import shutil
+import tarfile
+import tempfile
 
 from .key import PlaintextKey
-from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, UpgradableLock, int_to_bigint, \
+from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, int_to_bigint, \
     bigint_to_int
+from .locking import UpgradableLock
 from .hashindex import ChunkIndex
 
 
-class Cache(object):
+class Cache:
     """Client Side cache
     """
     class RepositoryReplay(Error):
@@ -28,26 +32,28 @@ class Cache(object):
         """Repository encryption method changed since last acccess, refusing to continue
         """Repository encryption method changed since last acccess, refusing to continue
         """
         """
 
 
-    def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True):
+    def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True):
         self.lock = None
         self.timestamp = None
+        self.lock = None
         self.txn_active = False
         self.repository = repository
         self.key = key
         self.manifest = manifest
         self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
+        self.do_files = do_files
         # Warn user before sending data to a never seen before unencrypted repository
         if not os.path.exists(self.path):
             if warn_if_unencrypted and isinstance(key, PlaintextKey):
                 if not self._confirm('Warning: Attempting to access a previously unknown unencrypted repository',
-                                     'ATTIC_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'):
+                                     'BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'):
                     raise self.CacheInitAbortedError()
             self.create()
         self.open()
         # Warn user before sending data to a relocated repository
         if self.previous_location and self.previous_location != repository._location.canonical_path():
             msg = 'Warning: The repository at location {} was previously located at {}'.format(repository._location.canonical_path(), self.previous_location)
-            if not self._confirm(msg, 'ATTIC_RELOCATED_REPO_ACCESS_IS_OK'):
+            if not self._confirm(msg, 'BORG_RELOCATED_REPO_ACCESS_IS_OK'):
                 raise self.RepositoryAccessAborted()
 
         if sync and self.manifest.id != self.manifest_id:
@@ -77,11 +83,11 @@ class Cache(object):
         return answer and answer in 'Yy'
 
     def create(self):
-        """Create a new empty cache at `path`
+        """Create a new empty cache at `self.path`
         """
         os.makedirs(self.path)
         with open(os.path.join(self.path, 'README'), 'w') as fd:
-            fd.write('This is an Attic cache')
+            fd.write('This is a Borg cache')
         config = RawConfigParser()
         config.add_section('cache')
         config.set('cache', 'version', '1')
@@ -90,14 +96,23 @@ class Cache(object):
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
         ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
-        with open(os.path.join(self.path, 'files'), 'w') as fd:
+        with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
+            pass  # empty file
+        with open(os.path.join(self.path, 'files'), 'wb') as fd:
             pass  # empty file
 
+    def destroy(self):
+        """destroy the cache at `self.path`
+        """
+        self.close()
+        os.remove(os.path.join(self.path, 'config'))  # kill config first
+        shutil.rmtree(self.path)
+
     def _do_open(self):
         self.config = RawConfigParser()
         self.config.read(os.path.join(self.path, 'config'))
         if self.config.getint('cache', 'version') != 1:
-            raise Exception('%s Does not look like an Attic cache')
+            raise Exception('%s Does not look like a Borg cache')
         self.id = self.config.get('cache', 'repository')
         self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
@@ -108,8 +123,8 @@ class Cache(object):
 
 
     def open(self):
         if not os.path.isdir(self.path):
-            raise Exception('%s Does not look like an Attic cache' % self.path)
-        self.lock = UpgradableLock(os.path.join(self.path, 'config'), exclusive=True)
+            raise Exception('%s Does not look like a Borg cache' % self.path)
+        self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire()
         self.rollback()
 
     def close(self):
@@ -129,6 +144,7 @@ class Cache(object):
                 u.feed(data)
                 for path_hash, item in u:
                     item[0] += 1
+                    # in the end, this takes about 240 Bytes per file
                     self.files[path_hash] = msgpack.packb(item)
 
     def begin_txn(self):
@@ -137,6 +153,7 @@ class Cache(object):
         os.mkdir(txn_dir)
         shutil.copy(os.path.join(self.path, 'config'), txn_dir)
         shutil.copy(os.path.join(self.path, 'chunks'), txn_dir)
+        shutil.copy(os.path.join(self.path, 'chunks.archive'), txn_dir)
         shutil.copy(os.path.join(self.path, 'files'), txn_dir)
         os.rename(os.path.join(self.path, 'txn.tmp'),
                   os.path.join(self.path, 'txn.active'))
@@ -178,6 +195,7 @@ class Cache(object):
         if os.path.exists(txn_dir):
             shutil.copy(os.path.join(txn_dir, 'config'), self.path)
             shutil.copy(os.path.join(txn_dir, 'chunks'), self.path)
+            shutil.copy(os.path.join(txn_dir, 'chunks.archive'), self.path)
             shutil.copy(os.path.join(txn_dir, 'files'), self.path)
             os.rename(txn_dir, os.path.join(self.path, 'txn.tmp'))
             if os.path.exists(os.path.join(self.path, 'txn.tmp')):
@@ -186,37 +204,145 @@ class Cache(object):
         self._do_open()
 
     def sync(self):
-        """Initializes cache by fetching and reading all archive indicies
+        """Re-synchronize chunks cache with repository.
+
+        If present, uses a compressed tar archive of known backup archive
+        indices, so it only needs to fetch infos from repo and build a chunk
+        index once per backup archive.
+        If out of sync, the tar gets rebuilt from known + fetched chunk infos,
+        so it has complete and current information about all backup archives.
+        Finally, it builds the master chunks index by merging all indices from
+        the tar.
+
+        Note: compression (esp. xz) is very effective in keeping the tar
+              relatively small compared to the files it contains.
         """
         """
-        def add(id, size, csize):
+        in_archive_path = os.path.join(self.path, 'chunks.archive')
+        out_archive_path = os.path.join(self.path, 'chunks.archive.tmp')
+
+        def open_in_archive():
             try:
-                count, size, csize = self.chunks[id]
-                self.chunks[id] = count + 1, size, csize
+                tf = tarfile.open(in_archive_path, 'r')
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                # file not found
+                tf = None
+            except tarfile.ReadError:
+                # empty file?
+                tf = None
+            return tf
+
+        def open_out_archive():
+            for compression in ('xz', 'bz2', 'gz'):
+                # xz needs py 3.3, bz2 and gz also work on 3.2
+                try:
+                    tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT)
+                    break
+                except tarfile.CompressionError:
+                    continue
+            else:  # shouldn't happen
+                tf = None
+            return tf
+
+        def close_archive(tf):
+            if tf:
+                tf.close()
+
+        def delete_in_archive():
+            os.unlink(in_archive_path)
+
+        def rename_out_archive():
+            os.rename(out_archive_path, in_archive_path)
+
+        def add(chunk_idx, id, size, csize, incr=1):
+            try:
+                count, size, csize = chunk_idx[id]
+                chunk_idx[id] = count + incr, size, csize
             except KeyError:
-                self.chunks[id] = 1, size, csize
-        self.begin_txn()
-        print('Initializing cache...')
-        self.chunks.clear()
-        unpacker = msgpack.Unpacker()
-        repository = cache_if_remote(self.repository)
-        for name, info in self.manifest.archives.items():
-            archive_id = info[b'id']
+                chunk_idx[id] = incr, size, csize
+
+        def transfer_known_idx(archive_id, tf_in, tf_out):
+            archive_id_hex = hexlify(archive_id).decode('ascii')
+            tarinfo = tf_in.getmember(archive_id_hex)
+            archive_name = tarinfo.pax_headers['archive_name']
+            print('Already known archive:', archive_name)
+            f_in = tf_in.extractfile(archive_id_hex)
+            tf_out.addfile(tarinfo, f_in)
+            return archive_name
+
+        def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
+            chunk_idx = ChunkIndex()
             cdata = repository.get(archive_id)
-            data = self.key.decrypt(archive_id, cdata)
-            add(archive_id, len(data), len(cdata))
+            data = key.decrypt(archive_id, cdata)
+            add(chunk_idx, archive_id, len(data), len(cdata))
             archive = msgpack.unpackb(data)
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
             decode_dict(archive, (b'name',))
-            print('Analyzing archive:', archive[b'name'])
-            for key, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
-                data = self.key.decrypt(key, chunk)
-                add(key, len(data), len(chunk))
+            print('Analyzing new archive:', archive[b'name'])
+            unpacker = msgpack.Unpacker()
+            for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
+                data = key.decrypt(item_id, chunk)
+                add(chunk_idx, item_id, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
+                    if not isinstance(item, dict):
+                        print('Error: Did not get expected metadata dict - archive corrupted!')
+                        continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
-                            add(chunk_id, size, csize)
+                            add(chunk_idx, chunk_id, size, csize)
+            archive_id_hex = hexlify(archive_id).decode('ascii')
+            file_tmp = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
+            chunk_idx.write(file_tmp)
+            tarinfo = tf_out.gettarinfo(file_tmp, archive_id_hex)
+            tarinfo.pax_headers['archive_name'] = archive[b'name']
+            with open(file_tmp, 'rb') as f:
+                tf_out.addfile(tarinfo, f)
+            os.unlink(file_tmp)
+
+        def create_master_idx(chunk_idx, tf_in, tmp_dir):
+            chunk_idx.clear()
+            for tarinfo in tf_in:
+                archive_id_hex = tarinfo.name
+                archive_name = tarinfo.pax_headers['archive_name']
+                print("- extracting archive %s ..." % archive_name)
+                tf_in.extract(archive_id_hex, tmp_dir)
+                chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
+                print("- reading archive ...")
+                archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
+                print("- merging archive ...")
+                chunk_idx.merge(archive_chunk_idx)
+                os.unlink(chunk_idx_path)
+
+        self.begin_txn()
+        print('Synchronizing chunks cache...')
+        # XXX we have to do stuff on disk due to lacking ChunkIndex api
+        with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir:
+            repository = cache_if_remote(self.repository)
+            out_archive = open_out_archive()
+            in_archive = open_in_archive()
+            if in_archive:
+                known_ids = set(unhexlify(hexid) for hexid in in_archive.getnames())
+            else:
+                known_ids = set()
+            archive_ids = set(info[b'id'] for info in self.manifest.archives.values())
+            print('Rebuilding archive collection. Known: %d Repo: %d Unknown: %d' % (
+                len(known_ids), len(archive_ids), len(archive_ids - known_ids), ))
+            for archive_id in archive_ids & known_ids:
+                transfer_known_idx(archive_id, in_archive, out_archive)
+            close_archive(in_archive)
+            delete_in_archive()  # free disk space
+            for archive_id in archive_ids - known_ids:
+                fetch_and_build_idx(archive_id, repository, self.key, tmp_dir, out_archive)
+            close_archive(out_archive)
+            rename_out_archive()
+            print('Merging collection into master chunks cache...')
+            in_archive = open_in_archive()
+            create_master_idx(self.chunks, in_archive, tmp_dir)
+            close_archive(in_archive)
+            print('Done.')
 
     def add_chunk(self, id, data, stats):
         if not self.txn_active:
@@ -255,6 +381,8 @@ class Cache(object):
             stats.update(-size, -csize, False)
 
     def file_known_and_unchanged(self, path_hash, st):
+        if not self.do_files:
+            return None
         if self.files is None:
             self._read_files()
         entry = self.files.get(path_hash)
@@ -270,6 +398,8 @@ class Cache(object):
             return None
 
     def memorize_file(self, path_hash, st, ids):
+        if not self.do_files:
+            return
         # Entry: Age, inode, size, mtime, chunk ids
         mtime_ns = st_mtime_ns(st)
         self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids))

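Cache.sync() above stores one chunk index per backup archive inside a compressed tar, with the human-readable archive name tucked into a PAX header of each member. Here is a minimal sketch of that tarfile technique, with made-up file names (not Borg's actual on-disk layout):

    import io
    import tarfile

    data = b'\x00' * 16                      # stand-in for a serialized chunk index
    with tarfile.open('indices.tar.gz', 'w:gz', format=tarfile.PAX_FORMAT) as tf:
        info = tarfile.TarInfo(name='0011aabb')          # member named by archive id
        info.size = len(data)
        info.pax_headers['archive_name'] = 'my-backup'   # extra per-member metadata
        tf.addfile(info, io.BytesIO(data))

    with tarfile.open('indices.tar.gz', 'r') as tf:
        for member in tf:
            print(member.name, member.pax_headers.get('archive_name'))

The PAX headers survive the round trip, which is what lets sync() print the archive name for already-known indices without fetching or decrypting anything.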
+ 17 - 7
attic/chunker.pyx → borg/chunker.pyx

@@ -8,8 +8,8 @@ cdef extern from "_chunker.c":
     ctypedef int uint32_t
     ctypedef struct _Chunker "Chunker":
         pass
-    _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed)
-    void chunker_set_fd(_Chunker *chunker, object fd)
+    _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
+    void chunker_set_fd(_Chunker *chunker, object f, int fd)
     void chunker_free(_Chunker *chunker)
     object chunker_process(_Chunker *chunker)
     uint32_t *buzhash_init_table(uint32_t seed)
@@ -20,11 +20,21 @@ cdef extern from "_chunker.c":
 cdef class Chunker:
     cdef _Chunker *chunker
 
-    def __cinit__(self, window_size, chunk_mask, min_size, seed):
-        self.chunker = chunker_init(window_size, chunk_mask, min_size, seed & 0xffffffff)
+    def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size):
+        min_size = 1 << chunk_min_exp
+        max_size = 1 << chunk_max_exp
+        hash_mask = (1 << hash_mask_bits) - 1
+        self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
 
-    def chunkify(self, fd):
-        chunker_set_fd(self.chunker, fd)
+    def chunkify(self, fd, fh=-1):
+        """
+        Cut a file into chunks.
+
+        :param fd: Python file object
+        :param fh: OS-level file handle (if available),
+                   defaults to -1 which means not to use OS-level fd.
+        """
+        chunker_set_fd(self.chunker, fd, fh)
         return self
 
     def __dealloc__(self):
@@ -52,4 +62,4 @@ def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t
     table = buzhash_init_table(seed & 0xffffffff)
     sum = c_buzhash_update(sum, remove, add, len, table)
     free(table)
-    return sum
+    return sum

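For orientation, the exponent-style arguments the new Chunker takes map to byte sizes as follows. The numbers below are illustrative only (CHUNKER_PARAMS itself is defined elsewhere in borg); the buzhash rolls over the file and a chunk is cut where the masked hash matches, so the expected average chunk size is roughly 1 << hash_mask_bits:

    # illustrative values, not necessarily borg's defaults
    chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size = 10, 23, 16, 4095

    min_size = 1 << chunk_min_exp           # 1024 bytes: never cut smaller chunks
    max_size = 1 << chunk_max_exp           # 8 MiB: hard upper bound per chunk
    hash_mask = (1 << hash_mask_bits) - 1   # cut where (buzhash & hash_mask) matches
    print(min_size, max_size, hex(hash_mask))   # -> 1024 8388608 0xffff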
+ 173 - 0
borg/crypto.pyx

@@ -0,0 +1,173 @@
+"""A thin OpenSSL wrapper
+
+This could be replaced by PyCrypto or something similar when the performance
+of their PBKDF2 implementation is comparable to the OpenSSL version.
+"""
+from libc.stdlib cimport malloc, free
+
+API_VERSION = 2
+
+cdef extern from "openssl/rand.h":
+    int  RAND_bytes(unsigned char *buf, int num)
+
+
+cdef extern from "openssl/evp.h":
+    ctypedef struct EVP_MD:
+        pass
+    ctypedef struct EVP_CIPHER:
+        pass
+    ctypedef struct EVP_CIPHER_CTX:
+        unsigned char *iv
+        pass
+    ctypedef struct ENGINE:
+        pass
+    const EVP_MD *EVP_sha256()
+    const EVP_CIPHER *EVP_aes_256_ctr()
+    void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
+    void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
+
+    int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
+                           const unsigned char *key, const unsigned char *iv)
+    int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
+                           const unsigned char *key, const unsigned char *iv)
+    int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
+                          const unsigned char *in_, int inl)
+    int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl,
+                          const unsigned char *in_, int inl)
+    int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
+    int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
+
+    int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
+                          const unsigned char *salt, int saltlen, int iter,
+                          const EVP_MD *digest,
+                          int keylen, unsigned char *out)
+
+import struct
+
+_int = struct.Struct('>I')
+_long = struct.Struct('>Q')
+
+bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
+bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
+long_to_bytes = lambda x: _long.pack(x)
+
+
+def num_aes_blocks(length):
+    """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data.
+       Note: this is only correct for modes without padding, like AES-CTR.
+    """
+    return (length + 15) // 16
+
+
+def pbkdf2_sha256(password, salt, iterations, size):
+    """Password based key derivation function 2 (RFC2898)
+    """
+    cdef unsigned char *key = <unsigned char *>malloc(size)
+    if not key:
+        raise MemoryError
+    try:
+        rv = PKCS5_PBKDF2_HMAC(password, len(password), salt, len(salt), iterations, EVP_sha256(), size, key)
+        if not rv:
+            raise Exception('PKCS5_PBKDF2_HMAC failed')
+        return key[:size]
+    finally:
+        free(key)
+
+
+def get_random_bytes(n):
+    """Return n cryptographically strong pseudo-random bytes
+    """
+    cdef unsigned char *buf = <unsigned char *>malloc(n)
+    if not buf:
+        raise MemoryError
+    try:
+        if RAND_bytes(buf, n) < 1:
+            raise Exception('RAND_bytes failed')
+        return buf[:n]
+    finally:
+        free(buf)
+
+
+cdef class AES:
+    """A thin wrapper around the OpenSSL EVP cipher API
+    """
+    cdef EVP_CIPHER_CTX ctx
+    cdef int is_encrypt
+
+    def __cinit__(self, is_encrypt, key, iv=None):
+        EVP_CIPHER_CTX_init(&self.ctx)
+        self.is_encrypt = is_encrypt
+        # Set cipher type and mode
+        cipher_mode = EVP_aes_256_ctr()
+        if self.is_encrypt:
+            if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
+                raise Exception('EVP_EncryptInit_ex failed')
+        else:  # decrypt
+            if not EVP_DecryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
+                raise Exception('EVP_DecryptInit_ex failed')
+        self.reset(key, iv)
+
+    def __dealloc__(self):
+        EVP_CIPHER_CTX_cleanup(&self.ctx)
+
+    def reset(self, key=None, iv=None):
+        cdef const unsigned char *key2 = NULL
+        cdef const unsigned char *iv2 = NULL
+        if key:
+            key2 = key
+        if iv:
+            iv2 = iv
+        # Initialise key and IV
+        if self.is_encrypt:
+            if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
+                raise Exception('EVP_EncryptInit_ex failed')
+        else:  # decrypt
+            if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
+                raise Exception('EVP_DecryptInit_ex failed')
+
+    @property
+    def iv(self):
+        return self.ctx.iv[:16]
+
+    def encrypt(self, data):
+        cdef int inl = len(data)
+        cdef int ctl = 0
+        cdef int outl = 0
+        # note: modes that use padding, need up to one extra AES block (16b)
+        cdef unsigned char *out = <unsigned char *>malloc(inl+16)
+        if not out:
+            raise MemoryError
+        try:
+            if not EVP_EncryptUpdate(&self.ctx, out, &outl, data, inl):
+                raise Exception('EVP_EncryptUpdate failed')
+            ctl = outl
+            if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
+                raise Exception('EVP_EncryptFinal failed')
+            ctl += outl
+            return out[:ctl]
+        finally:
+            free(out)
+
+    def decrypt(self, data):
+        cdef int inl = len(data)
+        cdef int ptl = 0
+        cdef int outl = 0
+        # note: modes that use padding, need up to one extra AES block (16b).
+        # This is what the openssl docs say. I am not sure this is correct,
+        # but OTOH it will not cause any harm if our buffer is a little bigger.
+        cdef unsigned char *out = <unsigned char *>malloc(inl+16)
+        if not out:
+            raise MemoryError
+        try:
+            if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl):
+                raise Exception('EVP_DecryptUpdate failed')
+            ptl = outl
+            if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
+                # this error check is very important for modes with padding or
+                # authentication. for them, a failure here means corrupted data.
+                # CTR mode does not use padding nor authentication.
+                raise Exception('EVP_DecryptFinal failed')
+            ptl += outl
+            return out[:ptl]
+        finally:
+            free(out)

+ 17 - 13
attic/fuse.py → borg/fuse.py

@@ -7,9 +7,9 @@ import os
 import stat
 import tempfile
 import time
-from attic.archive import Archive
-from attic.helpers import daemonize
-from attic.remote import cache_if_remote
+from .archive import Archive
+from .helpers import daemonize
+from .remote import cache_if_remote
 
 # Does this version of llfuse support ns precision?
 have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns')
@@ -17,7 +17,7 @@ have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns')
 
 
 class ItemCache:
     def __init__(self):
-        self.fd = tempfile.TemporaryFile()
+        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
         self.offset = 1000000
 
     def add(self, item):
@@ -30,11 +30,11 @@ class ItemCache:
         return next(msgpack.Unpacker(self.fd))
 
 
-class AtticOperations(llfuse.Operations):
-    """Export Attic archive as a fuse filesystem
+class FuseOperations(llfuse.Operations):
+    """Export archive as a fuse filesystem
     """
     def __init__(self, key, repository, manifest, archive):
-        super(AtticOperations, self).__init__()
+        super().__init__()
         self._inode_count = 0
         self.key = key
         self.repository = cache_if_remote(repository)
@@ -43,6 +43,7 @@ class AtticOperations(llfuse.Operations):
         self.contents = defaultdict(dict)
         self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
         self.pending_archives = {}
+        self.accounted_chunks = {}
         self.cache = ItemCache()
         if archive:
             self.process_archive(archive)
@@ -130,8 +131,13 @@ class AtticOperations(llfuse.Operations):
     def getattr(self, inode):
         item = self.get_item(inode)
         size = 0
+        dsize = 0
         try:
-            size = sum(size for _, size, _ in item[b'chunks'])
+            for key, chunksize, _ in item[b'chunks']:
+                size += chunksize
+                if self.accounted_chunks.get(key, inode) == inode:
+                    self.accounted_chunks[key] = inode
+                    dsize += chunksize
         except KeyError:
             pass
         entry = llfuse.EntryAttributes()
@@ -146,7 +152,7 @@ class AtticOperations(llfuse.Operations):
         entry.st_rdev = item.get(b'rdev', 0)
         entry.st_size = size
         entry.st_blksize = 512
-        entry.st_blocks = 1
+        entry.st_blocks = dsize / 512
         if have_fuse_mtime_ns:
             entry.st_atime_ns = item[b'mtime']
             entry.st_mtime_ns = item[b'mtime']
@@ -220,7 +226,7 @@ class AtticOperations(llfuse.Operations):
         return os.fsencode(item[b'source'])
 
     def mount(self, mountpoint, extra_options, foreground=False):
-        options = ['fsname=atticfs', 'ro']
+        options = ['fsname=borgfs', 'ro']
         if extra_options:
             options.extend(extra_options.split(','))
         llfuse.init(self, mountpoint, options)
@@ -228,7 +234,5 @@ class AtticOperations(llfuse.Operations):
             daemonize()
         try:
             llfuse.main(single=True)
-        except:
+        finally:
             llfuse.close()
-            raise
-        llfuse.close()

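The getattr() change above derives st_blocks from a deduplicated size: each chunk id is charged to the first inode that references it, so du over the mounted archive approximates unique (deduplicated) space rather than the logical sum. A standalone sketch of that accounting, with invented chunk data:

    accounted_chunks = {}   # chunk id -> inode the chunk was first charged to

    def dedup_size(inode, chunks):
        """chunks: (chunk_id, size) pairs referenced by this inode."""
        dsize = 0
        for key, size in chunks:
            # charge the chunk to this inode only if nobody claimed it before
            if accounted_chunks.setdefault(key, inode) == inode:
                dsize += size
        return dsize

    print(dedup_size(1, [('a', 512), ('b', 512)]))   # 1024: both chunks new here
    print(dedup_size(2, [('a', 512), ('c', 512)]))   # 512: 'a' already charged to 1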
+ 18 - 7
attic/hashindex.pyx → borg/hashindex.pyx

@@ -11,7 +11,10 @@ cdef extern from "_hashindex.c":
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
-    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
+    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
+                             long long *unique_size, long long *unique_csize,
+                             long long *total_unique_chunks, long long *total_chunks)
+    void hashindex_merge(HashIndex *index, HashIndex *other)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -22,15 +25,18 @@ cdef extern from "_hashindex.c":
     int _le32toh(int v)
 
 
-_NoDefault = object()
+cdef _NoDefault = object()
 
+cimport cython
+
+@cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
     key_size = 32
 
     def __cinit__(self, capacity=0, path=None):
         if path:
-            self.index = hashindex_read(<bytes>os.fsencode(path))
+            self.index = hashindex_read(os.fsencode(path))
             if not self.index:
                 raise Exception('hashindex_read failed')
         else:
@@ -47,7 +53,7 @@ cdef class IndexBase:
         return cls(path=path)
 
     def write(self, path):
-        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
+        if not hashindex_write(self.index, os.fsencode(path)):
             raise Exception('hashindex_write failed')
 
     def clear(self):
@@ -179,9 +185,14 @@ cdef class ChunkIndex(IndexBase):
         return iter
 
     def summarize(self):
-        cdef long long total_size, total_csize, unique_size, unique_csize
-        hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
-        return total_size, total_csize, unique_size, unique_csize
+        cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
+        hashindex_summarize(self.index, &total_size, &total_csize,
+                            &unique_size, &unique_csize,
+                            &total_unique_chunks, &total_chunks)
+        return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
+
+    def merge(self, ChunkIndex other):
+        hashindex_merge(self.index, other.index)
 
 
 cdef class ChunkKeyIterator:

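The new merge() hands two indices to hashindex_merge() at the C level; semantically, reference counts for matching chunk ids add up while size/csize carry over. A plain-dict sketch of that behaviour (the real ChunkIndex stores the same triples in a C hash table):

    def merge(index, other):
        # index/other map chunk id -> (refcount, size, csize)
        for key, (count, size, csize) in other.items():
            if key in index:
                index[key] = (index[key][0] + count, size, csize)
            else:
                index[key] = (count, size, csize)

    master = {b'id1': (1, 4096, 2048)}
    merge(master, {b'id1': (2, 4096, 2048), b'id2': (1, 1024, 512)})
    assert master == {b'id1': (3, 4096, 2048), b'id2': (1, 1024, 512)}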
+ 83 - 67
attic/helpers.py → borg/helpers.py

@@ -1,7 +1,7 @@
 import argparse
 import binascii
+from collections import namedtuple
 import grp
-import msgpack
 import os
 import pwd
 import re
@@ -10,11 +10,12 @@ import time
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from operator import attrgetter
-import fcntl
 
-import attic.hashindex
-import attic.chunker
-import attic.crypto
+import msgpack
+
+from . import hashindex
+from . import chunker
+from . import crypto
 
 
 class Error(Exception):
@@ -27,55 +28,18 @@
 
 
 class ExtensionModuleError(Error):
-    """The Attic binary extension modules does not seem to be properly installed"""
-
-
-class UpgradableLock:
-
-    class ReadLockFailed(Error):
-        """Failed to acquire read lock on {}"""
-
-    class WriteLockFailed(Error):
-        """Failed to acquire write lock on {}"""
-
-    def __init__(self, path, exclusive=False):
-        self.path = path
-        try:
-            self.fd = open(path, 'r+')
-        except IOError:
-            self.fd = open(path, 'r')
-        try:
-            if exclusive:
-                fcntl.lockf(self.fd, fcntl.LOCK_EX)
-            else:
-                fcntl.lockf(self.fd, fcntl.LOCK_SH)
-        # Python 3.2 raises IOError, Python3.3+ raises OSError
-        except (IOError, OSError):
-            if exclusive:
-                raise self.WriteLockFailed(self.path)
-            else:
-                raise self.ReadLockFailed(self.path)
-        self.is_exclusive = exclusive
-
-    def upgrade(self):
-        try:
-            fcntl.lockf(self.fd, fcntl.LOCK_EX)
-        # Python 3.2 raises IOError, Python3.3+ raises OSError
-        except (IOError, OSError):
-            raise self.WriteLockFailed(self.path)
-        self.is_exclusive = True
-
-    def release(self):
-        fcntl.lockf(self.fd, fcntl.LOCK_UN)
-        self.fd.close()
+    """The Borg binary extension modules do not seem to be properly installed"""
 
 
 
 
 def check_extension_modules():
 def check_extension_modules():
-    import attic.platform
-    if (attic.hashindex.API_VERSION != 2 or
-        attic.chunker.API_VERSION != 2 or
-        attic.crypto.API_VERSION != 2 or
-        attic.platform.API_VERSION != 2):
+    from . import platform
+    if hashindex.API_VERSION != 2:
+        raise ExtensionModuleError
+    if chunker.API_VERSION != 2:
+        raise ExtensionModuleError
+    if crypto.API_VERSION != 2:
+        raise ExtensionModuleError
+    if platform.API_VERSION != 2:
         raise ExtensionModuleError
 
 
@@ -101,7 +65,7 @@ class Manifest:
         m = msgpack.unpackb(data)
         if not m.get(b'version') == 1:
             raise ValueError('Invalid manifest version')
-        manifest.archives = dict((k.decode('utf-8'), v) for k,v in m[b'archives'].items())
+        manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items())
         manifest.timestamp = m.get(b'timestamp')
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
@@ -119,6 +83,18 @@ class Manifest:
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
 
+    def list_archive_infos(self, sort_by=None, reverse=False):
+        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+        ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+        archives = []
+        for name, values in self.archives.items():
+            ts = parse_timestamp(values[b'time'].decode('utf-8'))
+            id = values[b'id']
+            archives.append(ArchiveInfo(name=name, id=id, ts=ts))
+        if sort_by is not None:
+            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+        return archives
+
 
 def prune_within(archives, within):
     multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}
@@ -144,7 +120,8 @@ def prune_split(archives, pattern, n, skip=[]):
             last = period
             if a not in skip:
                 keep.append(a)
-                if len(keep) == n: break
+                if len(keep) == n:
+                    break
     return keep
 
 
@@ -160,23 +137,38 @@ class Statistics:
             self.usize += csize
 
     def print_(self, label, cache):
-        total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize()
+        total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks = cache.chunks.summarize()
         print()
         print('                       Original size      Compressed size    Deduplicated size')
         print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize)))
         print('All archives:   %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize)))
+        print()
+        print('                       Unique chunks         Total chunks')
+        print('Chunk index:    %20d %20d' % (total_unique_chunks, total_chunks))
+
+    def show_progress(self, item=None, final=False):
+        if not final:
+            path = remove_surrogates(item[b'path']) if item else ''
+            if len(path) > 43:
+                path = '%s...%s' % (path[:20], path[-20:])
+            msg = '%9s O %9s C %9s D %-43s' % (
+                format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize), path)
+        else:
+            msg = ' ' * 79
+        print(msg, end='\r')
+        sys.stdout.flush()
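Sketch of the 43-column path truncation used by show_progress() above (illustrative):

    path = '/home/user/projects/borgbackup/docs/internals.rst'   # 49 chars, hypothetical
    if len(path) > 43:
        path = '%s...%s' % (path[:20], path[-20:])
    assert len(path) == 43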
 
 
 
 
 def get_keys_dir():
     """Determine where to store repository keys"""
-    return os.environ.get('ATTIC_KEYS_DIR',
-                          os.path.join(os.path.expanduser('~'), '.attic', 'keys'))
+    return os.environ.get('BORG_KEYS_DIR',
+                          os.path.join(os.path.expanduser('~'), '.borg', 'keys'))


 def get_cache_dir():
     """Determine where to store the repository cache"""
-    return os.environ.get('ATTIC_CACHE_DIR',
-                          os.path.join(os.path.expanduser('~'), '.cache', 'attic'))
+    return os.environ.get('BORG_CACHE_DIR',
+                          os.path.join(os.path.expanduser('~'), '.cache', 'borg'))
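Both lookups honor the environment first; illustrative override (paths are hypothetical):

    os.environ['BORG_KEYS_DIR'] = '/secure/usb/borg-keys'    # default: ~/.borg/keys
    os.environ['BORG_CACHE_DIR'] = '/scratch/borg-cache'     # default: ~/.cache/borg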
 
 
 
 
 def to_localtime(ts):
@@ -235,7 +227,7 @@ class IncludePattern:
     path match as well.  A trailing slash makes no difference.
     """
     def __init__(self, pattern):
-        self.pattern = pattern.rstrip(os.path.sep)+os.path.sep
+        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep

     def match(self, path):
         return (path+os.path.sep).startswith(self.pattern)
@@ -250,9 +242,9 @@ class ExcludePattern(IncludePattern):
     """
     def __init__(self, pattern):
         if pattern.endswith(os.path.sep):
-            self.pattern = pattern+'*'+os.path.sep
+            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
         else:
-            self.pattern = pattern+os.path.sep+'*'
+            self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
         # fnmatch and re.match both cache compiled regular expressions.
         # Nevertheless, this is about 10 times faster.
         self.regex = re.compile(translate(self.pattern))
@@ -264,6 +256,31 @@ class ExcludePattern(IncludePattern):
         return '%s(%s)' % (type(self), self.pattern)
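Effect of the os.path.normpath() changes above (illustrative; assumes borg.helpers.ExcludePattern):

    assert ExcludePattern('/home//user/').match('/home/user/.bashrc')    # redundant slash is harmless now
    assert not ExcludePattern('/home/user').match('/home/user2/file')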
 
 
 
 
+def timestamp(s):
+    """Convert a --timestamp=s argument to a datetime object"""
+    try:
+        # is it pointing to a file / directory?
+        ts = os.stat(s).st_mtime
+        return datetime.utcfromtimestamp(ts)
+    except OSError:
+        # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support.
+        for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00',
+                       '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S',
+                       '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M',
+                       '%Y-%m-%d', '%Y-%j',
+                       ):
+            try:
+                return datetime.strptime(s, format)
+            except ValueError:
+                continue
+        raise ValueError
+
+
+def ChunkerParams(s):
+    window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
+    return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
+
+
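Both new argparse type helpers are plain converters (illustrative, hypothetical values):

    window_size, chunk_mask, chunk_min, chunk_max = ChunkerParams('4095,16,10,23')
    dt = timestamp('2015-06-30 12:00')   # naive datetime, interpreted as UTC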
 def is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
@@ -475,9 +492,9 @@ class Location:
             else:
                 path = self.path
             return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '',
-                                                        self.host,
-                                                        ':{}'.format(self.port) if self.port else '',
-                                                        path)
+                                           self.host,
+                                           ':{}'.format(self.port) if self.port else '',
+                                           path)
 
 
 
 
 def location_validator(archive=None):
@@ -503,7 +520,7 @@ def write_msgpack(filename, d):
     with open(filename + '.tmp', 'wb') as fd:
         msgpack.pack(d, fd)
         fd.flush()
-        os.fsync(fd)
+        os.fsync(fd.fileno())
     os.rename(filename + '.tmp', filename)
 
 
 
 
@@ -552,7 +569,7 @@ def daemonize():
 class StableDict(dict):
     """A dict subclass with stable items() ordering"""
     def items(self):
-        return sorted(super(StableDict, self).items())
+        return sorted(super().items())
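Why stable ordering matters (illustrative): sorted items() give byte-identical msgpack output for equal dicts, so metadata hashes deterministically.

    d = StableDict(b=2, a=1)
    assert list(d.items()) == [('a', 1), ('b', 2)]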
 
 
 
 
 if sys.version < '3.3':
@@ -588,4 +605,3 @@ def int_to_bigint(value):
     if value.bit_length() > 63:
         return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
     return value
-
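Sketch of the 63-bit cutoff in int_to_bigint() (illustrative):

    assert int_to_bigint(123) == 123                  # small ints pass through
    assert isinstance(int_to_bigint(2 ** 64), bytes)  # wide ints become little-endian bytes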

+ 197 - 103
attic/key.py → borg/key.py

@@ -1,5 +1,6 @@
 from binascii import hexlify, a2b_base64, b2a_base64
-from getpass import getpass
+import configparser
+import getpass
 import os
 import msgpack
 import textwrap
@@ -7,8 +8,8 @@ import hmac
 from hashlib import sha256
 import zlib

-from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
-from attic.helpers import IntegrityError, get_keys_dir, Error
+from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
+from .helpers import IntegrityError, get_keys_dir, Error


 PREFIX = b'\0' * 8
 
 
@@ -17,11 +18,17 @@ class UnsupportedPayloadError(Error):
     """Unsupported payload type {}. A newer version is required to access this repository.
     """

+
 class KeyfileNotFoundError(Error):
     """No key file for repository {} found in {}.
     """


+class RepoKeyNotFoundError(Error):
+    """No key entry found in the config of repository {}.
+    """
+
+
 class HMAC(hmac.HMAC):
     """Workaround a bug in Python < 3.4 where HMAC does not accept memoryviews
     """
@@ -32,27 +39,36 @@ class HMAC(hmac.HMAC):
 def key_creator(repository, args):
     if args.encryption == 'keyfile':
         return KeyfileKey.create(repository, args)
-    elif args.encryption == 'passphrase':
+    elif args.encryption == 'repokey':
+        return RepoKey.create(repository, args)
+    elif args.encryption == 'passphrase':  # deprecated, kill in 1.x
         return PassphraseKey.create(repository, args)
     else:
         return PlaintextKey.create(repository, args)
 
 
 
 
 def key_factory(repository, manifest_data):
-    if manifest_data[0] == KeyfileKey.TYPE:
+    key_type = manifest_data[0]
+    if key_type == KeyfileKey.TYPE:
         return KeyfileKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PassphraseKey.TYPE:
+    elif key_type == RepoKey.TYPE:
+        return RepoKey.detect(repository, manifest_data)
+    elif key_type == PassphraseKey.TYPE:  # deprecated, kill in 1.x
         return PassphraseKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PlaintextKey.TYPE:
+    elif key_type == PlaintextKey.TYPE:
         return PlaintextKey.detect(repository, manifest_data)
     else:
-        raise UnsupportedPayloadError(manifest_data[0])
+        raise UnsupportedPayloadError(key_type)
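Dispatch sketch (illustrative): the first byte of the encrypted manifest selects the key class (0x00 keyfile, 0x01 passphrase, 0x03 repokey, per the TYPE constants in this file; Manifest is assumed from borg.helpers):

    key = key_factory(repository, manifest_data)             # dispatches on manifest_data[0]
    data = key.decrypt(Manifest.MANIFEST_ID, manifest_data)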
 
 
 
 
-class KeyBase(object):
+class KeyBase:
+    TYPE = None  # override in subclasses
 
 
-    def __init__(self):
+    def __init__(self, repository):
         self.TYPE_STR = bytes([self.TYPE])
+        self.repository = repository
+        self.target = None  # key location file path / repo obj
+        self.compression_level = 0

     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
@@ -72,18 +88,18 @@ class PlaintextKey(KeyBase):

     @classmethod
     def create(cls, repository, args):
-        print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
-        return cls()
+        print('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile|passphrase" to enable encryption.')
+        return cls(repository)

     @classmethod
     def detect(cls, repository, manifest_data):
-        return cls()
+        return cls(repository)
-        return cls()
+        return cls(repository)
 
 
     def id_hash(self, data):
         return sha256(data).digest()

     def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data)])
+        return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])

     def decrypt(self, id, data):
         if data[0] != self.TYPE:
@@ -115,7 +131,7 @@ class AESKeyBase(KeyBase):
         return HMAC(self.id_key, data, sha256).digest()

     def encrypt(self, data):
-        data = zlib.compress(data)
+        data = zlib.compress(data, self.compression_level)
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
         hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
@@ -149,42 +165,69 @@ class AESKeyBase(KeyBase):
             self.chunk_seed = self.chunk_seed - 0xffffffff - 1

     def init_ciphers(self, enc_iv=b''):
-        self.enc_cipher = AES(self.enc_key, enc_iv)
-        self.dec_cipher = AES(self.enc_key)
+        self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv)
+        self.dec_cipher = AES(is_encrypt=False, key=self.enc_key)
+
+
+class Passphrase(str):
+    @classmethod
+    def env_passphrase(cls, default=None):
+        passphrase = os.environ.get('BORG_PASSPHRASE', default)
+        if passphrase is not None:
+            return cls(passphrase)
+
+    @classmethod
+    def getpass(cls, prompt):
+        return cls(getpass.getpass(prompt))
+
+    @classmethod
+    def new(cls, allow_empty=False):
+        passphrase = cls.env_passphrase()
+        if passphrase is not None:
+            return passphrase
+        while True:
+            passphrase = cls.getpass('Enter new passphrase: ')
+            if allow_empty or passphrase:
+                passphrase2 = cls.getpass('Enter same passphrase again: ')
+                if passphrase == passphrase2:
+                    print('Remember your passphrase. Your data will be inaccessible without it.')
+                    return passphrase
+                else:
+                    print('Passphrases do not match')
+            else:
+                print('Passphrase must not be blank')
+
+    def __repr__(self):
+        return '<Passphrase "***hidden***">'
+
+    def kdf(self, salt, iterations, length):
+        return pbkdf2_sha256(self.encode('utf-8'), salt, iterations, length)
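Usage sketch for the new Passphrase helper (illustrative): the repr never leaks the secret, and BORG_PASSPHRASE short-circuits any prompting.

    os.environ['BORG_PASSPHRASE'] = 'example-secret'                     # hypothetical
    passphrase = Passphrase.env_passphrase()
    assert repr(passphrase) == '<Passphrase "***hidden***">'
    derived = passphrase.kdf(salt=b'\0' * 32, iterations=100000, length=32)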
 
 
 
 
 class PassphraseKey(AESKeyBase):
+    # This mode is DEPRECATED and will be killed at 1.0 release.
+    # With this mode:
+    # - you can never ever change your passphrase for existing repos.
+    # - you can never ever use a different iterations count for existing repos.
     TYPE = 0x01
-    iterations = 100000
+    iterations = 100000  # must not be changed ever!

     @classmethod
     def create(cls, repository, args):
-        key = cls()
-        passphrase = os.environ.get('ATTIC_PASSPHRASE')
-        if passphrase is not None:
-            passphrase2 = passphrase
-        else:
-            passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('Enter passphrase: ')
-            if not passphrase:
-                print('Passphrase must not be blank')
-                continue
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
+        key = cls(repository)
+        print('WARNING: "passphrase" mode is deprecated and will be removed in 1.0.')
+        print('If you want something similar (but with less issues), use "repokey" mode.')
+        passphrase = Passphrase.new(allow_empty=False)
         key.init(repository, passphrase)
-        if passphrase:
-            print('Remember your passphrase. Your data will be inaccessible without it.')
         return key
 
 
     @classmethod
     def detect(cls, repository, manifest_data):
         prompt = 'Enter passphrase for %s: ' % repository._location.orig
-        key = cls()
-        passphrase = os.environ.get('ATTIC_PASSPHRASE')
+        key = cls(repository)
+        passphrase = Passphrase.env_passphrase()
         if passphrase is None:
-            passphrase = getpass(prompt)
+            passphrase = Passphrase.getpass(prompt)
         while True:
             key.init(repository, passphrase)
             try:
@@ -193,44 +236,40 @@ class PassphraseKey(AESKeyBase):
                 key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
                 return key
             except IntegrityError:
-                passphrase = getpass(prompt)
+                passphrase = Passphrase.getpass(prompt)
+
+    def change_passphrase(self):
+        class ImmutablePassphraseError(Error):
+            """The passphrase for this encryption key type can't be changed."""
+
+        raise ImmutablePassphraseError
 
 
     def init(self, repository, passphrase):
-        self.init_from_random_data(pbkdf2_sha256(passphrase.encode('utf-8'), repository.id, self.iterations, 100))
+        self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100))
         self.init_ciphers()
 
 
 
 
-class KeyfileKey(AESKeyBase):
-    FILE_ID = 'ATTIC KEY'
-    TYPE = 0x00
-
+class KeyfileKeyBase(AESKeyBase):
     @classmethod
     def detect(cls, repository, manifest_data):
-        key = cls()
-        path = cls.find_key_file(repository)
-        prompt = 'Enter passphrase for key file %s: ' % path
-        passphrase = os.environ.get('ATTIC_PASSPHRASE', '')
-        while not key.load(path, passphrase):
-            passphrase = getpass(prompt)
+        key = cls(repository)
+        target = key.find_key()
+        prompt = 'Enter passphrase for key %s: ' % target
+        passphrase = Passphrase.env_passphrase(default='')
+        while not key.load(target, passphrase):
+            passphrase = Passphrase.getpass(prompt)
         num_blocks = num_aes_blocks(len(manifest_data) - 41)
         key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
         return key
 
 
-    @classmethod
-    def find_key_file(cls, repository):
-        id = hexlify(repository.id).decode('ascii')
-        keys_dir = get_keys_dir()
-        for name in os.listdir(keys_dir):
-            filename = os.path.join(keys_dir, name)
-            with open(filename, 'r') as fd:
-                line = fd.readline().strip()
-                if line and line.startswith(cls.FILE_ID) and line[10:] == id:
-                    return filename
-        raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir())
+    def find_key(self):
+        raise NotImplementedError
+
+    def load(self, target, passphrase):
+        raise NotImplementedError
 
 
-    def load(self, filename, passphrase):
-        with open(filename, 'r') as fd:
-            cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii'))  # .encode needed for Python 3.[0-2]
+    def _load(self, key_data, passphrase):
+        cdata = a2b_base64(key_data.encode('ascii'))  # .encode needed for Python 3.[0-2]
         data = self.decrypt_key_file(cdata, passphrase)
         if data:
             key = msgpack.unpackb(data)
@@ -241,25 +280,24 @@ class KeyfileKey(AESKeyBase):
             self.enc_hmac_key = key[b'enc_hmac_key']
             self.id_key = key[b'id_key']
             self.chunk_seed = key[b'chunk_seed']
-            self.path = filename
             return True
+        return False

     def decrypt_key_file(self, data, passphrase):
         d = msgpack.unpackb(data)
         assert d[b'version'] == 1
         assert d[b'algorithm'] == b'sha256'
-        key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
-        data = AES(key).decrypt(d[b'data'])
-        if HMAC(key, data, sha256).digest() != d[b'hash']:
-            return None
-        return data
+        key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32)
+        data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
+        if HMAC(key, data, sha256).digest() == d[b'hash']:
+            return data
 
 
     def encrypt_key_file(self, data, passphrase):
         salt = get_random_bytes(32)
         iterations = 100000
-        key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
+        key = passphrase.kdf(salt, iterations, 32)
         hash = HMAC(key, data, sha256).digest()
-        cdata = AES(key).encrypt(data)
+        cdata = AES(is_encrypt=True, key=key).encrypt(data)
         d = {
             'version': 1,
             'salt': salt,
@@ -270,7 +308,7 @@ class KeyfileKey(AESKeyBase):
         }
         return msgpack.packb(d)

-    def save(self, path, passphrase):
+    def _save(self, passphrase):
         key = {
             'version': 1,
             'repository_id': self.repository_id,
@@ -280,45 +318,101 @@ class KeyfileKey(AESKeyBase):
             'chunk_seed': self.chunk_seed,
         }
         data = self.encrypt_key_file(msgpack.packb(key), passphrase)
-        with open(path, 'w') as fd:
-            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
-            fd.write('\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))))
-            fd.write('\n')
-        self.path = path
+        key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))
+        return key_data
 
 
     def change_passphrase(self):
-        passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('New passphrase: ')
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
-        self.save(self.path, passphrase)
-        print('Key file "%s" updated' % self.path)
+        passphrase = Passphrase.new(allow_empty=True)
+        self.save(self.target, passphrase)
+        print('Key updated')
 
 
     @classmethod
     def create(cls, repository, args):
+        passphrase = Passphrase.new(allow_empty=True)
+        key = cls(repository)
+        key.repository_id = repository.id
+        key.init_from_random_data(get_random_bytes(100))
+        key.init_ciphers()
+        target = key.get_new_target(args)
+        key.save(target, passphrase)
+        print('Key in "%s" created.' % target)
+        print('Keep this key safe. Your data will be inaccessible without it.')
+        return key
+
+    def save(self, target, passphrase):
+        raise NotImplementedError
+
+    def get_new_target(self, args):
+        raise NotImplementedError
+
+
+class KeyfileKey(KeyfileKeyBase):
+    TYPE = 0x00
+    FILE_ID = 'BORG_KEY'
+
+    def find_key(self):
+        id = hexlify(self.repository.id).decode('ascii')
+        keys_dir = get_keys_dir()
+        for name in os.listdir(keys_dir):
+            filename = os.path.join(keys_dir, name)
+            with open(filename, 'r') as fd:
+                line = fd.readline().strip()
+                if line.startswith(self.FILE_ID) and line[len(self.FILE_ID)+1:] == id:
+                    return filename
+        raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
+
+    def get_new_target(self, args):
         filename = args.repository.to_key_filename()
         path = filename
         i = 1
         while os.path.exists(path):
             i += 1
             path = filename + '.%d' % i
-        passphrase = os.environ.get('ATTIC_PASSPHRASE')
-        if passphrase is not None:
-            passphrase2 = passphrase
-        else:
-            passphrase, passphrase2 = 1, 2
-        while passphrase != passphrase2:
-            passphrase = getpass('Enter passphrase (empty for no passphrase):')
-            passphrase2 = getpass('Enter same passphrase again: ')
-            if passphrase != passphrase2:
-                print('Passphrases do not match')
-        key = cls()
-        key.repository_id = repository.id
-        key.init_from_random_data(get_random_bytes(100))
-        key.init_ciphers()
-        key.save(path, passphrase)
-        print('Key file "%s" created.' % key.path)
-        print('Keep this file safe. Your data will be inaccessible without it.')
-        return key
+        return path
+
+    def load(self, target, passphrase):
+        with open(target, 'r') as fd:
+            key_data = ''.join(fd.readlines()[1:])
+        success = self._load(key_data, passphrase)
+        if success:
+            self.target = target
+        return success
+
+    def save(self, target, passphrase):
+        key_data = self._save(passphrase)
+        with open(target, 'w') as fd:
+            fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
+            fd.write(key_data)
+            fd.write('\n')
+        self.target = target
+
+
+class RepoKey(KeyfileKeyBase):
+    TYPE = 0x03
+
+    def find_key(self):
+        loc = self.repository._location.canonical_path()
+        try:
+            self.repository.load_key()
+            return loc
+        except configparser.NoOptionError:
+            raise RepoKeyNotFoundError(loc)
+
+    def get_new_target(self, args):
+        return self.repository
+
+    def load(self, target, passphrase):
+        # what we get in target is just a repo location, but we already have the repo obj:
+        target = self.repository
+        key_data = target.load_key()
+        key_data = key_data.decode('utf-8')  # remote repo: msgpack issue #99, getting bytes
+        success = self._load(key_data, passphrase)
+        if success:
+            self.target = target
+        return success
+
+    def save(self, target, passphrase):
+        key_data = self._save(passphrase)
+        key_data = key_data.encode('utf-8')  # remote repo: msgpack issue #99, giving bytes
+        target.save_key(key_data)
+        self.target = target
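Net effect (illustrative): both subclasses share the KeyfileKeyBase load/save logic and differ only in where the encrypted key blob lives:

    # KeyfileKey: file under get_keys_dir(), first line "BORG_KEY <hex repo id>"
    # RepoKey:    'key' entry in the [repository] section of the repo config,
    #             transported via repository.save_key() / load_key()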

+ 286 - 0
borg/locking.py

@@ -0,0 +1,286 @@
+import errno
+import json
+import os
+import socket
+import threading
+import time
+
+from borg.helpers import Error
+
+ADD, REMOVE = 'add', 'remove'
+SHARED, EXCLUSIVE = 'shared', 'exclusive'
+
+
+def get_id():
+    """Get identification tuple for 'us'"""
+    hostname = socket.gethostname()
+    pid = os.getpid()
+    tid = threading.current_thread().ident & 0xffffffff
+    return hostname, pid, tid
+
+
+class TimeoutTimer:
+    """
+    A timer for timeout checks (can also deal with no timeout, give timeout=None [default]).
+    It can also compute and optionally execute a reasonable sleep time (e.g. to avoid
+    polling too often or to support thread/process rescheduling).
+    """
+    def __init__(self, timeout=None, sleep=None):
+        """
+        Initialize a timer.
+
+        :param timeout: time out interval [s] or None (no timeout)
+        :param sleep: sleep interval [s] (>= 0: do sleep call, <0: don't call sleep)
+                      or None (autocompute: use 10% of timeout, or 1s for no timeout)
+        """
+        if timeout is not None and timeout < 0:
+            raise ValueError("timeout must be >= 0")
+        self.timeout_interval = timeout
+        if sleep is None:
+            if timeout is None:
+                sleep = 1.0
+            else:
+                sleep = timeout / 10.0
+        self.sleep_interval = sleep
+        self.start_time = None
+        self.end_time = None
+
+    def __repr__(self):
+        return "<%s: start=%r end=%r timeout=%r sleep=%r>" % (
+            self.__class__.__name__, self.start_time, self.end_time,
+            self.timeout_interval, self.sleep_interval)
+
+    def start(self):
+        self.start_time = time.time()
+        if self.timeout_interval is not None:
+            self.end_time = self.start_time + self.timeout_interval
+        return self
+
+    def sleep(self):
+        if self.sleep_interval >= 0:
+            time.sleep(self.sleep_interval)
+
+    def timed_out(self):
+        return self.end_time is not None and time.time() >= self.end_time
+
+    def timed_out_or_sleep(self):
+        if self.timed_out():
+            return True
+        else:
+            self.sleep()
+            return False
+
+
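+# Usage sketch (illustrative, not from the commit): poll a condition with a
+# bounded total wait.
+#
+#     timer = TimeoutTimer(timeout=5.0, sleep=0.5).start()
+#     while not resource_ready():            # hypothetical predicate
+#         if timer.timed_out_or_sleep():     # sleeps 0.5s, True once 5s have passed
+#             raise RuntimeError('resource still not ready')
+#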
+class ExclusiveLock:
+    """An exclusive Lock based on mkdir fs operation being atomic"""
+    class LockError(Error):
+        """Failed to acquire the lock {}."""
+
+    class LockTimeout(LockError):
+        """Failed to create/acquire the lock {} (timeout)."""
+
+    class LockFailed(LockError):
+        """Failed to create/acquire the lock {} ({})."""
+
+    class UnlockError(Error):
+        """Failed to release the lock {}."""
+
+    class NotLocked(UnlockError):
+        """Failed to release the lock {} (was not locked)."""
+
+    class NotMyLock(UnlockError):
+        """Failed to release the lock {} (was/is locked, but not by me)."""
+
+    def __init__(self, path, timeout=None, sleep=None, id=None):
+        self.timeout = timeout
+        self.sleep = sleep
+        self.path = os.path.abspath(path)
+        self.id = id or get_id()
+        self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
+
+    def __enter__(self):
+        return self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    def __repr__(self):
+        return "<%s: %r>" % (self.__class__.__name__, self.unique_name)
+
+    def acquire(self, timeout=None, sleep=None):
+        if timeout is None:
+            timeout = self.timeout
+        if sleep is None:
+            sleep = self.sleep
+        timer = TimeoutTimer(timeout, sleep).start()
+        while True:
+            try:
+                os.mkdir(self.path)
+            except OSError as err:
+                if err.errno == errno.EEXIST:  # already locked
+                    if self.by_me():
+                        return self
+                    if timer.timed_out_or_sleep():
+                        raise self.LockTimeout(self.path)
+                else:
+                    raise self.LockFailed(self.path, str(err))
+            else:
+                with open(self.unique_name, "wb"):
+                    pass
+                return self
+
+    def release(self):
+        if not self.is_locked():
+            raise self.NotLocked(self.path)
+        if not self.by_me():
+            raise self.NotMyLock(self.path)
+        os.unlink(self.unique_name)
+        os.rmdir(self.path)
+
+    def is_locked(self):
+        return os.path.exists(self.path)
+
+    def by_me(self):
+        return os.path.exists(self.unique_name)
+
+    def break_lock(self):
+        if self.is_locked():
+            for name in os.listdir(self.path):
+                os.unlink(os.path.join(self.path, name))
+            os.rmdir(self.path)
+
+
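+# Usage sketch (illustrative): os.mkdir() is atomic, so the directory itself is
+# the lock; the context manager form releases on exit.
+#
+#     with ExclusiveLock('/path/to/repo/lock.exclusive', timeout=1.0):  # hypothetical path
+#         critical_section()   # LockTimeout is raised if another id holds the lock
+#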
+class LockRoster:
+    """
+    A Lock Roster to track shared/exclusive lockers.
+
+    Note: you usually should call the methods with an exclusive lock held,
+    to avoid conflicting access by multiple threads/processes/machines.
+    """
+    def __init__(self, path, id=None):
+        self.path = path
+        self.id = id or get_id()
+
+    def load(self):
+        try:
+            with open(self.path) as f:
+                data = json.load(f)
+        except IOError as err:
+            if err.errno != errno.ENOENT:
+                raise
+            data = {}
+        return data
+
+    def save(self, data):
+        with open(self.path, "w") as f:
+            json.dump(data, f)
+
+    def remove(self):
+        os.unlink(self.path)
+
+    def get(self, key):
+        roster = self.load()
+        return set(tuple(e) for e in roster.get(key, []))
+
+    def modify(self, key, op):
+        roster = self.load()
+        try:
+            elements = set(tuple(e) for e in roster[key])
+        except KeyError:
+            elements = set()
+        if op == ADD:
+            elements.add(self.id)
+        elif op == REMOVE:
+            elements.remove(self.id)
+        else:
+            raise ValueError('Unknown LockRoster op %r' % op)
+        roster[key] = list(list(e) for e in elements)
+        self.save(roster)
+
+
+class UpgradableLock:
+    """
+    A Lock for a resource that can be accessed in a shared or exclusive way.
+    Typically, write access to a resource needs an exclusive lock (1 writer,
+    no one is allowed to read) and read access to a resource needs a shared
+    lock (multiple readers are allowed).
+    """
+    class SharedLockFailed(Error):
+        """Failed to acquire shared lock [{}]"""
+
+    class ExclusiveLockFailed(Error):
+        """Failed to acquire write lock [{}]"""
+
+    def __init__(self, path, exclusive=False, sleep=None, id=None):
+        self.path = path
+        self.is_exclusive = exclusive
+        self.sleep = sleep
+        self.id = id or get_id()
+        # globally keeping track of shared and exclusive lockers:
+        self._roster = LockRoster(path + '.roster', id=id)
+        # an exclusive lock, used for:
+        # - holding while doing roster queries / updates
+        # - holding while the UpgradableLock itself is exclusive
+        self._lock = ExclusiveLock(path + '.exclusive', id=id)
+
+    def __enter__(self):
+        return self.acquire()
+
+    def __exit__(self, *exc):
+        self.release()
+
+    def __repr__(self):
+        return "<%s: %r>" % (self.__class__.__name__, self.id)
+
+    def acquire(self, exclusive=None, remove=None, sleep=None):
+        if exclusive is None:
+            exclusive = self.is_exclusive
+        sleep = sleep or self.sleep or 0.2
+        try:
+            if exclusive:
+                self._wait_for_readers_finishing(remove, sleep)
+                self._roster.modify(EXCLUSIVE, ADD)
+            else:
+                with self._lock:
+                    if remove is not None:
+                        self._roster.modify(remove, REMOVE)
+                    self._roster.modify(SHARED, ADD)
+            self.is_exclusive = exclusive
+            return self
+        except ExclusiveLock.LockError as err:
+            msg = str(err)
+            if exclusive:
+                raise self.ExclusiveLockFailed(msg)
+            else:
+                raise self.SharedLockFailed(msg)
+
+    def _wait_for_readers_finishing(self, remove, sleep):
+        while True:
+            self._lock.acquire()
+            if remove is not None:
+                self._roster.modify(remove, REMOVE)
+                remove = None
+            if len(self._roster.get(SHARED)) == 0:
+                return  # we are the only one and we keep the lock!
+            self._lock.release()
+            time.sleep(sleep)
+
+    def release(self):
+        if self.is_exclusive:
+            self._roster.modify(EXCLUSIVE, REMOVE)
+            self._lock.release()
+        else:
+            with self._lock:
+                self._roster.modify(SHARED, REMOVE)
+
+    def upgrade(self):
+        if not self.is_exclusive:
+            self.acquire(exclusive=True, remove=SHARED)
+
+    def downgrade(self):
+        if self.is_exclusive:
+            self.acquire(exclusive=False, remove=EXCLUSIVE)
+
+    def break_lock(self):
+        self._roster.remove()
+        self._lock.break_lock()
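+# Usage sketch (illustrative): readers share the lock, a writer upgrades in place.
+#
+#     lock = UpgradableLock('/path/to/repo/lock', exclusive=False).acquire()  # shared
+#     try:
+#         lock.upgrade()    # exclusive once all other readers are gone
+#     finally:
+#         lock.release()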

+ 0 - 0
attic/lrucache.py → borg/lrucache.py


+ 4 - 3
attic/platform.py → borg/platform.py

@@ -1,15 +1,16 @@
 import sys
 
 
 if sys.platform.startswith('linux'):
-    from attic.platform_linux import acl_get, acl_set, API_VERSION
+    from .platform_linux import acl_get, acl_set, API_VERSION
 elif sys.platform.startswith('freebsd'):
-    from attic.platform_freebsd import acl_get, acl_set, API_VERSION
+    from .platform_freebsd import acl_get, acl_set, API_VERSION
 elif sys.platform == 'darwin':
-    from attic.platform_darwin import acl_get, acl_set, API_VERSION
+    from .platform_darwin import acl_get, acl_set, API_VERSION
 else:
     API_VERSION = 2
 
 
     def acl_get(path, item, st, numeric_owner=False):
         pass
+
     def acl_set(path, item, numeric_owner=False):
         pass

+ 1 - 1
attic/platform_darwin.pyx → borg/platform_darwin.pyx

@@ -1,5 +1,5 @@
 import os
-from attic.helpers import user2uid, group2gid
+from .helpers import user2uid, group2gid

 API_VERSION = 2
 
 

+ 1 - 1
attic/platform_freebsd.pyx → borg/platform_freebsd.pyx

@@ -1,5 +1,5 @@
 import os
-from attic.helpers import posix_acl_use_stored_uid_gid
+from .helpers import posix_acl_use_stored_uid_gid

 API_VERSION = 2
 
 

+ 1 - 1
attic/platform_linux.pyx → borg/platform_linux.pyx

@@ -1,7 +1,7 @@
 import os
 import re
 from stat import S_ISLNK
-from attic.helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid
+from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid

 API_VERSION = 2
 
 

+ 76 - 51
attic/remote.py → borg/remote.py

@@ -3,12 +3,13 @@ import fcntl
 import msgpack
 import os
 import select
-import shutil
 from subprocess import Popen, PIPE
 import sys
 import tempfile
+import traceback
+
+from . import __version__
 
 
-from .hashindex import NSIndex
 from .helpers import Error, IntegrityError
 from .repository import Repository
 
 
@@ -23,7 +24,27 @@
     """Repository path not allowed"""
 
 
 
 
-class RepositoryServer(object):
+class InvalidRPCMethod(Error):
+    """RPC method is not valid"""
+
+
+class RepositoryServer:  # pragma: no cover
+    rpc_methods = (
+        '__len__',
+        'check',
+        'commit',
+        'delete',
+        'destroy',
+        'get',
+        'list',
+        'negotiate',
+        'open',
+        'put',
+        'repair',
+        'rollback',
+        'save_key',
+        'load_key',
+    )
 
 
     def __init__(self, restrict_to_paths):
         self.repository = None
@@ -46,16 +67,22 @@ class RepositoryServer(object):
                 if not data:
                     return
                 unpacker.feed(data)
-                for type, msgid, method, args in unpacker:
+                for unpacked in unpacker:
+                    if not (isinstance(unpacked, tuple) and len(unpacked) == 4):
+                        raise Exception("Unexpected RPC data format.")
+                    type, msgid, method, args = unpacked
                     method = method.decode('ascii')
                     try:
+                        if method not in self.rpc_methods:
+                            raise InvalidRPCMethod(method)
                         try:
                             f = getattr(self, method)
                         except AttributeError:
                             f = getattr(self.repository, method)
                         res = f(*args)
-                    except Exception as e:
-                        os.write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, e.args)))
+                    except BaseException as e:
+                        exc = "Remote Traceback by Borg %s%s%s" % (__version__, os.linesep, traceback.format_exc())
+                        os.write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, exc)))
                     else:
                         os.write(stdout_fd, msgpack.packb((1, msgid, None, res)))
             if es:
@@ -79,11 +106,12 @@ class RepositoryServer(object):
         return self.repository.id
 
 
 
 
-class RemoteRepository(object):
+class RemoteRepository:
     extra_test_args = []
+    remote_path = None
+    umask = None

     class RPCError(Exception):
-
         def __init__(self, name):
             self.name = name

@@ -97,9 +125,11 @@ class RemoteRepository(object):
         self.responses = {}
         self.unpacker = msgpack.Unpacker(use_list=False)
         self.p = None
+        # use local umask also for the remote process
+        umask = ['--umask', '%03o' % self.umask]
         if location.host == '__testsuite__':
-            args = [sys.executable, '-m', 'attic.archiver', 'serve'] + self.extra_test_args
-        else:
+            args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args
+        else:  # pragma: no cover
             args = ['ssh']
             if location.port:
                 args += ['-p', str(location.port)]
@@ -107,7 +137,7 @@ class RemoteRepository(object):
                 args.append('%s@%s' % (location.user, location.host))
             else:
                 args.append('%s' % location.host)
-            args += ['attic', 'serve']
+            args += [self.remote_path, 'serve'] + umask
         self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE)
         self.stdin_fd = self.p.stdin.fileno()
         self.stdout_fd = self.p.stdout.fileno()
@@ -116,7 +146,10 @@ class RemoteRepository(object):
         self.r_fds = [self.stdout_fd]
         self.x_fds = [self.stdin_fd, self.stdout_fd]

-        version = self.call('negotiate', 1)
+        try:
+            version = self.call('negotiate', 1)
+        except ConnectionClosed:
+            raise Exception('Server immediately closed connection - is Borg installed and working on the server?')
         if version != 1:
             raise Exception('Server insisted on using unsupported protocol version %d' % version)
         self.id = self.call('open', location.path, create)
@@ -124,6 +157,9 @@ class RemoteRepository(object):
     def __del__(self):
         self.close()

+    def __repr__(self):
+        return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path())
+
     def call(self, cmd, *args, **kw):
         for resp in self.call_many(cmd, [args], **kw):
             return resp
@@ -131,6 +167,7 @@ class RemoteRepository(object):
     def call_many(self, cmd, calls, wait=True, is_preloaded=False):
         if not calls:
             return
+
         def fetch_from_cache(args):
             msgid = self.cache[args].pop(0)
             if not self.cache[args]:
@@ -156,9 +193,12 @@ class RemoteRepository(object):
                             raise IntegrityError(res)
                         elif error == b'PathNotAllowed':
                             raise PathNotAllowed(*res)
-                        if error == b'ObjectNotFound':
+                        elif error == b'ObjectNotFound':
                             raise Repository.ObjectNotFound(res[0], self.location.orig)
-                        raise self.RPCError(error)
+                        elif error == b'InvalidRPCMethod':
+                            raise InvalidRPCMethod(*res)
+                        else:
+                            raise self.RPCError(res.decode('utf-8'))
                     else:
                         yield res
                         if not waiting_for and not calls:
@@ -167,13 +207,16 @@ class RemoteRepository(object):
                     break
             r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
             if x:
-                raise Exception('FD exception occured')
+                raise Exception('FD exception occurred')
             if r:
                 data = os.read(self.stdout_fd, BUFSIZE)
                 if not data:
                     raise ConnectionClosed()
                 self.unpacker.feed(data)
-                for type, msgid, error, res in self.unpacker:
+                for unpacked in self.unpacker:
+                    if not (isinstance(unpacked, tuple) and len(unpacked) == 4):
+                        raise Exception("Unexpected RPC data format.")
+                    type, msgid, error, res = unpacked
                     if msgid in self.ignore_responses:
                         self.ignore_responses.remove(msgid)
                     else:
@@ -219,6 +262,9 @@ class RemoteRepository(object):
     def rollback(self, *args):
         return self.call('rollback')

+    def destroy(self):
+        return self.call('destroy')
+
     def __len__(self):
         return self.call('__len__')

@@ -239,6 +285,12 @@ class RemoteRepository(object):
     def delete(self, id_, wait=True):
         return self.call('delete', id_, wait=wait)
 
 
+    def save_key(self, keydata):
+        return self.call('save_key', keydata)
+
+    def load_key(self):
+        return self.call('load_key')
+
     def close(self):
         if self.p:
             self.p.stdin.close()
@@ -253,56 +305,29 @@ class RemoteRepository(object):
 class RepositoryCache:
     """A caching Repository wrapper

-    Caches Repository GET operations using a temporary file
+    Caches Repository GET operations using a local temporary Repository.
     """
     def __init__(self, repository):
-        self.tmppath = None
-        self.index = None
-        self.data_fd = None
         self.repository = repository
-        self.entries = {}
-        self.initialize()
+        tmppath = tempfile.mkdtemp(prefix='borg-tmp')
+        self.caching_repo = Repository(tmppath, create=True, exclusive=True)

     def __del__(self):
-        self.cleanup()
-
-    def initialize(self):
-        self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex()
-        self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
-
-    def cleanup(self):
-        del self.index
-        if self.data_fd:
-            self.data_fd.close()
-        if self.tmppath:
-            shutil.rmtree(self.tmppath)
-
-    def load_object(self, offset, size):
-        self.data_fd.seek(offset)
-        data = self.data_fd.read(size)
-        assert len(data) == size
-        return data
-
-    def store_object(self, key, data):
-        self.data_fd.seek(0, os.SEEK_END)
-        self.data_fd.write(data)
-        offset = self.data_fd.tell()
-        self.index[key] = offset - len(data), len(data)
+        self.caching_repo.destroy()
 
 
     def get(self, key):
         return next(self.get_many([key]))

     def get_many(self, keys):
-        unknown_keys = [key for key in keys if not key in self.index]
+        unknown_keys = [key for key in keys if key not in self.caching_repo]
         repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
         for key in keys:
             try:
-                yield self.load_object(*self.index[key])
-            except KeyError:
+                yield self.caching_repo.get(key)
+            except Repository.ObjectNotFound:
                 for key_, data in repository_iterator:
                     if key_ == key:
-                        self.store_object(key, data)
+                        self.caching_repo.put(key, data)
                         yield data
                         break
         # Consume any pending requests
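Behavior sketch (illustrative, not from the commit): the first get_many() round-trips to the remote side, repeats are served from the local temporary Repository.

    cache = RepositoryCache(remote_repository)     # hypothetical RemoteRepository
    first = list(cache.get_many(keys))             # fetches unknown keys remotely
    again = list(cache.get_many(keys))             # now answered from the local cache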

+ 82 - 31
attic/repository.py → borg/repository.py

@@ -9,17 +9,19 @@ import sys
 from zlib import crc32

 from .hashindex import NSIndex
-from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify, UpgradableLock
+from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify
+from .locking import UpgradableLock
 from .lrucache import LRUCache

 MAX_OBJECT_SIZE = 20 * 1024 * 1024
-MAGIC = b'ATTICSEG'
+MAGIC = b'BORG_SEG'
+MAGIC_LEN = len(MAGIC)
 TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
 
 
 
 
-class Repository(object):
+class Repository:
     """Filesystem based transactional key value store

     On disk layout:
@@ -33,19 +35,19 @@ class Repository(object):
     DEFAULT_SEGMENTS_PER_DIR = 10000
 
 
     class DoesNotExist(Error):
-        """Repository {} does not exist"""
+        """Repository {} does not exist."""

     class AlreadyExists(Error):
-        """Repository {} already exists"""
+        """Repository {} already exists."""

     class InvalidRepository(Error):
-        """{} is not a valid repository"""
+        """{} is not a valid repository."""

     class CheckNeeded(Error):
-        '''Inconsistency detected. Please run "attic check {}"'''
+        """Inconsistency detected. Please run "borg check {}"."""

     class ObjectNotFound(Error):
-        """Object with key {} not found in repository {}"""
+        """Object with key {} not found in repository {}."""
 
 
     def __init__(self, path, create=False, exclusive=False):
         self.path = path
@@ -60,6 +62,9 @@ class Repository(object):
     def __del__(self):
         self.close()
 
 
+    def __repr__(self):
+        return '<%s %s>' % (self.__class__.__name__, self.path)
+
     def create(self, path):
         """Create a new empty repository at `path`
         """
@@ -68,7 +73,7 @@ class Repository(object):
         if not os.path.exists(path):
             os.mkdir(path)
         with open(os.path.join(path, 'README'), 'w') as fd:
-            fd.write('This is an Attic repository\n')
+            fd.write('This is a Borg repository\n')
         os.mkdir(os.path.join(path, 'data'))
         config = RawConfigParser()
         config.add_section('repository')
@@ -76,13 +81,34 @@ class Repository(object):
         config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
         config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
-        with open(os.path.join(path, 'config'), 'w') as fd:
+        self.save_config(path, config)
+
+    def save_config(self, path, config):
+        config_path = os.path.join(path, 'config')
+        with open(config_path, 'w') as fd:
             config.write(fd)
 
 
+    def save_key(self, keydata):
+        assert self.config
+        keydata = keydata.decode('utf-8')  # remote repo: msgpack issue #99, getting bytes
+        self.config.set('repository', 'key', keydata)
+        self.save_config(self.path, self.config)
+
+    def load_key(self):
+        keydata = self.config.get('repository', 'key')
+        return keydata.encode('utf-8')  # remote repo: msgpack issue #99, returning bytes
+
+    def destroy(self):
+        """Destroy the repository at `self.path`
+        """
+        self.close()
+        os.remove(os.path.join(self.path, 'config'))  # kill config first
+        shutil.rmtree(self.path)
+
     def get_index_transaction_id(self):
     def get_index_transaction_id(self):
-        indicies = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit()))
-        if indicies:
-            return indicies[-1]
+        indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit()))
+        if indices:
+            return indices[-1]
         else:
         else:
             return None
             return None
 
 
@@ -105,11 +131,11 @@ class Repository(object):
         self.path = path
         if not os.path.isdir(path):
             raise self.DoesNotExist(path)
+        self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive).acquire()
         self.config = RawConfigParser()
         self.config.read(os.path.join(self.path, 'config'))
-        if not 'repository' in self.config.sections() or self.config.getint('repository', 'version') != 1:
+        if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
             raise self.InvalidRepository(path)
-        self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.id = unhexlify(self.config.get('repository', 'id').strip())
@@ -138,7 +164,14 @@ class Repository(object):

     def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
-        self.lock.upgrade()
+        try:
+            self.lock.upgrade()
+        except UpgradableLock.ExclusiveLockFailed:
+            # if upgrading the lock to exclusive fails, we do not have an
+            # active transaction. this is important for "serve" mode, where
+            # the repository instance lives on - even if exceptions happened.
+            self._active_txn = False
+            raise
         if not self.index:
             self.index = self.open_index(transaction_id)
         if transaction_id is None:
@@ -241,6 +274,7 @@ class Repository(object):
         the index is consistent with the data stored in the segments.
         """
         error_found = False
+
         def report_error(msg):
             nonlocal error_found
             error_found = True
@@ -266,8 +300,8 @@ class Repository(object):
                 continue
             try:
                 objects = list(self.io.iter_objects(segment))
-            except (IntegrityError, struct.error):
-                report_error('Error reading segment {}'.format(segment))
+            except IntegrityError as err:
+                report_error('Error reading segment {}: {}'.format(segment, err))
                 objects = []
                 if repair:
                     self.io.recover_segment(segment, filename)
@@ -324,6 +358,11 @@ class Repository(object):
             self.index = self.open_index(self.get_transaction_id())
         return len(self.index)

+    def __contains__(self, id):
+        if not self.index:
+            self.index = self.open_index(self.get_transaction_id())
+        return id in self.index
+
     def list(self, limit=None, marker=None):
         if not self.index:
             self.index = self.open_index(self.get_transaction_id())
@@ -373,11 +412,11 @@ class Repository(object):
         self.segments.setdefault(segment, 0)

     def preload(self, ids):
-        """Preload objects (only applies to remote repositories
+        """Preload objects (only applies to remote repositories)
         """


-class LoggedIO(object):
+class LoggedIO:

     header_fmt = struct.Struct('<IIB')
     assert header_fmt.size == 9
@@ -445,7 +484,7 @@ class LoggedIO(object):
         with open(filename, 'rb') as fd:
             try:
                 fd.seek(-self.header_fmt.size, os.SEEK_END)
-            except Exception as e:
+            except OSError as e:
                 # return False if segment file is empty or too small
                 if e.errno == errno.EINVAL:
                     return False
@@ -465,7 +504,7 @@ class LoggedIO(object):
                     os.mkdir(dirname)
             self._write_fd = open(self.segment_filename(self.segment), 'ab')
             self._write_fd.write(MAGIC)
-            self.offset = 8
+            self.offset = MAGIC_LEN
         return self._write_fd

     def get_fd(self, segment):
@@ -490,19 +529,26 @@ class LoggedIO(object):
     def iter_objects(self, segment, include_data=False):
         fd = self.get_fd(segment)
         fd.seek(0)
-        if fd.read(8) != MAGIC:
-            raise IntegrityError('Invalid segment header')
-        offset = 8
+        if fd.read(MAGIC_LEN) != MAGIC:
+            raise IntegrityError('Invalid segment magic')
+        offset = MAGIC_LEN
         header = fd.read(self.header_fmt.size)
         while header:
-            crc, size, tag = self.header_fmt.unpack(header)
+            try:
+                crc, size, tag = self.header_fmt.unpack(header)
+            except struct.error as err:
+                raise IntegrityError('Invalid segment entry header [offset {}]: {}'.format(offset, err))
             if size > MAX_OBJECT_SIZE:
-                raise IntegrityError('Invalid segment object size')
-            rest = fd.read(size - self.header_fmt.size)
+                raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset))
+            length = size - self.header_fmt.size
+            rest = fd.read(length)
+            if len(rest) != length:
+                raise IntegrityError('Segment entry data short read [offset {}]: expected: {}, got {} bytes'.format(
+                                     offset, length, len(rest)))
             if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
-                raise IntegrityError('Segment checksum mismatch')
+                raise IntegrityError('Segment entry checksum mismatch [offset {}]'.format(offset))
             if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT):
-                raise IntegrityError('Invalid segment entry header')
+                raise IntegrityError('Invalid segment entry tag [offset {}]'.format(offset))
             key = None
             if tag in (TAG_PUT, TAG_DELETE):
                 key = rest[:32]
@@ -579,6 +625,11 @@ class LoggedIO(object):
         if self._write_fd:
             self.segment += 1
             self.offset = 0
-            os.fsync(self._write_fd)
+            self._write_fd.flush()
+            os.fsync(self._write_fd.fileno())
+            if hasattr(os, 'posix_fadvise'):  # python >= 3.3, only on UNIX
+                # tell the OS that it does not need to cache what we just wrote,
+                # avoids spoiling the cache for the OS and other processes.
+                os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
             self._write_fd.close()
             self._write_fd = None

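The iter_objects rework above pins every IntegrityError to a byte offset, which only makes sense given the on-disk entry layout. Below is a minimal standalone sketch of that layout check — assuming, as the hunks above show, a '<IIB' header (crc32, size, tag) after the segment magic and a 32-byte key for PUT/DELETE entries; the MAGIC value here is hypothetical, for illustration only:

import struct
from binascii import crc32

MAGIC = b'BORG_SEG'                  # hypothetical magic; the real value lives in borg's sources
MAGIC_LEN = len(MAGIC)
header_fmt = struct.Struct('<IIB')   # crc32, entry size, tag byte (9 bytes, as asserted above)
TAG_PUT, TAG_DELETE, TAG_COMMIT = 0, 1, 2   # only TAG_COMMIT = 2 is visible above; 0/1 assumed

def iter_entries(fd):
    """Yield (tag, key, offset) per entry, failing with the offset on damage."""
    if fd.read(MAGIC_LEN) != MAGIC:
        raise ValueError('invalid segment magic')
    offset = MAGIC_LEN
    header = fd.read(header_fmt.size)
    while header:
        crc, size, tag = header_fmt.unpack(header)
        rest = fd.read(size - header_fmt.size)
        if len(rest) != size - header_fmt.size:
            raise ValueError('short read [offset %d]' % offset)
        # the checksum covers everything after the stored crc: size, tag and payload
        if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
            raise ValueError('checksum mismatch [offset %d]' % offset)
        key = rest[:32] if tag in (TAG_PUT, TAG_DELETE) else None
        yield tag, key, offset
        offset += size
        header = fd.read(header_fmt.size)
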
+ 6 - 33
attic/testsuite/__init__.py → borg/testsuite/__init__.py

@@ -6,8 +6,8 @@ import sys
 import sysconfig
 import time
 import unittest
-from attic.helpers import st_mtime_ns
-from attic.xattr import get_all
+from ..helpers import st_mtime_ns
+from ..xattr import get_all

 try:
     import llfuse
@@ -19,7 +19,7 @@ except ImportError:
 has_lchflags = hasattr(os, 'lchflags')


-# The mtime get/set precison varies on different OS and Python versions
+# The mtime get/set precision varies on different OS and Python versions
 if 'HAVE_FUTIMENS' in getattr(posix, '_have_functions', []):
     st_mtime_ns_round = 0
 elif 'HAVE_UTIMES' in sysconfig.get_config_vars():
@@ -32,7 +32,7 @@ has_mtime_ns = sys.version >= '3.3'
 utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})


-class AtticTestCase(unittest.TestCase):
+class BaseTestCase(unittest.TestCase):
     """
     """
     assert_in = unittest.TestCase.assertIn
@@ -68,12 +68,12 @@ class AtticTestCase(unittest.TestCase):
             if has_lchflags:
                 attrs.append('st_flags')
             if not fuse or not os.path.isdir(path1):
-                # dir nlink is always 1 on our fuse fileystem
+                # dir nlink is always 1 on our fuse filesystem
                 attrs.append('st_nlink')
             d1 = [filename] + [getattr(s1, a) for a in attrs]
             d2 = [filename] + [getattr(s2, a) for a in attrs]
             if not os.path.islink(path1) or utime_supports_fd:
-                # Older versions of llfuse does not support ns precision properly
+                # Older versions of llfuse do not support ns precision properly
                 if fuse and not have_fuse_mtime_ns:
                     d1.append(round(st_mtime_ns(s1), -4))
                     d2.append(round(st_mtime_ns(s2), -4))
@@ -94,30 +94,3 @@ class AtticTestCase(unittest.TestCase):
                 return
             time.sleep(.1)
         raise Exception('wait_for_mount(%s) timeout' % path)
-
-
-def get_tests(suite):
-    """Generates a sequence of tests from a test suite
-    """
-    for item in suite:
-        try:
-            # TODO: This could be "yield from..." with Python 3.3+
-            for i in get_tests(item):
-                yield i
-        except TypeError:
-            yield item
-
-
-class TestLoader(unittest.TestLoader):
-    """A customzied test loader that properly detects and filters our test cases
-    """
-
-    def loadTestsFromName(self, pattern, module=None):
-        suite = self.discover('attic.testsuite', '*.py')
-        tests = unittest.TestSuite()
-        for test in get_tests(suite):
-            if pattern.lower() in test.id().lower():
-                tests.addTest(test)
-        return tests
-
-

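The deleted get_tests helper above even carried a TODO saying the inner loop "could be 'yield from...' with Python 3.3+". For reference, a sketch of the same flattening with that simplification applied (using an isinstance check instead of the original's try/except TypeError):

import unittest

def get_tests(suite):
    """Flatten a (possibly nested) TestSuite into individual test cases."""
    for item in suite:
        if isinstance(item, unittest.TestSuite):
            yield from get_tests(item)
        else:
            yield item
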
+ 32 - 6
attic/testsuite/archive.py → borg/testsuite/archive.py

@@ -1,7 +1,12 @@
+from datetime import datetime, timezone
+
 import msgpack
 import msgpack
-from attic.testsuite import AtticTestCase
-from attic.archive import CacheChunkBuffer, RobustUnpacker
-from attic.key import PlaintextKey
+from mock import Mock
+
+from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
+from ..key import PlaintextKey
+from ..helpers import Manifest
+from . import BaseTestCase


 class MockCache:
         return id, len(data), len(data)
         return id, len(data), len(data)


+class ArchiveTimestampTestCase(BaseTestCase):
+
+    def _test_timestamp_parsing(self, isoformat, expected):
+        repository = Mock()
+        key = PlaintextKey(repository)
+        manifest = Manifest(repository, key)
+        a = Archive(repository, key, manifest, 'test', create=True)
+        a.metadata = {b'time': isoformat}
+        self.assert_equal(a.ts, expected)
+
+    def test_with_microseconds(self):
+        self._test_timestamp_parsing(
+            '1970-01-01T00:00:01.000001',
+            datetime(1970, 1, 1, 0, 0, 1, 1, timezone.utc))
+
+    def test_without_microseconds(self):
+        self._test_timestamp_parsing(
+            '1970-01-01T00:00:01',
+            datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc))
+
+
+class ChunkBufferTestCase(BaseTestCase):

     def test(self):
         data = [{b'foo': 1}, {b'bar': 2}]
         cache = MockCache()
-        key = PlaintextKey()
+        key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
         for d in data:
             chunks.add(d)
@@ -32,7 +58,7 @@ class ChunkBufferTestCase(AtticTestCase):
         self.assert_equal(data, list(unpacker))


-class RobustUnpackerTestCase(AtticTestCase):
+class RobustUnpackerTestCase(BaseTestCase):

     def make_chunks(self, items):
         return b''.join(msgpack.packb({'path': item}) for item in items)

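The new ArchiveTimestampTestCase fixes the contract for Archive.ts: both ISO 8601 variants, with and without microseconds, must come back as timezone-aware UTC datetimes. A standalone sketch that satisfies exactly those two assertions (an illustration of the expected behaviour, not necessarily how Archive.ts is implemented):

from datetime import datetime, timezone

def parse_archive_time(isoformat):
    """Parse '%Y-%m-%dT%H:%M:%S' with optional '.%f' and attach UTC."""
    try:
        ts = datetime.strptime(isoformat, '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        ts = datetime.strptime(isoformat, '%Y-%m-%dT%H:%M:%S')
    return ts.replace(tzinfo=timezone.utc)

assert parse_archive_time('1970-01-01T00:00:01.000001') == datetime(1970, 1, 1, 0, 0, 1, 1, timezone.utc)
assert parse_archive_time('1970-01-01T00:00:01') == datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc)
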
+ 675 - 0
borg/testsuite/archiver.py

@@ -0,0 +1,675 @@
+from binascii import hexlify
+from configparser import RawConfigParser
+import os
+from io import StringIO
+import stat
+import subprocess
+import sys
+import shutil
+import tempfile
+import time
+import unittest
+from hashlib import sha256
+
+from mock import patch
+
+from .. import xattr
+from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP
+from ..archiver import Archiver
+from ..cache import Cache
+from ..crypto import bytes_to_long, num_aes_blocks
+from ..helpers import Manifest
+from ..remote import RemoteRepository, PathNotAllowed
+from ..repository import Repository
+from . import BaseTestCase
+
+try:
+    import llfuse
+    has_llfuse = True or llfuse  # avoids "unused import"
+except ImportError:
+    has_llfuse = False
+
+has_lchflags = hasattr(os, 'lchflags')
+
+src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')
+
+
+class changedir:
+    def __init__(self, dir):
+        self.dir = dir
+
+    def __enter__(self):
+        self.old = os.getcwd()
+        os.chdir(self.dir)
+
+    def __exit__(self, *args, **kw):
+        os.chdir(self.old)
+
+
+class environment_variable:
+    def __init__(self, **values):
+        self.values = values
+        self.old_values = {}
+
+    def __enter__(self):
+        for k, v in self.values.items():
+            self.old_values[k] = os.environ.get(k)
+            os.environ[k] = v
+
+    def __exit__(self, *args, **kw):
+        for k, v in self.old_values.items():
+            if v is not None:
+                os.environ[k] = v
+
+
+class ArchiverTestCaseBase(BaseTestCase):
+
+    prefix = ''
+
+    def setUp(self):
+        os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1'
+        self.archiver = Archiver()
+        self.tmpdir = tempfile.mkdtemp()
+        self.repository_path = os.path.join(self.tmpdir, 'repository')
+        self.repository_location = self.prefix + self.repository_path
+        self.input_path = os.path.join(self.tmpdir, 'input')
+        self.output_path = os.path.join(self.tmpdir, 'output')
+        self.keys_path = os.path.join(self.tmpdir, 'keys')
+        self.cache_path = os.path.join(self.tmpdir, 'cache')
+        self.exclude_file_path = os.path.join(self.tmpdir, 'excludes')
+        os.environ['BORG_KEYS_DIR'] = self.keys_path
+        os.environ['BORG_CACHE_DIR'] = self.cache_path
+        os.mkdir(self.input_path)
+        os.mkdir(self.output_path)
+        os.mkdir(self.keys_path)
+        os.mkdir(self.cache_path)
+        with open(self.exclude_file_path, 'wb') as fd:
+            fd.write(b'input/file2\n# A comment line, then a blank line\n\n')
+        self._old_wd = os.getcwd()
+        os.chdir(self.tmpdir)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmpdir)
+        os.chdir(self._old_wd)
+
+    def cmd(self, *args, **kw):
+        exit_code = kw.get('exit_code', 0)
+        fork = kw.get('fork', False)
+        if fork:
+            try:
+                output = subprocess.check_output((sys.executable, '-m', 'borg.archiver') + args)
+                ret = 0
+            except subprocess.CalledProcessError as e:
+                output = e.output
+                ret = e.returncode
+            output = os.fsdecode(output)
+            if ret != exit_code:
+                print(output)
+            self.assert_equal(exit_code, ret)
+            return output
+        args = list(args)
+        stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
+        try:
+            sys.stdin = StringIO()
+            output = StringIO()
+            sys.stdout = sys.stderr = output
+            ret = self.archiver.run(args)
+            sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
+            if ret != exit_code:
+                print(output.getvalue())
+            self.assert_equal(exit_code, ret)
+            return output.getvalue()
+        finally:
+            sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr
+
+    def create_src_archive(self, name):
+        self.cmd('create', self.repository_location + '::' + name, src_dir)
+
+
+class ArchiverTestCase(ArchiverTestCaseBase):
+
+    def create_regular_file(self, name, size=0, contents=None):
+        filename = os.path.join(self.input_path, name)
+        if not os.path.exists(os.path.dirname(filename)):
+            os.makedirs(os.path.dirname(filename))
+        with open(filename, 'wb') as fd:
+            if contents is None:
+                contents = b'X' * size
+            fd.write(contents)
+
+    def create_test_files(self):
+        """Create a minimal test case including all supported file types
+        """
+        # File
+        self.create_regular_file('empty', size=0)
+        # next code line raises OverflowError on 32bit cpu (raspberry pi 2):
+        # 2600-01-01 > 2**64 ns
+        # os.utime('input/empty', (19880895600, 19880895600))
+        # thus, we better test with something not that far in future:
+        # 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline":
+        os.utime('input/empty', (2**31 - 1, 2**31 - 1))
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('flagfile', size=1024)
+        # Directory
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        # File owner
+        os.chown('input/file1', 100, 200)
+        # File mode
+        os.chmod('input/file1', 0o7755)
+        os.chmod('input/dir2', 0o555)
+        # Block device
+        os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
+        # Char device
+        os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
+        # Hard link
+        os.link(os.path.join(self.input_path, 'file1'),
+                os.path.join(self.input_path, 'hardlink'))
+        # Symlink
+        os.symlink('somewhere', os.path.join(self.input_path, 'link1'))
+        if xattr.is_enabled(self.input_path):
+            xattr.setxattr(os.path.join(self.input_path, 'file1'), 'user.foo', b'bar')
+            # XXX this always fails for me
+            # ubuntu 14.04, on a TMP dir filesystem with user_xattr, using fakeroot
+            # same for newer ubuntu and centos.
+            # if this is supported just on specific platform, platform should be checked first,
+            # so that the test setup for all tests using it does not fail here always for others.
+            # xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
+        # FIFO node
+        os.mkfifo(os.path.join(self.input_path, 'fifo1'))
+        if has_lchflags:
+            os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
+
+    def test_basic_functionality(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', '--stats', self.repository_location + '::test.2', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2)
+        item_count = 10 if has_lchflags else 11  # one file is UF_NODUMP
+        self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), item_count)
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            os.remove(os.path.join('input', 'flagfile'))
+        self.assert_dirs_equal('input', 'output/input')
+        info_output = self.cmd('info', self.repository_location + '::test')
+        item_count = 3 if has_lchflags else 4  # one file is UF_NODUMP
+        self.assert_in('Number of files: %d' % item_count, info_output)
+        shutil.rmtree(self.cache_path)
+        with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'):
+            info_output2 = self.cmd('info', self.repository_location + '::test')
+        # info_output2 starts with some "initializing cache" text but should
+        # end the same way as info_output
+        assert info_output2.endswith(info_output)
+
+    def _extract_repository_id(self, path):
+        return Repository(self.repository_path).id
+
+    def _set_repository_id(self, path, id):
+        config = RawConfigParser()
+        config.read(os.path.join(path, 'config'))
+        config.set('repository', 'id', hexlify(id).decode('ascii'))
+        with open(os.path.join(path, 'config'), 'w') as fd:
+            config.write(fd)
+        return Repository(self.repository_path).id
+
+    def test_sparse_file(self):
+        # no sparse file support on Mac OS X
+        sparse_support = sys.platform != 'darwin'
+        filename = os.path.join(self.input_path, 'sparse')
+        content = b'foobar'
+        hole_size = 5 * (1 << CHUNK_MAX_EXP)  # 5 full chunker buffers
+        with open(filename, 'wb') as fd:
+            # create a file that has a hole at the beginning and end (if the
+            # OS and filesystem supports sparse files)
+            fd.seek(hole_size, 1)
+            fd.write(content)
+            fd.seek(hole_size, 1)
+            pos = fd.tell()
+            fd.truncate(pos)
+        total_len = hole_size + len(content) + hole_size
+        st = os.stat(filename)
+        self.assert_equal(st.st_size, total_len)
+        if sparse_support and hasattr(st, 'st_blocks'):
+            self.assert_true(st.st_blocks * 512 < total_len / 10)  # is input sparse?
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', '--sparse', self.repository_location + '::test')
+        self.assert_dirs_equal('input', 'output/input')
+        filename = os.path.join(self.output_path, 'input', 'sparse')
+        with open(filename, 'rb') as fd:
+            # check if file contents are as expected
+            self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
+            self.assert_equal(fd.read(len(content)), content)
+            self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
+        st = os.stat(filename)
+        self.assert_equal(st.st_size, total_len)
+        if sparse_support and hasattr(st, 'st_blocks'):
+            self.assert_true(st.st_blocks * 512 < total_len / 10)  # is output sparse?
+
+    def test_unusual_filenames(self):
+        filenames = ['normal', 'with some blanks', '(with_parens)', ]
+        for filename in filenames:
+            filename = os.path.join(self.input_path, filename)
+            with open(filename, 'wb') as fd:
+                pass
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        for filename in filenames:
+            with changedir('output'):
+                self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename))
+            assert os.path.exists(os.path.join('output', 'input', filename))
+
+    def test_repository_swap_detection(self):
+        self.create_test_files()
+        os.environ['BORG_PASSPHRASE'] = 'passphrase'
+        self.cmd('init', '--encryption=passphrase', self.repository_location)
+        repository_id = self._extract_repository_id(self.repository_path)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        shutil.rmtree(self.repository_path)
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self._set_repository_id(self.repository_path, repository_id)
+        self.assert_equal(repository_id, self._extract_repository_id(self.repository_path))
+        self.assert_raises(Cache.EncryptionMethodMismatch, lambda: self.cmd('create', self.repository_location + '::test.2', 'input'))
+
+    def test_repository_swap_detection2(self):
+        self.create_test_files()
+        self.cmd('init', '--encryption=none', self.repository_location + '_unencrypted')
+        os.environ['BORG_PASSPHRASE'] = 'passphrase'
+        self.cmd('init', '--encryption=passphrase', self.repository_location + '_encrypted')
+        self.cmd('create', self.repository_location + '_encrypted::test', 'input')
+        shutil.rmtree(self.repository_path + '_encrypted')
+        os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted')
+        self.assert_raises(Cache.RepositoryAccessAborted, lambda: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input'))
+
+    def test_strip_components(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('dir/file')
+        self.cmd('create', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '3')
+            self.assert_true(not os.path.exists('file'))
+            with self.assert_creates_file('file'):
+                self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            with self.assert_creates_file('dir/file'):
+                self.cmd('extract', self.repository_location + '::test', '--strip-components', '1')
+            with self.assert_creates_file('input/dir/file'):
+                self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
+
+    def test_extract_include_exclude(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        self.create_regular_file('file3', size=1024 * 80)
+        self.create_regular_file('file4', size=1024 * 80)
+        self.cmd('create', '--exclude=input/file4', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/file1', )
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1'])
+        with changedir('output'):
+            self.cmd('extract', '--exclude=input/file2', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
+        with changedir('output'):
+            self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
+
+    def test_exclude_caches(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
+        self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature')
+        self.cmd('create', '--exclude-caches', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
+        self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
+
+    def test_path_normalization(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('dir1/dir2/file', size=1024 * 80)
+        with changedir('input/dir1/dir2'):
+            self.cmd('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..')
+        output = self.cmd('list', self.repository_location + '::test')
+        self.assert_not_in('..', output)
+        self.assert_in(' input/dir1/dir2/file', output)
+
+    def test_exclude_normalization(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        with changedir('input'):
+            self.cmd('create', '--exclude=file1', self.repository_location + '::test1', '.')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test1')
+        self.assert_equal(sorted(os.listdir('output')), ['file2'])
+        with changedir('input'):
+            self.cmd('create', '--exclude=./file1', self.repository_location + '::test2', '.')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test2')
+        self.assert_equal(sorted(os.listdir('output')), ['file2'])
+        self.cmd('create', '--exclude=input/./file1', self.repository_location + '::test3', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test3')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file2'])
+
+    def test_repeated_files(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input', 'input')
+
+    def test_overwrite(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        # Overwriting regular files and directories should be supported
+        os.mkdir('output/input')
+        os.mkdir('output/input/file1')
+        os.mkdir('output/input/dir2')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_dirs_equal('input', 'output/input')
+        # But non-empty dirs should fail
+        os.unlink('output/input/file1')
+        os.mkdir('output/input/file1')
+        os.mkdir('output/input/file1/dir')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', exit_code=1)
+
+    def test_rename(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
+        self.cmd('rename', self.repository_location + '::test', 'test.3')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
+        self.cmd('rename', self.repository_location + '::test.2', 'test.4')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.3')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.4')
+        # Make sure both archives have been renamed
+        repository = Repository(self.repository_path)
+        manifest, key = Manifest.load(repository)
+        self.assert_equal(len(manifest.archives), 2)
+        self.assert_in('test.3', manifest.archives)
+        self.assert_in('test.4', manifest.archives)
+
+    def test_delete(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
+        self.cmd('delete', self.repository_location + '::test')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
+        self.cmd('delete', '--stats', self.repository_location + '::test.2')
+        # Make sure all data except the manifest has been deleted
+        repository = Repository(self.repository_path)
+        self.assert_equal(len(repository), 1)
+
+    def test_delete_repo(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('delete', self.repository_location)
+        # Make sure the repo is gone
+        self.assertFalse(os.path.exists(self.repository_path))
+
+    def test_corrupted_repository(self):
+        self.cmd('init', self.repository_location)
+        self.create_src_archive('test')
+        self.cmd('extract', '--dry-run', self.repository_location + '::test')
+        self.cmd('check', self.repository_location)
+        name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
+        with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd:
+            fd.seek(100)
+            fd.write(b'XXXX')
+        self.cmd('check', self.repository_location, exit_code=1)
+
+    def test_readonly_repository(self):
+        self.cmd('init', self.repository_location)
+        self.create_src_archive('test')
+        os.system('chmod -R ugo-w ' + self.repository_path)
+        try:
+            self.cmd('extract', '--dry-run', self.repository_location + '::test')
+        finally:
+            # Restore permissions so shutil.rmtree is able to delete it
+            os.system('chmod -R u+w ' + self.repository_path)
+
+    def test_umask(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        mode = os.stat(self.repository_path).st_mode
+        self.assertEqual(stat.S_IMODE(mode), 0o700)
+
+    def test_cmdline_compatibility(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        output = self.cmd('verify', '-v', self.repository_location + '::test')
+        self.assert_in('"borg verify" has been deprecated', output)
+        output = self.cmd('prune', self.repository_location, '--hourly=1')
+        self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output)
+
+    def test_prune_repository(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test1', src_dir)
+        self.cmd('create', self.repository_location + '::test2', src_dir)
+        output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2')
+        self.assert_in('Keeping archive: test2', output)
+        self.assert_in('Would prune:     test1', output)
+        output = self.cmd('list', self.repository_location)
+        self.assert_in('test1', output)
+        self.assert_in('test2', output)
+        self.cmd('prune', self.repository_location, '--keep-daily=2')
+        output = self.cmd('list', self.repository_location)
+        self.assert_not_in('test1', output)
+        self.assert_in('test2', output)
+
+    def test_prune_repository_prefix(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
+        output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
+        self.assert_in('Would prune:     foo-2015-08-12-10:00', output)
+        output = self.cmd('list', self.repository_location)
+        self.assert_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+        self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        output = self.cmd('list', self.repository_location)
+        self.assert_not_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+
+    def test_usage(self):
+        self.assert_raises(SystemExit, lambda: self.cmd())
+        self.assert_raises(SystemExit, lambda: self.cmd('-h'))
+
+    def test_help(self):
+        assert 'Borg' in self.cmd('help')
+        assert 'patterns' in self.cmd('help', 'patterns')
+        assert 'Initialize' in self.cmd('help', 'init')
+        assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only')
+        assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
+
+    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
+    def test_fuse_mount_repository(self):
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        os.mkdir(mountpoint)
+        self.cmd('init', self.repository_location)
+        self.create_test_files()
+        self.cmd('create', self.repository_location + '::archive', 'input')
+        self.cmd('create', self.repository_location + '::archive2', 'input')
+        try:
+            self.cmd('mount', self.repository_location, mountpoint, fork=True)
+            self.wait_for_mount(mountpoint)
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
+        finally:
+            if sys.platform.startswith('linux'):
+                os.system('fusermount -u ' + mountpoint)
+            else:
+                os.system('umount ' + mountpoint)
+            os.rmdir(mountpoint)
+            # Give the daemon some time to exit
+            time.sleep(.2)
+
+    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
+    def test_fuse_mount_archive(self):
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        os.mkdir(mountpoint)
+        self.cmd('init', self.repository_location)
+        self.create_test_files()
+        self.cmd('create', self.repository_location + '::archive', 'input')
+        try:
+            self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
+            self.wait_for_mount(mountpoint)
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
+        finally:
+            if sys.platform.startswith('linux'):
+                os.system('fusermount -u ' + mountpoint)
+            else:
+                os.system('umount ' + mountpoint)
+            os.rmdir(mountpoint)
+            # Give the daemon some time to exit
+            time.sleep(.2)
+
+    def verify_aes_counter_uniqueness(self, method):
+        seen = set()  # Chunks already seen
+        used = set()  # counter values already used
+
+        def verify_uniqueness():
+            repository = Repository(self.repository_path)
+            for key, _ in repository.open_index(repository.get_transaction_id()).iteritems():
+                data = repository.get(key)
+                hash = sha256(data).digest()
+                if hash not in seen:
+                    seen.add(hash)
+                    num_blocks = num_aes_blocks(len(data) - 41)
+                    nonce = bytes_to_long(data[33:41])
+                    for counter in range(nonce, nonce + num_blocks):
+                        self.assert_not_in(counter, used)
+                        used.add(counter)
+
+        self.create_test_files()
+        os.environ['BORG_PASSPHRASE'] = 'passphrase'
+        self.cmd('init', '--encryption=' + method, self.repository_location)
+        verify_uniqueness()
+        self.cmd('create', self.repository_location + '::test', 'input')
+        verify_uniqueness()
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        verify_uniqueness()
+        self.cmd('delete', self.repository_location + '::test.2')
+        verify_uniqueness()
+        self.assert_equal(used, set(range(len(used))))
+
+    def test_aes_counter_uniqueness_keyfile(self):
+        self.verify_aes_counter_uniqueness('keyfile')
+
+    def test_aes_counter_uniqueness_passphrase(self):
+        self.verify_aes_counter_uniqueness('passphrase')
+
+
+class ArchiverCheckTestCase(ArchiverTestCaseBase):
+
+    def setUp(self):
+        super().setUp()
+        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
+            self.cmd('init', self.repository_location)
+            self.create_src_archive('archive1')
+            self.create_src_archive('archive2')
+
+    def open_archive(self, name):
+        repository = Repository(self.repository_path)
+        manifest, key = Manifest.load(repository)
+        archive = Archive(repository, key, manifest, name)
+        return archive, repository
+
+    def test_check_usage(self):
+        output = self.cmd('check', self.repository_location, exit_code=0)
+        self.assert_in('Starting repository check', output)
+        self.assert_in('Starting archive consistency check', output)
+        output = self.cmd('check', '--repository-only', self.repository_location, exit_code=0)
+        self.assert_in('Starting repository check', output)
+        self.assert_not_in('Starting archive consistency check', output)
+        output = self.cmd('check', '--archives-only', self.repository_location, exit_code=0)
+        self.assert_not_in('Starting repository check', output)
+        self.assert_in('Starting archive consistency check', output)
+
+    def test_missing_file_chunk(self):
+        archive, repository = self.open_archive('archive1')
+        for item in archive.iter_items():
+            if item[b'path'].endswith('testsuite/archiver.py'):
+                repository.delete(item[b'chunks'][-1][0])
+                break
+        repository.commit()
+        self.cmd('check', self.repository_location, exit_code=1)
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.cmd('check', self.repository_location, exit_code=0)
+
+    def test_missing_archive_item_chunk(self):
+        archive, repository = self.open_archive('archive1')
+        repository.delete(archive.metadata[b'items'][-5])
+        repository.commit()
+        self.cmd('check', self.repository_location, exit_code=1)
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.cmd('check', self.repository_location, exit_code=0)
+
+    def test_missing_archive_metadata(self):
+        archive, repository = self.open_archive('archive1')
+        repository.delete(archive.id)
+        repository.commit()
+        self.cmd('check', self.repository_location, exit_code=1)
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.cmd('check', self.repository_location, exit_code=0)
+
+    def test_missing_manifest(self):
+        archive, repository = self.open_archive('archive1')
+        repository.delete(Manifest.MANIFEST_ID)
+        repository.commit()
+        self.cmd('check', self.repository_location, exit_code=1)
+        output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('archive1', output)
+        self.assert_in('archive2', output)
+        self.cmd('check', self.repository_location, exit_code=0)
+
+    def test_extra_chunks(self):
+        self.cmd('check', self.repository_location, exit_code=0)
+        repository = Repository(self.repository_location)
+        repository.put(b'01234567890123456789012345678901', b'xxxx')
+        repository.commit()
+        repository.close()
+        self.cmd('check', self.repository_location, exit_code=1)
+        self.cmd('check', self.repository_location, exit_code=1)
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.cmd('check', self.repository_location, exit_code=0)
+        self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
+
+
+class RemoteArchiverTestCase(ArchiverTestCase):
+    prefix = '__testsuite__:'
+
+    def test_remote_repo_restrict_to_path(self):
+        self.cmd('init', self.repository_location)
+        path_prefix = os.path.dirname(self.repository_path)
+        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']):
+            self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_1'))
+        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]):
+            self.cmd('init', self.repository_location + '_2')
+        with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]):
+            self.cmd('init', self.repository_location + '_3')

+ 31 - 0
borg/testsuite/chunker.py

@@ -0,0 +1,31 @@
+from io import BytesIO
+
+from ..chunker import Chunker, buzhash, buzhash_update
+from ..archive import CHUNK_MAX_EXP
+from . import BaseTestCase
+
+
+class ChunkerTestCase(BaseTestCase):
+
+    def test_chunkify(self):
+        data = b'0' * int(1.5 * (1 << CHUNK_MAX_EXP)) + b'Y'
+        parts = [bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))]
+        self.assert_equal(len(parts), 2)
+        self.assert_equal(b''.join(parts), data)
+        self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], [])
+        self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
+        self.assert_equal([bytes(c) for c in Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
+        self.assert_equal([bytes(c) for c in Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz'])
+        self.assert_equal([bytes(c) for c in Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz'])
+
+    def test_buzhash(self):
+        self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769)
+        self.assert_equal(buzhash(b'abcdefghijklmnop', 1), 3795400502)
+        self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
+        # Test with more than 31 bytes to make sure our barrel_shift macro works correctly
+        self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248)

+ 15 - 10
attic/testsuite/crypto.py → borg/testsuite/crypto.py

@@ -1,9 +1,10 @@
 from binascii import hexlify
-from attic.testsuite import AtticTestCase
-from attic.crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes

+from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes
+from . import BaseTestCase

-class CryptoTestCase(AtticTestCase):
+
+class CryptoTestCase(BaseTestCase):

     def test_bytes_to_int(self):
         self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1)
@@ -14,11 +15,11 @@ class CryptoTestCase(AtticTestCase):

     def test_pbkdf2_sha256(self):
         self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)),
-                         b'120fb6cffcf8b32c43e7225256c4f837a86548c92ccc35480805987cb70be17b')
+                          b'120fb6cffcf8b32c43e7225256c4f837a86548c92ccc35480805987cb70be17b')
         self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 2, 32)),
-                         b'ae4d0c95af6b46d32d0adff928f06dd02a303f8ef3c251dfd6e2d85a95474c43')
+                          b'ae4d0c95af6b46d32d0adff928f06dd02a303f8ef3c251dfd6e2d85a95474c43')
         self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 4096, 32)),
-                         b'c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a')
+                          b'c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a')

     def test_get_random_bytes(self):
         bytes = get_random_bytes(10)
@@ -30,11 +31,15 @@ class CryptoTestCase(AtticTestCase):
     def test_aes(self):
         key = b'X' * 32
         data = b'foo' * 10
-        aes = AES(key)
+        # encrypt
+        aes = AES(is_encrypt=True, key=key)
         self.assert_equal(bytes_to_long(aes.iv, 8), 0)
         cdata = aes.encrypt(data)
         self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466')
         self.assert_equal(bytes_to_long(aes.iv, 8), 2)
-        self.assert_not_equal(data, aes.decrypt(cdata))
-        aes.reset(iv=b'\0' * 16)
-        self.assert_equal(data, aes.decrypt(cdata))
+        # decrypt
+        aes = AES(is_encrypt=False, key=key)
+        self.assert_equal(bytes_to_long(aes.iv, 8), 0)
+        pdata = aes.decrypt(cdata)
+        self.assert_equal(data, pdata)
+        self.assert_equal(bytes_to_long(aes.iv, 8), 2)

+ 30 - 5
attic/testsuite/hashindex.py → borg/testsuite/hashindex.py

@@ -1,11 +1,17 @@
 import hashlib
 import os
 import tempfile
-from attic.hashindex import NSIndex, ChunkIndex
-from attic.testsuite import AtticTestCase

+from ..hashindex import NSIndex, ChunkIndex
+from . import BaseTestCase

-class HashIndexTestCase(AtticTestCase):
+
+def H(x):
+    # make some 32byte long thing that depends on x
+    return bytes('%-0.32d' % x, 'ascii')
+
+
+class HashIndexTestCase(BaseTestCase):

     def _generic_test(self, cls, make_value, sha):
         idx = cls()
@@ -44,10 +50,12 @@ class HashIndexTestCase(AtticTestCase):
         self.assert_equal(len(cls.read(idx_name.name)), 0)

     def test_nsindex(self):
-        self._generic_test(NSIndex, lambda x: (x, x), '369a18ae6a52524eb2884a3c0fdc2824947edd017a2688c5d4d7b3510c245ab9')
+        self._generic_test(NSIndex, lambda x: (x, x),
+                           '861d6d60069ea45e39d36bed2bdc1d0c07981e0641955f897ac6848be429abac')
 
 
     def test_chunkindex(self):
     def test_chunkindex(self):
-        self._generic_test(ChunkIndex, lambda x: (x, x, x), 'ed22e8a883400453c0ee79a06c54df72c994a54eeefdc6c0989efdc5ee6d07b7')
+        self._generic_test(ChunkIndex, lambda x: (x, x, x),
+                           '69464bd0ebbc5866b9f95d838bc48617d21bfe3dcf294682a5c21a2ef6b9dc0b')
 
 
     def test_resize(self):
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS
         n = 2000  # Must be >= MIN_BUCKETS
@@ -75,3 +83,20 @@ class HashIndexTestCase(AtticTestCase):
         second_half = list(idx.iteritems(marker=all[49][0]))
         second_half = list(idx.iteritems(marker=all[49][0]))
         self.assert_equal(len(second_half), 50)
         self.assert_equal(len(second_half), 50)
         self.assert_equal(second_half, all[50:])
         self.assert_equal(second_half, all[50:])
+
+    def test_chunkindex_merge(self):
+        idx1 = ChunkIndex()
+        idx1[H(1)] = 1, 100, 100
+        idx1[H(2)] = 2, 200, 200
+        idx1[H(3)] = 3, 300, 300
+        # no H(4) entry
+        idx2 = ChunkIndex()
+        idx2[H(1)] = 4, 100, 100
+        idx2[H(2)] = 5, 200, 200
+        # no H(3) entry
+        idx2[H(4)] = 6, 400, 400
+        idx1.merge(idx2)
+        assert idx1[H(1)] == (5, 100, 100)
+        assert idx1[H(2)] == (7, 200, 200)
+        assert idx1[H(3)] == (3, 300, 300)
+        assert idx1[H(4)] == (6, 400, 400)

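A pure-Python model of the merge semantics that test_chunkindex_merge pins down (an illustration only, not the C hashindex implementation: for keys present in both indexes the reference counts are summed and the size fields kept, keys unique to either side are carried over unchanged):

    def merge_model(idx1, idx2):
        # each idx maps key -> (refcount, size, csize)
        for key, (refs, size, csize) in idx2.items():
            if key in idx1:
                idx1[key] = (idx1[key][0] + refs, size, csize)
            else:
                idx1[key] = (refs, size, csize)

    a = {b'k1': (1, 100, 100), b'k3': (3, 300, 300)}
    b = {b'k1': (4, 100, 100), b'k4': (6, 400, 400)}
    merge_model(a, b)
    assert a == {b'k1': (5, 100, 100), b'k3': (3, 300, 300), b'k4': (6, 400, 400)}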
+ 31 - 48
attic/testsuite/helpers.py → borg/testsuite/helpers.py

@@ -1,16 +1,16 @@
 import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
-import os
-import tempfile
-import unittest
-from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp
-from attic.testsuite import AtticTestCase
+
 import msgpack
 
+from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
+    prune_within, prune_split, \
+    StableDict, int_to_bigint, bigint_to_int, parse_timestamp
+from . import BaseTestCase
 
-class BigIntTestCase(AtticTestCase):
+
+class BigIntTestCase(BaseTestCase):
 
     def test_bigint(self):
         self.assert_equal(int_to_bigint(0), 0)
@@ -22,7 +22,7 @@ class BigIntTestCase(AtticTestCase):
         self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
 
 
-class LocationTestCase(AtticTestCase):
+class LocationTestCase(BaseTestCase):
 
     def test(self):
         self.assert_equal(
@@ -38,8 +38,8 @@ class LocationTestCase(AtticTestCase):
             "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
         )
         self.assert_equal(
-            repr(Location('mybackup.attic::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='mybackup.attic', archive='archive')"
+            repr(Location('path::archive')),
+            "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
         )
         self.assert_equal(
             repr(Location('/some/absolute/path::archive')),
@@ -60,7 +60,7 @@ class LocationTestCase(AtticTestCase):
                               Location(Location(location).canonical_path()).canonical_path())
 
 
-class FormatTimedeltaTestCase(AtticTestCase):
+class FormatTimedeltaTestCase(BaseTestCase):
 
     def test(self):
         t0 = datetime(2001, 1, 1, 10, 20, 3, 0)
@@ -71,7 +71,7 @@ class FormatTimedeltaTestCase(AtticTestCase):
         )
 
 
-class PatternTestCase(AtticTestCase):
+class PatternTestCase(BaseTestCase):
 
     files = [
         '/etc/passwd', '/etc/hosts', '/home',
@@ -94,7 +94,7 @@ class PatternTestCase(AtticTestCase):
                           ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg'])
         self.assert_equal(self.evaluate(['/home/u'], []), [])
         self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), [])
-        self.assert_equal(self.evaluate(['/home/'], ['/home/user2']), 
+        self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
                           ['/home', '/home/user/.profile', '/home/user/.bashrc'])
         self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']),
                           ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html'])
@@ -104,7 +104,7 @@ class PatternTestCase(AtticTestCase):
                           ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
 
 
-class MakePathSafeTestCase(AtticTestCase):
+class MakePathSafeTestCase(BaseTestCase):
 
     def test(self):
         self.assert_equal(make_path_safe('/foo/bar'), 'foo/bar')
@@ -116,25 +116,8 @@ class MakePathSafeTestCase(AtticTestCase):
         self.assert_equal(make_path_safe('/'), '.')
         self.assert_equal(make_path_safe('/'), '.')
 
-class UpgradableLockTestCase(AtticTestCase):
-
-    def test(self):
-        file = tempfile.NamedTemporaryFile()
-        lock = UpgradableLock(file.name)
-        lock.upgrade()
-        lock.upgrade()
-        lock.release()
-
-    @unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing')
-    def test_read_only_lock_file(self):
-        file = tempfile.NamedTemporaryFile()
-        os.chmod(file.name, 0o444)
-        lock = UpgradableLock(file.name)
-        self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade)
-        lock.release()
-
 
-class MockArchive(object):
+class MockArchive:
 
     def __init__(self, ts):
         self.ts = ts
@@ -143,12 +126,12 @@ class MockArchive(object):
         return repr(self.ts)
 
 
-class PruneSplitTestCase(AtticTestCase):
+class PruneSplitTestCase(BaseTestCase):
 
     def test(self):
 
         def local_to_UTC(month, day):
-            'Convert noon on the month and day in 2013 to UTC.'
+            """Convert noon on the month and day in 2013 to UTC."""
             seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M'))
             return datetime.fromtimestamp(seconds, tz=timezone.utc)
 
@@ -159,8 +142,8 @@ class PruneSplitTestCase(AtticTestCase):
             for ta in test_archives, reversed(test_archives):
                 self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)),
                                   subset(test_archives, indices))
-            
-        test_pairs = [(1,1), (2,1), (2,28), (3,1), (3,2), (3,31), (5,1)]
+
+        test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)]
         test_dates = [local_to_UTC(month, day) for month, day in test_pairs]
         test_archives = [MockArchive(date) for date in test_dates]
 
@@ -172,7 +155,7 @@ class PruneSplitTestCase(AtticTestCase):
         dotest(test_archives, 0, [], [])
 
 
-class PruneWithinTestCase(AtticTestCase):
+class PruneWithinTestCase(BaseTestCase):
 
     def test(self):
 
@@ -183,27 +166,27 @@ class PruneWithinTestCase(AtticTestCase):
             for ta in test_archives, reversed(test_archives):
                 self.assert_equal(set(prune_within(ta, within)),
                                   subset(test_archives, indices))
-            
+
         # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
         test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
         now = datetime.now(timezone.utc)
         test_dates = [now - timedelta(seconds=s) for s in test_offsets]
         test_archives = [MockArchive(date) for date in test_dates]
 
-        dotest(test_archives, '1H',  [0])
-        dotest(test_archives, '2H',  [0, 1])
-        dotest(test_archives, '3H',  [0, 1, 2])
+        dotest(test_archives, '1H', [0])
+        dotest(test_archives, '2H', [0, 1])
+        dotest(test_archives, '3H', [0, 1, 2])
         dotest(test_archives, '24H', [0, 1, 2, 3])
         dotest(test_archives, '26H', [0, 1, 2, 3, 4])
-        dotest(test_archives, '2d',  [0, 1, 2, 3, 4])
+        dotest(test_archives, '2d', [0, 1, 2, 3, 4])
         dotest(test_archives, '50H', [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '3d',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1w',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1m',  [0, 1, 2, 3, 4, 5])
-        dotest(test_archives, '1y',  [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
+        dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
 
 
-class StableDictTestCase(AtticTestCase):
+class StableDictTestCase(BaseTestCase):
 
     def test(self):
         d = StableDict(foo=1, bar=2, boo=3, baz=4)
@@ -211,7 +194,7 @@ class StableDictTestCase(AtticTestCase):
         self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')
 
 
-class TestParseTimestamp(AtticTestCase):
+class TestParseTimestamp(BaseTestCase):
 
     def test(self):
         self.assert_equal(parse_timestamp('2015-04-19T20:25:00.226410'), datetime(2015, 4, 19, 20, 25, 0, 226410, timezone.utc))

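The prune tests above fix the intended selection semantics; as a rough model (an illustration under stated assumptions, not the actual borg.helpers code), prune_within keeps every archive whose timestamp is younger than the given duration string:

    from datetime import datetime, timedelta, timezone

    _SECONDS = {'H': 3600, 'd': 86400, 'w': 604800, 'm': 2592000, 'y': 31536000}

    def prune_within_model(archives, within):
        # e.g. within='2d' keeps all archives from the last two days
        seconds = int(within[:-1]) * _SECONDS[within[-1]]
        cutoff = datetime.now(timezone.utc) - timedelta(seconds=seconds)
        return [a for a in archives if a.ts > cutoff]

This matches the table driven by dotest(): '1H' keeps only the one-minute-old archive, '26H' also keeps the 25-hour-old one, and so on.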
+ 15 - 14
attic/testsuite/key.py → borg/testsuite/key.py

@@ -3,19 +3,20 @@ import re
 import shutil
 import tempfile
 from binascii import hexlify
-from attic.crypto import bytes_to_long, num_aes_blocks
-from attic.testsuite import AtticTestCase
-from attic.key import PlaintextKey, PassphraseKey, KeyfileKey
-from attic.helpers import Location, unhexlify
 
+from ..crypto import bytes_to_long, num_aes_blocks
+from ..key import PlaintextKey, PassphraseKey, KeyfileKey
+from ..helpers import Location, unhexlify
+from . import BaseTestCase
 
-class KeyTestCase(AtticTestCase):
 
-    class MockArgs(object):
+class KeyTestCase(BaseTestCase):
+
+    class MockArgs:
         repository = Location(tempfile.mkstemp()[1])
 
     keyfile2_key_file = """
-        ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000
+        BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
        hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq
        bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l
        1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2
@@ -32,13 +33,13 @@ class KeyTestCase(AtticTestCase):
 
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
-        os.environ['ATTIC_KEYS_DIR'] = self.tmppath
+        os.environ['BORG_KEYS_DIR'] = self.tmppath
 
     def tearDown(self):
         shutil.rmtree(self.tmppath)
 
-    class MockRepository(object):
-        class _Location(object):
+    class MockRepository:
+        class _Location:
             orig = '/some/place'
 
         _location = _Location()
@@ -51,7 +52,7 @@ class KeyTestCase(AtticTestCase):
         self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
 
     def test_keyfile(self):
-        os.environ['ATTIC_PASSPHRASE'] = 'test'
+        os.environ['BORG_PASSPHRASE'] = 'test'
         key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
         self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
         manifest = key.encrypt(b'XXX')
@@ -70,14 +71,14 @@ class KeyTestCase(AtticTestCase):
         self.assert_equal(data, key2.decrypt(key.id_hash(data), key.encrypt(data)))
 
     def test_keyfile2(self):
-        with open(os.path.join(os.environ['ATTIC_KEYS_DIR'], 'keyfile'), 'w') as fd:
+        with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd:
             fd.write(self.keyfile2_key_file)
-        os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
+        os.environ['BORG_PASSPHRASE'] = 'passphrase'
         key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
         self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata), b'payload')
 
     def test_passphrase(self):
-        os.environ['ATTIC_PASSPHRASE'] = 'test'
+        os.environ['BORG_PASSPHRASE'] = 'test'
         key = PassphraseKey.create(self.MockRepository(), None)
         self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
         self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')

+ 121 - 0
borg/testsuite/locking.py

@@ -0,0 +1,121 @@
+import time
+
+import pytest
+
+from ..locking import get_id, TimeoutTimer, ExclusiveLock, UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE
+
+
+ID1 = "foo", 1, 1
+ID2 = "bar", 2, 2
+
+def test_id():
+    hostname, pid, tid = get_id()
+    assert isinstance(hostname, str)
+    assert isinstance(pid, int)
+    assert isinstance(tid, int)
+    assert len(hostname) > 0
+    assert pid > 0
+
+
+class TestTimeoutTimer:
+    def test_timeout(self):
+        timeout = 0.5
+        t = TimeoutTimer(timeout).start()
+        assert not t.timed_out()
+        time.sleep(timeout * 1.5)
+        assert t.timed_out()
+
+    def test_notimeout_sleep(self):
+        timeout, sleep = None, 0.5
+        t = TimeoutTimer(timeout, sleep).start()
+        assert not t.timed_out_or_sleep()
+        assert time.time() >= t.start_time + 1 * sleep
+        assert not t.timed_out_or_sleep()
+        assert time.time() >= t.start_time + 2 * sleep
+
+
+@pytest.fixture()
+def lockpath(tmpdir):
+    return str(tmpdir.join('lock'))
+
+
+class TestExclusiveLock:
+    def test_checks(self, lockpath):
+        with ExclusiveLock(lockpath, timeout=1) as lock:
+            assert lock.is_locked() and lock.by_me()
+
+    def test_acquire_break_reacquire(self, lockpath):
+        lock = ExclusiveLock(lockpath, id=ID1).acquire()
+        lock.break_lock()
+        with ExclusiveLock(lockpath, id=ID2):
+            pass
+
+    def test_timeout(self, lockpath):
+        with ExclusiveLock(lockpath, id=ID1):
+            with pytest.raises(ExclusiveLock.LockTimeout):
+                ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire()
+
+
+class TestUpgradableLock:
+    def test_shared(self, lockpath):
+        lock1 = UpgradableLock(lockpath, exclusive=False, id=ID1).acquire()
+        lock2 = UpgradableLock(lockpath, exclusive=False, id=ID2).acquire()
+        assert len(lock1._roster.get(SHARED)) == 2
+        assert len(lock1._roster.get(EXCLUSIVE)) == 0
+        lock1.release()
+        lock2.release()
+
+    def test_exclusive(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=True, id=ID1) as lock:
+            assert len(lock._roster.get(SHARED)) == 0
+            assert len(lock._roster.get(EXCLUSIVE)) == 1
+
+    def test_upgrade(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=False) as lock:
+            lock.upgrade()
+            lock.upgrade()  # NOP
+            assert len(lock._roster.get(SHARED)) == 0
+            assert len(lock._roster.get(EXCLUSIVE)) == 1
+
+    def test_downgrade(self, lockpath):
+        with UpgradableLock(lockpath, exclusive=True) as lock:
+            lock.downgrade()
+            lock.downgrade()  # NOP
+            assert len(lock._roster.get(SHARED)) == 1
+            assert len(lock._roster.get(EXCLUSIVE)) == 0
+
+    def test_break(self, lockpath):
+        lock = UpgradableLock(lockpath, exclusive=True, id=ID1).acquire()
+        lock.break_lock()
+        assert len(lock._roster.get(SHARED)) == 0
+        assert len(lock._roster.get(EXCLUSIVE)) == 0
+        with UpgradableLock(lockpath, exclusive=True, id=ID2):
+            pass
+
+
+@pytest.fixture()
+def rosterpath(tmpdir):
+    return str(tmpdir.join('roster'))
+
+
+class TestLockRoster:
+    def test_empty(self, rosterpath):
+        roster = LockRoster(rosterpath)
+        empty = roster.load()
+        roster.save(empty)
+        assert empty == {}
+
+    def test_modify_get(self, rosterpath):
+        roster1 = LockRoster(rosterpath, id=ID1)
+        assert roster1.get(SHARED) == set()
+        roster1.modify(SHARED, ADD)
+        assert roster1.get(SHARED) == {ID1, }
+        roster2 = LockRoster(rosterpath, id=ID2)
+        roster2.modify(SHARED, ADD)
+        assert roster2.get(SHARED) == {ID1, ID2, }
+        roster1 = LockRoster(rosterpath, id=ID1)
+        roster1.modify(SHARED, REMOVE)
+        assert roster1.get(SHARED) == {ID2, }
+        roster2 = LockRoster(rosterpath, id=ID2)
+        roster2.modify(SHARED, REMOVE)
+        assert roster2.get(SHARED) == set()

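For orientation, a brief usage sketch of the lock API these tests exercise (assuming borg.locking exports UpgradableLock as added in this merge; the lock path is illustrative):

    from borg.locking import UpgradableLock

    # several readers may hold the shared lock at the same time
    with UpgradableLock('/tmp/repo.lock', exclusive=False) as lock:
        lock.upgrade()    # become the single exclusive (write) holder
        lock.downgrade()  # drop back to a shared (read) lock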
+ 3 - 3
attic/testsuite/lrucache.py → borg/testsuite/lrucache.py

@@ -1,8 +1,8 @@
-from attic.lrucache import LRUCache
-from attic.testsuite import AtticTestCase
+from ..lrucache import LRUCache
+from . import BaseTestCase
 
 
-class LRUCacheTestCase(AtticTestCase):
+class LRUCacheTestCase(BaseTestCase):
 
     def test(self):
         c = LRUCache(2, dispose=lambda _: None)

+ 5 - 5
attic/testsuite/platform.py → borg/testsuite/platform.py

@@ -3,8 +3,9 @@ import shutil
 import sys
 import tempfile
 import unittest
-from attic.platform import acl_get, acl_set
-from attic.testsuite import AtticTestCase
+
+from ..platform import acl_get, acl_set
+from . import BaseTestCase
 
 
 ACCESS_ACL = """
@@ -36,7 +37,7 @@ def fakeroot_detected():
 
 @unittest.skipUnless(sys.platform.startswith('linux'), 'linux only test')
 @unittest.skipIf(fakeroot_detected(), 'not compatible with fakeroot')
-class PlatformLinuxTestCase(AtticTestCase):
+class PlatformLinuxTestCase(BaseTestCase):
 
     def setUp(self):
         self.tmpdir = tempfile.mkdtemp()
@@ -74,7 +75,7 @@ class PlatformLinuxTestCase(AtticTestCase):
 
 @unittest.skipUnless(sys.platform.startswith('darwin'), 'OS X only test')
 @unittest.skipIf(fakeroot_detected(), 'not compatible with fakeroot')
-class PlatformDarwinTestCase(AtticTestCase):
+class PlatformDarwinTestCase(BaseTestCase):
 
     def setUp(self):
         self.tmpdir = tempfile.mkdtemp()
@@ -101,4 +102,3 @@ class PlatformDarwinTestCase(AtticTestCase):
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
         self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
-

+ 22 - 12
attic/testsuite/repository.py → borg/testsuite/repository.py

@@ -1,15 +1,19 @@
 import os
 import shutil
 import tempfile
-from attic.testsuite.mock import patch
-from attic.hashindex import NSIndex
-from attic.helpers import Location, IntegrityError, UpgradableLock
-from attic.remote import RemoteRepository
-from attic.repository import Repository
-from attic.testsuite import AtticTestCase
 
+from mock import patch
 
-class RepositoryTestCaseBase(AtticTestCase):
+from ..hashindex import NSIndex
+from ..helpers import Location, IntegrityError
+from ..locking import UpgradableLock
+from ..remote import RemoteRepository, InvalidRPCMethod
+from ..repository import Repository
+from . import BaseTestCase
+
+
+class RepositoryTestCaseBase(BaseTestCase):
+    key_size = 32
 
     def open(self, create=False):
         return Repository(os.path.join(self.tmppath, 'repository'), create=create)
@@ -154,11 +158,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         for name in os.listdir(self.repository.path):
             if name.startswith('index.'):
                 os.unlink(os.path.join(self.repository.path, name))
-        with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.WriteLockFailed) as upgrade:
+        with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.ExclusiveLockFailed) as upgrade:
             self.reopen()
-            self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository))
-            upgrade.assert_called_once()
-
+            self.assert_raises(UpgradableLock.ExclusiveLockFailed, lambda: len(self.repository))
+            upgrade.assert_called_once_with()
 
     def test_crash_before_write_index(self):
         self.add_keys()
@@ -308,7 +311,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         # Simulate a crash before compact
         with patch.object(Repository, 'compact_segments') as compact:
             self.repository.commit()
-            compact.assert_called_once()
+            compact.assert_called_once_with()
         self.reopen()
         self.check(repair=True)
         self.assert_equal(self.repository.get(bytes(32)), b'data2')
@@ -319,8 +322,15 @@ class RemoteRepositoryTestCase(RepositoryTestCase):
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
 
+    def test_invalid_rpc(self):
+        self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
+
 
 class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+    def test_crash_before_compact(self):
+        # skip this test, we can't mock-patch a Repository class in another process!
+        pass

+ 5 - 3
attic/testsuite/xattr.py → borg/testsuite/xattr.py

@@ -1,11 +1,13 @@
 import os
 import tempfile
 import unittest
-from attic.testsuite import AtticTestCase
-from attic.xattr import is_enabled, getxattr, setxattr, listxattr
+
+from ..xattr import is_enabled, getxattr, setxattr, listxattr
+from . import BaseTestCase
+
 
 @unittest.skipUnless(is_enabled(), 'xattr not enabled on filesystem')
-class XattrTestCase(AtticTestCase):
+class XattrTestCase(BaseTestCase):
 
     def setUp(self):
         self.tmpfile = tempfile.NamedTemporaryFile()

+ 2 - 2
attic/xattr.py → borg/xattr.py

@@ -8,10 +8,10 @@ from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_
 from ctypes.util import find_library
 
 
-def is_enabled():
+def is_enabled(path=None):
     """Determine if xattr is enabled on the filesystem
     """
-    with tempfile.NamedTemporaryFile() as fd:
+    with tempfile.NamedTemporaryFile(dir=path, prefix='borg-tmp') as fd:
         try:
             setxattr(fd.fileno(), 'user.name', b'value')
         except OSError:

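The new path parameter lets callers probe xattr support on a specific filesystem instead of whatever backs the default temporary directory; a small usage sketch (the mount point is illustrative):

    from borg.xattr import is_enabled

    if not is_enabled('/mnt/backup-target'):  # probe that mount, not /tmp
        print('xattrs are not supported there and would be skipped')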
+ 11 - 15
docs/Makefile

@@ -73,17 +73,17 @@ qthelp:
 	@echo
 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/attic.qhcp"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/borg.qhcp"
 	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/attic.qhc"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/borg.qhc"
 
 devhelp:
 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 	@echo
 	@echo "Build finished."
 	@echo "To view the help file:"
-	@echo "# mkdir -p $$HOME/.local/share/devhelp/attic"
-	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/attic"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/borg"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/borg"
 	@echo "# devhelp"
 
 epub:
@@ -130,17 +130,13 @@ doctest:
 	@echo "Testing of doctests in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/doctest/output.txt."
 
-gh-pages: html
-	GH_PAGES_CLONE="`mktemp -d`" && \
-	git clone --branch gh-pages `git rev-parse --show-toplevel` $$GH_PAGES_CLONE && \
-	(cd $$GH_PAGES_CLONE && git rm -r *) && \
-	cp -r _build/html/* $$GH_PAGES_CLONE && \
-	(cd $$GH_PAGES_CLONE && git add -A && git commit -m 'Updated gh-pages' && git push) && \
-	rm -rf $$GH_PAGES_CLONE
+gh-io: html
+	GH_IO_CLONE="`mktemp -d`" && \
+	git clone git@github.com:borgbackup/borgbackup.github.io.git $$GH_IO_CLONE && \
+	(cd $$GH_IO_CLONE && git rm -r *) && \
+	cp -r _build/html/* $$GH_IO_CLONE && \
+	(cd $$GH_IO_CLONE && git add -A && git commit -m 'Updated borgbackup.github.io' && git push) && \
+	rm -rf $$GH_IO_CLONE
 
 inotify: html
 	while inotifywait -r . --exclude usage.rst --exclude '_build/*' ; do make html ; done
-
-upload: html
-	rsync -va -e ssh _build/html/ sushi.edgewall.com:/srv/attic/www/
-

+ 0 - 7
docs/_themes/attic/sidebarlogo.html

@@ -1,7 +0,0 @@
-<link href='//fonts.googleapis.com/css?family=Oswald:700|Inconsolata:400,700'
-          rel='stylesheet' type='text/css'>
-<div class="sidebarlogo">
-  <a href="{{ pathto('index') }}">
-  <div class="title">Attic</div>
-</a>
-</div>

+ 0 - 10
docs/_themes/attic/sidebarusefullinks.html

@@ -1,10 +0,0 @@
-<a href="https://github.com/jborg/attic"><img style="position: fixed; top: 0; right: 0; border: 0;"
-  src="https://s3.amazonaws.com/github/ribbons/forkme_right_gray_6d6d6d.png" alt="Fork me on GitHub"></a>
-
-<h3>Useful Links</h3>
-<ul>
-  <li><a href="https://pypi.python.org/pypi/Attic">Attic @ PyPI</a></li>
-  <li><a href="https://github.com/jborg/attic">Attic @ github</a></li>
-  <li><a href="https://attic-backup.org/">Online Documentation</a></li>
-  <li><a href="https://github.com/jborg/attic/issues">Issue Tracker</a></li>
-</ul>

+ 5 - 0
docs/_themes/local/sidebarlogo.html

@@ -0,0 +1,5 @@
+<div class="sidebarlogo">
+  <a href="{{ pathto('index') }}">
+  <div class="title">Borg</div>
+</a>
+</div>

+ 20 - 0
docs/_themes/local/sidebarusefullinks.html

@@ -0,0 +1,20 @@
+<a href="https://github.com/borgbackup/borg"><img style="position: fixed; top: 0; right: 0; border: 0;"
+  src="https://s3.amazonaws.com/github/ribbons/forkme_right_gray_6d6d6d.png" alt="Fork me on GitHub"></a>
+
+<h3>Useful Links</h3>
+<ul>
+  <li><a href="https://borgbackup.github.io/borgbackup/">Main Web Site</a></li>
+  <li><a href="https://pypi.python.org/pypi/borgbackup">PyPI packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/issues/147">Binary Packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/blob/master/CHANGES.rst">Current ChangeLog</a></li>
+  <li><a href="https://github.com/borgbackup/borg">GitHub</a></li>
+  <li><a href="https://github.com/borgbackup/borg/issues">Issue Tracker</a></li>
+  <li><a href="https://www.bountysource.com/teams/borgbackup">Bounties &amp; Fundraisers</a></li>
+  <li><a href="http://librelist.com/browser/borgbackup/">Mailing List</a></li>
+</ul>
+
+<h3>Related Projects</h3>
+<ul>
+  <li><a href="https://borgbackup.github.io/borgweb/">BorgWeb</a></li>
+</ul>
+

+ 65 - 31
docs/_themes/attic/static/attic.css_t → docs/_themes/local/static/local.css_t

@@ -1,12 +1,14 @@
 @import url("basic.css");
+@import url(//fonts.googleapis.com/css?family=Black+Ops+One);
 
 body {
-  font-family: Helvetica;
-  background-color: white;
+  font-family: Arial, Helvetica, sans-serif;
+  background-color: black;
   margin: 0;
   padding: 0;
   position: relative;
 }
+
 div.related {
   display: none;
   background-color: black;
@@ -14,10 +16,12 @@ div.related {
   width: 800px;
   margin: 0 auto;
 }
+
 div.related a {
   color: white;
   text-decoration: none;
 }
+
 div.document {
   width: 1030px;
   margin: 0 auto;
@@ -27,114 +31,144 @@ div.documentwrapper {
   float: right;
   width: 760px;
   padding: 0 20px 20px 20px;
-  background-color: #f3f3f3;
+  color: #00aa00;
+  background-color: #000000;
   margin-bottom: 2em;
 }
+
 div.sphinxsidebar {
   margin-left: 0;
-  parring-right: 20px;
+  padding-right: 20px;
   width: 230px;
-  background: #e9e9e9;
+  background: #081008;
   position: absolute;
   top: 0;
   min-height: 100%;
 }
 
 h1, h2, h3 {
-  font-family: "Oswald";
   font-weight: normal;
-  color: #333;
+  color: #33dd33;
 }
+
 h1 {
   margin: .8em 0 .5em;
+  font-size: 200%;
 }
-h2, h3 {
+
+h2 {
   margin: 1.2em 0 .6em;
+  font-size: 140%;
 }
-h1 { font-size: 200%;}
-h2 { font-size: 140%;}
-h3 { font-size: 110%;}
+
+h3 {
+  margin: 1.2em 0 .6em;
+  font-size: 110%;
+}
+
 ul {
   padding-left: 1.2em;
   margin-bottom: .3em;
 }
+
 ul ul {
   font-size: 95%;
 }
+
 li {
   margin: .1em 0;
 }
-a:link, a:visited {
-  color: #00608f;
+
+a:link {
+  color: #dddd00;
   text-decoration: none;
 }
+
+a:visited {
+  color: #990000;
+  text-decoration: none;
+}
+
 a:hover {
-  color: #00B0E4;
-  border-bottom: 1px dotted #00B0E4;
+  color: #dd0000;
+  border-bottom: 1px dotted #dd0000;
 }
 
 div.sphinxsidebar a:link, div.sphinxsidebar a:visited {
-  color: #555;
   border-bottom: 1px dotted #555;
 }
 
+div.sphinxsidebar {
+  color: #00aa00;
+  background: #000000;
+}
+
 div.sphinxsidebar input {
-  border: 1px solid #ccc;
+  color: #00cc00;
+  background: #000000;
+  border: 1px solid #444444;
 }
 
 pre {
   padding: 10px 20px;
-  background: white;
-  color: #222;
+  background: #101010;
+  color: #22cc22;
   line-height: 1.5em;
   border-bottom: 2px solid black;
-  font-family: "Inconsolata";
 }
+
 pre a:link,
 pre a:visited {
-  color: #00B0E4;
+  color: #00b0e4;
 }
 
 div.sidebarlogo .title {
-  font-family: "Oswald";
+  font-family: 'Black Ops One', cursive;
   font-size: 500%;
 }
+
+div.sidebarlogo a {
+  color: #00dd00;
+}
+
 div.sidebarlogo .subtitle {
   font-style: italic;
   color: #777;
 }
+
 tt span.pre {
   font-size: 110%;
 }
+
 dt {
-  font-family: "Oswald";
   font-size: 95%;
 }
 
 div.admonition p.admonition-title + p {
-    display: inline;
+  display: inline;
 }
 
 div.admonition p {
-    margin-bottom: 5px;
+  margin-bottom: 5px;
 }
 
 p.admonition-title {
-    display: inline;
+  display: inline;
 }
 
 p.admonition-title:after {
-    content: ":";
+  content: ":";
 }
 
 div.note {
-  background-color: #ff5;
-  border-bottom: 2px solid #d22;
+  background-color: #002211;
+  border-bottom: 2px solid #22dd22;
 }
 
 div.seealso {
-  background-color: #ffe;
-  border: 1px solid #ff6;
+  background-color: #0fe;
+  border: 1px solid #0f6;
   border-radius: .4em;
   box-shadow: 2px 2px #dd6;
 }
+

+ 1 - 1
docs/_themes/attic/theme.conf → docs/_themes/local/theme.conf

@@ -1,6 +1,6 @@
 [theme]
 inherit = basic
-stylesheet = attic.css
+stylesheet = local.css
 pygments_style = tango
 
 [options]

+ 4 - 0
docs/changes.rst

@@ -0,0 +1,4 @@
+.. include:: global.rst.inc
+.. _changelog:
+
+.. include:: ../CHANGES.rst

+ 20 - 20
docs/conf.py

@@ -1,6 +1,6 @@
-    # -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 #
-# Attic documentation build configuration file, created by
+# documentation build configuration file, created by
 # sphinx-quickstart on Sat Sep 10 18:18:25 2011.
 #
 # This file is execfile()d with the current directory set to its containing dir.
@@ -11,12 +11,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os, attic
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+import sys, os
+sys.path.insert(0, os.path.abspath('..'))
+
+from borg import __version__ as sw_version
 
 # -- General configuration -----------------------------------------------------
 
@@ -40,15 +41,15 @@ source_suffix = '.rst'
 master_doc = 'index'
 
 # General information about the project.
-project = 'Attic - Deduplicating Archiver'
-copyright = '2010-2014, Jonas Borgström'
+project = 'Borg - Deduplicating Archiver'
+copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = attic.__version__.split('-')[0]
+version = sw_version.split('-')[0]
 # The full version, including alpha/beta/rc tags.
 release = version
 
@@ -91,7 +92,7 @@ pygments_style = 'sphinx'
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'attic'
+html_theme = 'local'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -120,7 +121,7 @@ html_theme_path = ['_themes']
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = []
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
@@ -133,7 +134,7 @@ html_static_path = ['_static']
 # Custom sidebar templates, maps document names to template names.
 html_sidebars = {
     'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'],
-    '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html']
+    '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html']
 }
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
@@ -166,7 +167,7 @@ html_show_copyright = False
 #html_file_suffix = None
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'atticdoc'
+htmlhelp_basename = 'borgdoc'
 
 
 # -- Options for LaTeX output --------------------------------------------------
@@ -180,8 +181,8 @@ htmlhelp_basename = 'atticdoc'
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'Attic.tex', 'Attic Documentation',
-   'Jonas Borgström', 'manual'),
+  ('index', 'Borg.tex', 'Borg Documentation',
+   'see "AUTHORS" file', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -213,14 +214,13 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 #man_pages = [
-#    ('man', 'attic', 'Attic',
-#     ['Jonas Borgström'], 1)
+#    ('man', 'borg', 'Borg',
+#     ['see "AUTHORS" file'], 1)
 #]
 
 extensions = ['sphinx.ext.extlinks']
 
 extlinks = {
-    'issue': ('https://github.com/jborg/attic/issues/%s', '#'),
-    'targz_url': ('https://pypi.python.org/packages/source/A/Attic/%%s-%s.tar.gz' % version, None),
-    'artifacts': ('https://attic-backup.org/downloads/releases/%s/%%s' % version, '')
-        }
+    'issue': ('https://github.com/borgbackup/borg/issues/%s', '#'),
+    'targz_url': ('https://pypi.python.org/packages/source/b/borgbackup/%%s-%s.tar.gz' % version, None),
+}

+ 67 - 0
docs/development.rst

@@ -0,0 +1,67 @@
+.. include:: global.rst.inc
+.. _development:
+
+Development
+===========
+
+This chapter will get you started with |project_name| development.
+
+|project_name| is written in Python (with a little bit of Cython and C for
+the performance critical parts).
+
+
+Building a development environment
+----------------------------------
+
+First, just install borg into a virtual env as described before.
+
+To install some additional packages needed for running the tests, activate your
+virtual env and run::
+
+  pip install -r requirements.d/development.txt
+
+
+Running the tests
+-----------------
+
+The tests are in the borg/testsuite package.
+
+To run them, you need to have fakeroot, tox and pytest installed.
+
+To run the test suite, use the following command::
+
+  fakeroot -u tox  # run all tests
+
+Some more advanced examples::
+
+  # verify a changed tox.ini (run this after any change to tox.ini):
+  fakeroot -u tox --recreate
+
+  fakeroot -u tox -e py32  # run all tests, but only on python 3.2
+
+  fakeroot -u tox borg.testsuite.locking  # only run 1 test module
+
+  fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"'  # exclude some tests
+
+  fakeroot -u tox borg.testsuite -- -v  # verbose py.test
+
+Important notes:
+
+- Without fakeroot -u some tests will fail.
+- When using -- to give options to py.test, you MUST also give borg.testsuite[.module].
+
+Building the docs with Sphinx
+-----------------------------
+
+The documentation (in reStructuredText format, .rst) is in docs/.
+
+To build the html version of it, you need to have sphinx installed::
+
+  pip3 install sphinx
+
+Now run::
+
+  cd docs/
+  make html
+
+Then point a web browser at docs/_build/html/index.html.

+ 100 - 8
docs/faq.rst

@@ -7,35 +7,127 @@ Frequently asked questions
 Which platforms are supported?
     Currently Linux, FreeBSD and MacOS X are supported.
 
+    You can try your luck on other POSIX-like systems, like Cygwin,
+    other BSDs, etc., but they are not officially supported.
 
 Can I backup VM disk images?
     Yes, the :ref:`deduplication <deduplication_def>` technique used by |project_name|
     makes sure only the modified parts of the file are stored.
+    Also, we have optional simple sparse file support for extraction.
 
-Which file attributes are preserved?
-    The following attributes are preserved:
+Can I backup from multiple servers into a single repository?
+    Yes, but in order for the deduplication used by |project_name| to work, it
+    needs to keep a local cache containing checksums of all file
+    chunks already stored in the repository. This cache is stored in
+    ``~/.cache/borg/``.  If |project_name| detects that a repository has been
+    modified since the local cache was updated it will need to rebuild
+    the cache. This rebuild can be quite time-consuming.
 
+    So, yes, it's possible, but it will be most efficient if a single
+    repository is only modified from one place. Also keep in mind that
+    |project_name| will keep an exclusive lock on the repository while creating
+    or deleting archives, which may make *simultaneous* backups fail.
+
+Which file types, attributes, etc. are preserved?
+    * Directories
+    * Regular files
+    * Hardlinks (considering all files in the same archive)
+    * Symlinks (stored as symlink, the symlink is not followed)
+    * Character and block device files
+    * FIFOs ("named pipes")
     * Name
     * Contents
     * Time of last modification (nanosecond precision with Python >= 3.3)
     * User ID of owner
     * Group ID of owner
-    * Unix Permission
-    * Extended attributes (xattrs)
+    * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky)
+    * Extended Attributes (xattrs)
     * Access Control Lists (ACL_) on Linux, OS X and FreeBSD
     * BSD flags on OS X and FreeBSD
 
+Which file types, attributes, etc. are *not* preserved?
+    * UNIX domain sockets (because it does not make sense - they are meaningless
+      without the running process that created them and the process needs to
+      recreate them in any case). So, don't panic if your backup misses a UDS!
+    * The precise on-disk representation of the holes in a sparse file.
+      Archive creation has no special support for sparse files, holes are
+      backed up as (deduplicated and compressed) runs of zero bytes.
+      Archive extraction has optional support to extract all-zero chunks as
+      holes in a sparse file.
+
 How can I specify the encryption passphrase programmatically?
     The encryption passphrase can be specified programmatically using the
-    `ATTIC_PASSPHRASE` environment variable. This is convenient when setting up
+    `BORG_PASSPHRASE` environment variable. This is convenient when setting up
     automated encrypted backups. Another option is to use
     key file based encryption with a blank passphrase. See
     :ref:`encrypted_repos` for more details.
 
-When backing up to remote servers, is data encrypted before leaving the local machine, or do I have to trust that the remote server isn't malicious?
-    Yes, everything is encrypted before leaving the local machine.
+When backing up to remote encrypted repos, is encryption done locally?
+    Yes, file and directory metadata and data is locally encrypted, before
+    leaving the local machine. We do not mean the transport layer encryption
+    by that, but the data/metadata itself. Transport layer encryption (e.g.
+    when ssh is used as a transport) applies additionally.
 
-If a backup stops mid-way, does the already-backed-up data stay there? I.e. does Attic resume backups?
+When backing up to remote servers, do I have to trust the remote server?
+    Yes and No.
+    No, as far as data confidentiality is concerned - if you use encryption,
+    all your files/dirs data and metadata are stored in their encrypted form
+    in the repository.
+    Yes, as an attacker with access to the remote server could delete (or
+    otherwise make unavailable) all your backups.
+
+If a backup stops mid-way, does the already-backed-up data stay there? I.e. does |project_name| resume backups?
     Yes, during a backup a special checkpoint archive named ``<archive-name>.checkpoint`` is saved every 5 minutes
     containing all the data backed-up until that point. This means that at most 5 minutes worth of data needs to be
     retransmitted if a backup needs to be restarted.
+
+If it crashes with a UnicodeError, what can I do?
+    Check if your encoding is set correctly. For most POSIX-like systems, try::
+
+        export LANG=en_US.UTF-8  # or similar, important is correct charset
+
+How do I run |project_name| on an ARM CPU older than ARM v6?
+    You need to enable the alignment trap handler to fix up misaligned accesses::
+
+        echo "2" > /proc/cpu/alignment
+
+Can |project_name| add redundancy to the backup data to deal with hardware malfunction?
+    No, it can't. While that at first sounds like a good idea to defend against
+    defective HDD sectors or SSD flash blocks, dealing with this in a reliable way needs a lot
+    of low-level storage layout information and control, which we do not have (and also can't
+    get, even if we wanted).
+
+    So, if you need that, consider RAID1 or a filesystem that offers redundant storage.
+
+Can |project_name| verify data integrity of a backup archive?
+    Yes, if you want to detect accidental data damage (like bit rot), use the ``check``
+    operation. It will notice corruption using CRCs and hashes.
+    If you want to be able to detect malicious tampering also, use an encrypted repo.
+    It will then be able to check using CRCs and HMACs.
+
+Why was Borg forked from Attic?
+    Borg was created in May 2015 in response to the difficulty of
+    getting new code or larger changes incorporated into Attic and
+    establishing a bigger developer community / more open development.
+
+    More details can be found in `ticket 217
+    <https://github.com/jborg/attic/issues/217>`_ that led to the fork.
+
+    Borg intends to be:
+
+    * simple:
+
+      * as simple as possible, but no simpler
+      * do the right thing by default, but offer options
+    * open:
+
+      * welcome feature requests
+      * accept pull requests of good quality and coding style
+      * give feedback on PRs that can't be accepted "as is"
+      * discuss openly, don't work in the dark
+    * changing:
+
+      * Borg is not compatible with Attic
+      * do not break compatibility accidentally, without a good reason
+        or without warning; allow compatibility breaking for other cases
+      * if the major version number changes, it may have incompatible changes

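A minimal automation sketch for the BORG_PASSPHRASE mechanism described in the FAQ above (repository path, archive name and passphrase are placeholders; borg only reads the variable from its environment):

    import os
    import subprocess

    env = dict(os.environ, BORG_PASSPHRASE='example-passphrase')
    # no interactive prompt: the passphrase comes from the environment
    subprocess.check_call(['borg', 'create', '/path/to/repo::nightly', '/home'],
                          env=env)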
+ 0 - 62
docs/foreword.rst

@@ -1,62 +0,0 @@
-.. include:: global.rst.inc
-.. _foreword:
-
-Foreword
-========
-
-|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X. 
-|project_name| is designed for efficient data storage where only new or
-modified data is stored.
-
-Features
---------
-
-Space efficient storage
-    Variable block size `deduplication`_ is used to reduce the number of bytes 
-    stored by detecting redundant data. Each file is split into a number of
-    variable length chunks and only chunks that have never been seen before
-    are compressed and added to the repository.
-
-Optional data encryption
-    All data can be protected using 256-bit AES_ encryption and data integrity
-    and authenticity is verified using `HMAC-SHA256`_.
-
-Off-site backups
-    |project_name| can store data on any remote host accessible over SSH as
-    long as |project_name| is installed.
-
-Backups mountable as filesystems
-    Backup archives are :ref:`mountable <attic_mount>` as
-    `userspace filesystems`_ for easy backup verification and restores.
-
-
-Glossary
---------
-
-.. _deduplication_def:
-
-Deduplication
-    Deduplication is a technique for improving storage utilization by
-    eliminating redundant data. 
-
-.. _archive_def:
-
-Archive
-    An archive is a collection of files along with metadata that include file
-    permissions, directory structure and various file attributes.
-    Since each archive in a repository must have a unique name a good naming
-    convention is ``hostname-YYYY-MM-DD``.
-
-.. _repository_def:
-
-Repository
-    A repository is a filesystem directory storing data from zero or more
-    archives. The data in a repository is both deduplicated and 
-    optionally encrypted making it both efficient and safe. Repositories are
-    created using :ref:`attic_init` and the contents can be listed using
-    :ref:`attic_list`.
-
-Key file
-    When a repository is initialized a key file containing a password
-    protected encryption key is created. It is vital to keep this file safe
-    since the repository data is totally inaccessible without it.

+ 11 - 10
docs/global.rst.inc

@@ -1,26 +1,27 @@
 .. highlight:: bash
-.. |project_name| replace:: ``Attic``
-.. |package_dirname| replace:: Attic-|version|
+.. |project_name| replace:: ``Borg``
+.. |package_dirname| replace:: borgbackup-|version|
 .. |package_filename| replace:: |package_dirname|.tar.gz
-.. |package_url| replace:: https://pypi.python.org/packages/source/A/Attic/|package_filename|
-.. |git_url| replace:: https://github.com/jborg/attic.git
+.. |package_url| replace:: https://pypi.python.org/packages/source/b/borgbackup/|package_filename|
+.. |git_url| replace:: https://github.com/borgbackup/borg.git
+.. _github: https://github.com/borgbackup/borg
+.. _issue tracker: https://github.com/borgbackup/borg/issues
 .. _deduplication: https://en.wikipedia.org/wiki/Data_deduplication
 .. _AES: https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
 .. _HMAC-SHA256: http://en.wikipedia.org/wiki/HMAC
+.. _SHA256: https://en.wikipedia.org/wiki/SHA-256
 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
-.. _github: https://github.com/jborg/attic
+.. _libacl: http://savannah.nongnu.org/projects/acl/
 .. _OpenSSL: https://www.openssl.org/
 .. _Python: http://www.python.org/
+.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash
+.. _msgpack: http://msgpack.org/
 .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/
 .. _llfuse: https://pypi.python.org/pypi/llfuse/
 .. _homebrew: http://mxcl.github.io/homebrew/
-.. _issue tracker: https://github.com/jborg/attic/issues
 .. _userspace filesystems: https://en.wikipedia.org/wiki/Filesystem_in_Userspace
 .. _librelist: http://librelist.com/
-.. _Debian: http://packages.debian.org/attic
-.. _Ubuntu: http://packages.ubuntu.com/attic
-.. _Arch Linux: https://aur.archlinux.org/packages/attic/
-.. _Slackware: http://slackbuilds.org/result/?search=Attic
 .. _Cython: http://cython.org/
 .. _virtualenv: https://pypi.python.org/pypi/virtualenv/
+.. _mailing list discussion about internals: http://librelist.com/browser/attic/2014/5/6/questions-and-suggestions-about-inner-working-of-attic

+ 7 - 59
docs/index.rst

@@ -1,70 +1,18 @@
 .. include:: global.rst.inc
 
-Welcome to Attic
-================
-|project_name| is a deduplicating backup program written in Python.
-The main goal of |project_name| is to provide an efficient and secure way
-to backup data. The data deduplication technique used makes |project_name|
-suitable for daily backups since only the changes are stored.
 
-
-Easy to use
------------
-Initialize a new backup :ref:`repository <repository_def>` and create your
-first backup :ref:`archive <archive_def>` in two lines::
-
-    $ attic init /somewhere/my-repository.attic
-    $ attic create /somewhere/my-repository.attic::Monday ~/Documents
-    $ attic create --stats /somewhere/my-repository.attic::Tuesday ~/Documents
-    Archive name: Tuesday
-    Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a
-    Start time: Tue Mar 25 12:00:10 2014
-    End time:   Tue Mar 25 12:00:10 2014
-    Duration: 0.08 seconds
-    Number of files: 358
-                           Original size      Compressed size    Deduplicated size
-    This archive:               57.16 MB             46.78 MB            151.67 kB
-    All archives:              114.02 MB             93.46 MB             44.81 MB
-
-See the :ref:`quickstart` chapter for a more detailed example.
-
-Easy installation
------------------
-You can use pip to install |project_name| quickly and easily::
-
-    $ pip3 install attic
-
-|project_name| is also part of the Debian_, Ubuntu_, `Arch Linux`_ and Slackware_
-distributions of GNU/Linux.
-
-Need more help with installing? See :ref:`installation`.
-
-User's Guide
-============
+Borg Documentation
+==================
 
 .. toctree::
    :maxdepth: 2
 
-   foreword
+   intro
    installation
    quickstart
    usage
    faq
-
-Getting help
-============
-
-If you've found a bug or have a concrete feature request, you can add your bug
-report or feature request directly to the project's `issue tracker`_. For more
-general questions or discussions, a post to the mailing list is preferred.
-
-Mailing list
-------------
-
-There is a mailing list for Attic on librelist_ that you can use for feature
-requests and general discussions about Attic. A mailing list archive is
-available `here <http://librelist.com/browser/attic/>`_.
-
-To subscribe to the list, send an email to attic@librelist.com and reply
-to the confirmation mail. Likewise, to unsubscribe, send an email to 
-attic-unsubscribe@librelist.com and reply to the confirmation mail.
+   support
+   changes
+   internals
+   development

+ 144 - 36
docs/installation.rst

@@ -4,58 +4,166 @@
 Installation
 ============
 
-|project_name| requires Python_ 3.2 or above to work. Even though Python 3 is
-not the default Python version on most Linux distributions, it is usually
-available as an optional install.
+|project_name| requires:
 
-Other dependencies:
-
-* `msgpack-python`_ >= 0.1.10
+* Python_ >= 3.2
 * OpenSSL_ >= 1.0.0
+* libacl_
+* some python dependencies, see install_requires in setup.py
+
+General notes
+-------------
+Even though Python 3 is not the default Python version on many systems, it is
+usually available as an optional install.
 
-The OpenSSL version bundled with Mac OS X and FreeBSD is most likey too old.
-Newer versions are available from homebrew_ on OS X and from FreeBSD ports.
+Virtualenv_ can be used to build and install |project_name| without affecting
+the system Python or requiring root access.
+
+Important:
+if you install into a virtual environment, you need to activate
+the virtual env first (``source borg-env/bin/activate``).
+Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some
+directory that is in your PATH so you can just run ``borg``).
 
 The llfuse_ python package is also required if you wish to mount an
-archive as a FUSE filesystem.
+archive as a FUSE filesystem. llfuse requires FUSE >= 2.8.0.
 
-Virtualenv_ can be used to build and install |project_name|
-without affecting the system Python or requiring root access.
+You only need Cython to compile the .pyx files to the respective .c files
+when using |project_name| code from git. For |project_name| releases, the .c
+files will be bundled, so you won't need Cython to install a release.
+
+Platform notes
+--------------
+FreeBSD: You may need to get a recent enough OpenSSL version from FreeBSD ports.
+
+Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_.
+
+Mac OS X: You need OS X FUSE >= 3.0.
 
-Installing from PyPI using pip
-------------------------------
-::
 
-    $ pip3 install Attic
+Debian / Ubuntu installation (from git)
+---------------------------------------
+Note: this uses latest, unreleased development code from git.
+While we try not to break master, there are no guarantees on anything.
+
+Some of the steps detailed below might also be useful for non-git installs.
 
-Installing from source tarballs
--------------------------------
 .. parsed-literal::
 
-    $ curl -O :targz_url:`Attic`
-    $ tar -xvzf |package_filename|
-    $ cd |package_dirname|
-    $ python setup.py install
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer
+    apt-get install python3 python3-dev python3-pip
+
+    # we need OpenSSL + Headers for Crypto
+    apt-get install libssl-dev openssl
+
+    # ACL support Headers + Library
+    apt-get install libacl1-dev libacl1
+
+    # if you do not have gcc / make / etc. yet
+    apt-get install build-essential
+
+    # optional: FUSE support - to mount backup archives
+    # in case you get complaints about permission denied on /etc/fuse.conf:
+    # on ubuntu this means your user is not in the "fuse" group. just add
+    # yourself there, log out and log in again.
+    apt-get install libfuse-dev fuse
+
+    # optional: for unit testing
+    apt-get install fakeroot
+
+    # get |project_name| from github, install it
+    git clone |git_url|
+
+    apt-get install python-virtualenv
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install cython  # compile .pyx -> .c
+    pip install tox pytest  # optional, for running unit tests
+    pip install sphinx  # optional, to build the docs
+    pip install llfuse  # optional, for FUSE support
+    cd borg
+    pip install -e .  # in-place editable mode
+
+    # optional: run all the tests, on all supported Python versions
+    fakeroot -u tox
+
+
+Korora / Fedora 21 installation (from git)
+------------------------------------------
+Note: this uses latest, unreleased development code from git.
+While we try not to break master, there are no guarantees on anything.
+
+Some of the steps detailed below might also be useful for non-git installs.
 
-Installing from git
--------------------
 .. parsed-literal::
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer
+    sudo dnf install python3 python3-devel python3-pip
+
+    # we need OpenSSL + Headers for Crypto
+    sudo dnf install openssl-devel openssl
+
+    # ACL support Headers + Library
+    sudo dnf install libacl-devel libacl
+    
+    # optional: FUSE support - to mount backup archives
+    sudo dnf install fuse-devel fuse
+    
+    # optional: for unit testing
+    sudo dnf install fakeroot
+    
+    # get |project_name| from github, install it
+    git clone |git_url|
+
+    dnf install python3-virtualenv
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install cython  # compile .pyx -> .c
+    pip install tox pytest  # optional, for running unit tests
+    pip install sphinx  # optional, to build the docs
+    pip install llfuse  # optional, for FUSE support
+    cd borg
+    pip install -e .  # in-place editable mode
+
+    # optional: run all the tests, on all supported Python versions
+    fakeroot -u tox
+
+
+Cygwin (from git)
+-----------------
+Please note that running under Cygwin is rather experimental; it has only been
+tested with Cygwin (x86-64) v2.1.0.
+
+You'll need at least (use the cygwin installer to fetch/install these):
 
-    $ git clone |git_url|
-    $ cd attic
-    $ python setup.py install
+::
 
-Please note that when installing from git, Cython_ is required to generate some files that
-are normally bundled with the release tarball.
+    python3
+    python3-setuptools
+    python3-cython
+    binutils
+    gcc-core
+    git
+    libopenssl
+    make
+    openssh
+    openssl-devel
 
-Packages
---------
+You can then install ``pip`` and ``virtualenv``:
 
-|project_name| is also part of the Debian_, Ubuntu_, `Arch Linux`_ and Slackware_
-distributions of GNU/Linux.
+::
+
+    easy_install-3.4 pip
+    pip install virtualenv
+
+And now continue as for Linux (see above).
+
+In case that creation of the virtual env fails, try deleting this file:
+
+::
 
-Standalone binaries
--------------------
+    /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
 
-Prebuilt standalone binaries that work on
-most Linux systems can be found :artifacts:`here <>`.

+ 391 - 0
docs/internals.rst

@@ -0,0 +1,391 @@
+.. include:: global.rst.inc
+.. _internals:
+
+Internals
+=========
+
+This page documents the internal data structures and storage
+mechanisms of |project_name|. It is partly based on `mailing list
+discussion about internals`_ and also on static code analysis.
+
+It may not be exactly up to date with the current source code.
+
+Repository and Archives
+-----------------------
+
+|project_name| stores its data in a `Repository`. Each repository can
+hold multiple `Archives`, which represent individual backups that
+contain a full archive of the files specified when the backup was
+performed. Deduplication is performed across multiple backups, both on
+data and metadata, using `Chunks` created by the chunker using the Buzhash_
+algorithm.
+
+Each repository has the following file structure:
+
+README
+  simple text file telling that this is a |project_name| repository
+
+config
+  repository configuration
+
+data/
+  directory where the actual data is stored
+
+hints.%d
+  hints for repository compaction
+
+index.%d
+  repository index
+
+lock.roster and lock.exclusive/*
+  used by the locking system to manage shared and exclusive locks
+
+
+Config file
+-----------
+
+Each repository has a ``config`` file which is an ``INI``-style file
+and looks like this::
+
+    [repository]
+    version = 1
+    segments_per_dir = 10000
+    max_segment_size = 5242880
+    id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6
+
+This is where the ``repository.id`` is stored. It is a unique
+identifier for repositories. It will not change if you move the
+repository around so you can make a local transfer then decide to move
+the repository to another (even remote) location at a later time.
+
+
+Keys
+----
+The key to address the key/value store is usually computed like this:
+
+key = id = id_hash(unencrypted_data)
+
+The id_hash function is:
+
+* sha256 (no encryption keys available)
+* hmac-sha256 (encryption keys available)
+
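+For illustration, a minimal Python sketch of this computation (the function
+name and the example key are made up for this sketch)::
+
+    import hashlib
+    import hmac
+
+    def id_hash(data, id_key=None):
+        if id_key is None:
+            # no encryption keys available: plain SHA-256
+            return hashlib.sha256(data).digest()
+        # encryption keys available: HMAC-SHA256 keyed with the id_key
+        return hmac.new(id_key, data, hashlib.sha256).digest()
+
+    key = id_hash(b'some chunk data', id_key=b'\x00' * 32)  # example key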
+
+Segments and archives
+---------------------
+
+A |project_name| repository is a filesystem based transactional key/value
+store. It makes extensive use of msgpack_ to store data and, unless
+otherwise noted, data is stored in msgpack_ encoded files.
+
+Objects referenced by a key are stored inline in files (`segments`) of approx.
+5MB size in numbered subdirectories of ``repo/data``.
+
+They contain:
+
+* header size
+* crc
+* size
+* tag
+* key
+* data
+
+Segments are built locally, and then uploaded. Those files are
+strictly append-only and modified only once.
+
+Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. A segment file is
+basically a transaction log where each repository operation is
+appended to the file. So if an object is written to the repository a
+``PUT`` tag is written to the file followed by the object id and
+data. If an object is deleted a ``DELETE`` tag is appended
+followed by the object id. A ``COMMIT`` tag is written when a
+repository transaction is committed.  When a repository is opened any
+``PUT`` or ``DELETE`` operations not followed by a ``COMMIT`` tag are
+discarded since they are part of a partial/uncommitted transaction.
+
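+A conceptual Python sketch of this replay logic (entries are simplified to
+tuples here; this is not the real on-disk format)::
+
+    def replay(entries):
+        index = {}
+        uncommitted = []
+        for tag, key, value in entries:
+            if tag in ('PUT', 'DELETE'):
+                uncommitted.append((tag, key, value))
+            elif tag == 'COMMIT':
+                for t, k, v in uncommitted:
+                    if t == 'PUT':
+                        index[k] = v
+                    else:
+                        index.pop(k, None)
+                uncommitted = []
+        # anything left in `uncommitted` was never committed -> discarded
+        return index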
+
+The manifest
+------------
+
+The manifest is an object with an all-zero key that references all the
+archives.
+It contains:
+
+* version
+* list of archive infos
+* timestamp
+* config
+
+Each archive info contains:
+
+* name
+* id
+* time
+
+It is the last object stored, in the last segment, and is replaced
+each time.
+
+The archive metadata does not contain the file items directly. Only
+references to other objects that contain that data. An archive is an
+object that contains:
+
+* version
+* name
+* list of chunks containing item metadata
+* cmdline
+* hostname
+* username
+* time
+
+Each item represents a file, directory or other fs item and is stored as an
+``item`` dictionary that contains:
+
+* path
+* list of data chunks
+* user
+* group
+* uid
+* gid
+* mode (item type + permissions)
+* source (for links)
+* rdev (for devices)
+* mtime
+* xattrs
+* acl
+* bsdfiles
+
+``ctime`` (change time) is not stored because there is no API to set
+it and it is reset every time an inode's metadata is changed.
+
+All items are serialized using msgpack and the resulting byte stream
+is fed into the same chunker used for regular file data and turned
+into deduplicated chunks. The reference to these chunks is then added
+to the archive metadata.
+
+A chunk is stored as an object as well, of course.
+
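+To give a rough idea, packing such an item with msgpack might look like this
+(field names and values are illustrative, not the exact source format)::
+
+    import msgpack  # third-party dependency of borg
+
+    item = {
+        b'path': b'home/user/Documents/file.txt',
+        b'mode': 0o100644,
+        b'uid': 1000, b'gid': 1000,
+        b'mtime': 1400000000 * 10 ** 9,  # nanoseconds
+        b'chunks': [],                   # references to data chunks
+    }
+    packed = msgpack.packb(item)         # byte stream fed into the chunker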
+
+Chunks
+------
+
+The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm.
+It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero,
+producing chunks of 2^HASH_MASK_BITS Bytes on average.
+
+create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
+can be used to tune the chunker parameters, the default is:
+
+- CHUNK_MIN_EXP = 10 (minimum chunk size = 2^10 B = 1 kiB)
+- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
+- HASH_MASK_BITS = 16 (statistical medium chunk size ~= 2^16 B = 64 kiB)
+- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
+
+The default parameters are OK for relatively small backup data volumes and
+repository sizes and a lot of available memory (RAM) and disk space for the
+chunk index. If that does not apply, you are advised to tune these parameters
+to keep the chunk count lower than with the defaults.
+
+The buzhash table is altered by XORing it with a seed randomly generated once
+for the archive, and stored encrypted in the keyfile. This is to prevent chunk
+size based fingerprinting attacks on your encrypted repo contents (to guess
+what files you have based on a specific set of chunk sizes).
+
+
+Indexes / Caches
+----------------
+
+The files cache is stored in ``cache/files`` and is indexed on the
+``file path hash``. At backup time, it is used to quickly determine whether we
+need to chunk a given file (or whether it is unchanged and we already have all
+its pieces).
+It contains:
+
+* age
+* file inode number
+* file size
+* file mtime_ns
+* file content chunk hashes
+
+The inode number is stored to make sure we distinguish between
+different files, as a single path may not be unique across different
+archives in different setups.
+
+The files cache is stored as a python associative array storing
+python objects, which generates a lot of overhead.
+
+The chunks cache is stored in ``cache/chunks`` and is indexed on the
+``chunk id_hash``. It is used to determine whether we already have a specific
+chunk, to count references to it and also for statistics.
+It contains:
+
+* reference count
+* size
+* encrypted/compressed size
+
+The repository index is stored in ``repo/index.%d`` and is indexed on the
+``chunk id_hash``. It is used to determine a chunk's location in the repository.
+It contains:
+
+* segment (that contains the chunk)
+* offset (where the chunk is located in the segment)
+
+The repository index file is random access.
+
+Hints are stored in a file (``repo/hints.%d``).
+It contains:
+
+* version
+* list of segments
+* compact
+
+hints and index can be recreated if damaged or lost using ``check --repair``.
+
+The chunks cache and the repository index are stored as hash tables, with
+only one slot per bucket; collisions spill over into the following buckets.
+As a consequence the hash is just a start position for a linear search,
+and if the element is not in the table, the table is scanned linearly
+until an empty bucket is found.
+
+When the hash table is almost full at 90%, its size is doubled. When it's
+almost empty at 25%, its size is halved. So operations on it have a variable
+complexity between constant and linear with low factor, and memory overhead
+varies between 10% and 300%.
+
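+A Python sketch of such a lookup (buckets hold ``(key, value)`` tuples or
+``None``; resizing is omitted)::
+
+    def lookup(buckets, key_hash, key):
+        i = key_hash % len(buckets)     # the hash is only a start position
+        while buckets[i] is not None:
+            stored_key, value = buckets[i]
+            if stored_key == key:
+                return value
+            i = (i + 1) % len(buckets)  # collision: try the following bucket
+        return None                     # empty bucket reached: not present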
+
+Indexes / Caches memory usage
+-----------------------------
+
+Here is the estimated memory usage of |project_name|:
+
+  chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS
+
+  repo_index_usage = chunk_count * 40
+
+  chunks_cache_usage = chunk_count * 44
+
+  files_cache_usage = total_file_count * 240 + chunk_count * 80
+
+  mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage
+             = chunk_count * 164 + total_file_count * 240
+
+All units are Bytes.
+
+It is assuming every chunk is referenced exactly once (if you have a lot of
+duplicate chunks, you will have fewer chunks than estimated above).
+
+It is also assuming that typical chunk size is 2^HASH_MASK_BITS (if you have
+a lot of files smaller than this statistical medium chunk size, you will have
+more chunks than estimated above, because 1 file is at least 1 chunk).
+
+If a remote repository is used the repo index will be allocated on the remote side.
+
+E.g. backing up a total count of 1Mi files with a total size of 1TiB.
+
+a) with create --chunker-params 10,23,16,4095 (default):
+
+  mem_usage  =  2.8GiB
+
+b) with create --chunker-params 10,23,20,4095 (custom):
+
+  mem_usage  =  0.4GiB
+
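+The formulas above can be played with in a few lines of Python (a sketch
+mirroring the estimates; all units are Bytes)::
+
+    def estimate_mem_usage(total_file_size, total_file_count, hash_mask_bits=16):
+        chunk_count = total_file_size / 2 ** hash_mask_bits
+        repo_index = chunk_count * 40
+        chunks_cache = chunk_count * 44
+        files_cache = total_file_count * 240 + chunk_count * 80
+        return repo_index + chunks_cache + files_cache
+
+    print(estimate_mem_usage(2 ** 40, 2 ** 20, 16))  # case a): ~3.0e9 B ~= 2.8 GiB
+    print(estimate_mem_usage(2 ** 40, 2 ** 20, 20))  # case b): ~4.2e8 B ~= 0.4 GiB
+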
+Note: there is also the --no-files-cache option to switch off the files cache.
+You'll save some memory, but borg will then need to read and chunk all the
+files, as it cannot skip unmodified files anymore.
+
+
+Encryption
+----------
+
+AES_ is used in CTR mode (so no need for padding). A 64-bit initialization
+vector is used, an `HMAC-SHA256`_ is computed on the encrypted chunk with a
+random 64-bit nonce, and both are stored in the chunk.
+The header of each chunk is: ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``.
+Encryption and HMAC use two different keys.
+
+In AES CTR mode you can think of the IV as the start value for the counter.
+The counter itself is incremented by one after each 16 byte block.
+The IV/counter is not required to be random but it must NEVER be reused.
+So to accomplish this |project_name| initializes the encryption counter to be
+higher than any previously used counter value before encrypting new data.
+
+To reduce payload size, only 8 bytes of the 16-byte nonce are saved in the
+payload; the first 8 bytes are always zeros. This does not affect security but
+limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes).
+
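+A Python sketch of parsing and authenticating such a chunk (this assumes the
+HMAC covers nonce + ciphertext; key and blob are example values)::
+
+    import hashlib
+    import hmac
+
+    def check_chunk(blob, enc_hmac_key):
+        stored_mac = blob[1:33]  # TYPE(1) comes first, then HMAC(32)
+        nonce = blob[33:41]      # NONCE(8): the low 8 bytes of the 16-byte IV
+        ciphertext = blob[41:]
+        computed = hmac.new(enc_hmac_key, nonce + ciphertext,
+                            hashlib.sha256).digest()
+        return hmac.compare_digest(stored_mac, computed)
+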
+Encryption keys are either derived from a passphrase or kept in a key file.
+The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable
+or prompted for interactive usage.
+
+
+Key files
+---------
+
+When initialized with the ``init -e keyfile`` command, |project_name|
+needs an associated file in ``$HOME/.borg/keys`` to read and write
+the repository. The format is based on msgpack_, base64 encoding and
+PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_.
+
+The internal data structure is as follows:
+
+version
+  currently always an integer, 1
+
+repository_id
+  the ``id`` field in the ``config`` ``INI`` file of the repository.
+
+enc_key
+  the key used to encrypt data with AES (256 bits)
+  
+enc_hmac_key
+  the key used to HMAC the encrypted data (256 bits)
+
+id_key
+  the key used to HMAC the plaintext chunk data to compute the chunk's id
+
+chunk_seed
+  the seed for the buzhash chunking table (signed 32 bit integer)
+
+Those fields are processed using msgpack_. The utf-8 encoded passphrase
+is processed with PBKDF2_ (SHA256_, 100000 iterations, random 256 bit salt)
+to give us a derived key. The derived key is 256 bits long.
+An `HMAC-SHA256`_ checksum of the above fields is generated with the derived
+key, then the derived key is also used to encrypt the above pack of fields.
+Then the result is stored in another msgpack_ formatted as follows:
+
+version
+  currently always an integer, 1
+
+salt
+  random 256 bits salt used to process the passphrase
+
+iterations
+  number of iterations used to process the passphrase (currently 100000)
+
+algorithm
+  the hashing algorithm used to process the passphrase and do the HMAC
+  checksum (currently the string ``sha256``)
+
+hash
+  the HMAC of the encrypted derived key
+
+data
+  the derived key, encrypted with AES over a PBKDF2_ SHA256 key
+  described above
+
+The resulting msgpack_ is then encoded using base64 and written to the
+key file, wrapped using the standard ``textwrap`` module with a header.
+The header is a single line with a MAGIC string, a space and a hexadecimal
+representation of the repository id.
+
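+The passphrase processing step can be sketched with the standard library
+(Python 3.4+; the passphrase here is an example)::
+
+    import hashlib
+    import os
+
+    salt = os.urandom(32)  # random 256-bit salt, as described above
+    derived_key = hashlib.pbkdf2_hmac('sha256', 'my passphrase'.encode('utf-8'),
+                                      salt, 100000)
+    assert len(derived_key) == 32  # 256 bits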
+
+Compression
+-----------
+
+|project_name| currently always pipes all data through a zlib compressor which
+supports compression levels 0 (no compression, fast) to 9 (high compression, slow).
+
+See ``borg create --help`` about how to specify the compression level and its default.
+
+Note: zlib level 0 creates a little bit more output data than it gets as input,
+due to zlib protocol overhead.
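+
+The level 0 overhead is easy to demonstrate with Python's ``zlib`` module
+(a quick sketch, not borg code)::
+
+    import zlib
+
+    data = b'x' * 1000
+    print(len(zlib.compress(data, 0)))  # level 0: a few bytes larger than the input
+    print(len(zlib.compress(data, 9)))  # level 9: much smaller for repetitive data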

+ 7 - 0
docs/intro.rst

@@ -0,0 +1,7 @@
+.. include:: global.rst.inc
+.. _foreword:
+
+Introduction
+============
+
+.. include:: ../README.rst

+ 116 - 0
docs/misc/create_chunker-params.txt

@@ -0,0 +1,116 @@
+About borg create --chunker-params
+==================================
+
+--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
+
+CHUNK_MIN_EXP and CHUNK_MAX_EXP give the exponent N of the 2^N minimum and
+maximum chunk size. Required: CHUNK_MIN_EXP < CHUNK_MAX_EXP.
+
+Defaults: 10 (2^10 == 1KiB) minimum, 23 (2^23 == 8MiB) maximum.
+
+HASH_MASK_BITS is the number of least-significant bits of the rolling hash
+that need to be zero to trigger a chunk cut.
+Recommended: CHUNK_MIN_EXP + X <= HASH_MASK_BITS <= CHUNK_MAX_EXP - X, X >= 2
+(this allows the rolling hash some freedom to make its cut at a place
+determined by the window's contents rather than the min/max. chunk size).
+
+Default: 16 (statistically, chunks will be about 2^16 == 64kiB in size)
+
+HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation.
+Default: 4095B
+
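+The recommendation above can be expressed as a small Python check (a sketch;
+the helper name is made up for this example):
+
+    def check_chunker_params(chunk_min_exp, chunk_max_exp, hash_mask_bits, x=2):
+        assert chunk_min_exp < chunk_max_exp
+        assert chunk_min_exp + x <= hash_mask_bits <= chunk_max_exp - x
+        return 2 ** hash_mask_bits  # statistical average chunk size in bytes
+
+    print(check_chunker_params(10, 23, 16))  # defaults -> 65536 (~64 kiB chunks)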
+
+Trying it out
+=============
+
+I backed up a VM directory to demonstrate how different chunker parameters
+influence repo size, index size / chunk count, compression, deduplication.
+
+repo-sm: ~64kiB chunks (16 bits chunk mask), min chunk size 1kiB (2^10B)
+         (these are attic / borg 0.23 internal defaults)
+
+repo-lg: ~1MiB chunks (20 bits chunk mask), min chunk size 64kiB (2^16B)
+
+repo-xl: 8MiB chunks (2^23B max chunk size), min chunk size 64kiB (2^16B).
+         The chunk mask bits were set to 31, so the mask (almost) never triggers.
+         This degrades the rolling hash based dedup to a fixed-offset dedup
+         as the cutting point is now (almost) always the end of the buffer
+         (at 2^23B == 8MiB).
+
+The repo index size is an indicator for the RAM needs of Borg.
+In this special case, the total RAM needs are about 2.1x the repo index size.
+You can see that the index size of repo-sm is 16x larger than that of repo-lg,
+which corresponds to the ratio of the different target chunk sizes.
+
+Note: RAM needs were not a problem in this specific case (37GB data size).
+      But just imagine, you have 37TB of such data and much less than 42GB RAM,
+      then you'd definitely want the "lg" chunker params so you only need
+      2.6GB RAM. Or even bigger chunks than shown for "lg" (see "xl").
+
+You also see compression works better for larger chunks, as expected.
+Deduplication works worse for larger chunks, also as expected.
+
+small chunks
+============
+
+$ borg info /extra/repo-sm::1
+
+Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 10,23,16,4095 /extra/repo-sm::1 /home/tw/win
+Number of files: 3
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               37.12 GB             14.81 GB             12.18 GB
+All archives:               37.12 GB             14.81 GB             12.18 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  378374               487316
+
+$ ls -l /extra/repo-sm/index*
+
+-rw-rw-r-- 1 tw tw 20971538 Jun 20 23:39 index.2308
+
+$ du -sk /extra/repo-sm
+11930840    /extra/repo-sm
+
+large chunks
+============
+
+$ borg info /extra/repo-lg::1
+
+Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 16,23,20,4095 /extra/repo-lg::1 /home/tw/win
+Number of files: 3
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               37.10 GB             14.60 GB             13.38 GB
+All archives:               37.10 GB             14.60 GB             13.38 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                   25889                29349
+
+$ ls -l /extra/repo-lg/index*
+
+-rw-rw-r-- 1 tw tw 1310738 Jun 20 23:10 index.2264
+
+$ du -sk /extra/repo-lg
+13073928    /extra/repo-lg
+
+xl chunks
+=========
+
+(borg-env)tw@tux:~/w/borg$ borg info /extra/repo-xl::1
+Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 16,23,31,4095 /extra/repo-xl::1 /home/tw/win
+Number of files: 3
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               37.10 GB             14.59 GB             14.59 GB
+All archives:               37.10 GB             14.59 GB             14.59 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                    4319                 4434
+
+$ ls -l /extra/repo-xl/index*
+-rw-rw-r-- 1 tw tw 327698 Jun 21 00:52 index.2011
+
+$ du -sk /extra/repo-xl/
+14253464    /extra/repo-xl/
+

+ 130 - 0
docs/misc/create_compression.txt

@@ -0,0 +1,130 @@
+data compression
+================
+
+borg create --compression N repo::archive data
+
+Currently, borg only supports zlib compression. There are plans to expand this
+to other, faster or better compression algorithms in the future.
+
+N == 0 -> zlib level 0 == very quick, no compression
+N == 1 -> zlib level 1 == quick, low compression
+...
+N == 9 -> zlib level 9 == slow, high compression
+
+Measurements made on a Haswell Ultrabook, SSD storage, Linux.
+
+
+Example 1: lots of relatively small text files (linux kernel src)
+-----------------------------------------------------------------
+
+N == 1 does a good job here; the time spent on compression is won back
+because less data needs to be stored (see N == 0).
+
+N == 6 is also quite ok, a little slower, a little less repo size.
+6 was the old default of borg.
+
+High compression levels only give a little more compression, but take a lot
+of CPU time.
+
+$ borg create --stats --compression 0
+------------------------------------------------------------------------------ 
+Duration: 50.40 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB              1.18 GB              1.01 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70263                82309
+------------------------------------------------------------------------------ 
+
+$ borg create --stats --compression 1
+------------------------------------------------------------------------------ 
+Duration: 49.29 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            368.62 MB            295.22 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70280                82326
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 5
+------------------------------------------------------------------------------ 
+Duration: 59.99 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            331.70 MB            262.20 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70290                82336
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 6
+------------------------------------------------------------------------------ 
+Duration: 1 minutes 13.64 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            328.79 MB            259.56 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70279                82325
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 9
+------------------------------------------------------------------------------
+Duration: 3 minutes 1.58 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            326.57 MB            257.57 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70292                82338
+------------------------------------------------------------------------------
+
+
+Example 2: large VM disk file (sparse file)
+-------------------------------------------
+
+The file's directory size is 80GB, but a lot of it is sparse (and reads as
+zeros).
+
+$ borg create --stats --compression 0
+------------------------------------------------------------------------------
+Duration: 13 minutes 48.47 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB             80.55 GB             10.87 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147307               177109
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 1
+------------------------------------------------------------------------------
+Duration: 15 minutes 31.34 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB              6.68 GB              5.67 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147309               177111
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 6
+------------------------------------------------------------------------------
+Duration: 18 minutes 57.54 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB              6.19 GB              5.44 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147307               177109
+------------------------------------------------------------------------------

+ 33 - 29
docs/quickstart.rst

@@ -13,16 +13,16 @@ A step by step example
 
 1. Before a backup can be made a repository has to be initialized::
 
-    $ attic init /somewhere/my-repository.attic
+    $ borg init /mnt/backup
 
 2. Backup the ``~/src`` and ``~/Documents`` directories into an archive called
    *Monday*::
 
-    $ attic create /somewhere/my-repository.attic::Monday ~/src ~/Documents
+    $ borg create /mnt/backup::Monday ~/src ~/Documents
 
 3. The next day create a new archive called *Tuesday*::
 
-    $ attic create --stats /somewhere/my-repository.attic::Tuesday ~/src ~/Documents
+    $ borg create --stats /mnt/backup::Tuesday ~/src ~/Documents
 
    This backup will be a lot quicker and a lot smaller since only new never
    before seen data is stored. The ``--stats`` option causes |project_name| to
@@ -42,44 +42,43 @@ A step by step example
 
 4. List all archives in the repository::
 
-    $ attic list /somewhere/my-repository.attic
+    $ borg list /mnt/backup
     Monday                               Mon Mar 24 11:59:35 2014
     Tuesday                              Tue Mar 25 12:00:10 2014
 
 5. List the contents of the *Monday* archive::
 
-    $ attic list /somewhere/my-repository.attic::Monday
+    $ borg list /mnt/backup::Monday
     drwxr-xr-x user  group         0 Jan 06 15:22 home/user/Documents
     -rw-r--r-- user  group      7961 Nov 17  2012 home/user/Documents/Important.doc
     ...
 
 6. Restore the *Monday* archive::
 
-    $ attic extract /somwhere/my-repository.attic::Monday
+    $ borg extract /mnt/backup::Monday
 
 7. Recover disk space by manually deleting the *Monday* archive::
 
-    $ attic delete /somwhere/my-backup.attic::Monday
+    $ borg delete /mnt/backup::Monday
 
 .. Note::
-    Attic is quiet by default. Add the ``-v`` or ``--verbose`` option to
+    Borg is quiet by default. Add the ``-v`` or ``--verbose`` option to
     get progress reporting during command execution.
 
 Automating backups
 ------------------
 
-The following example script backs up ``/home`` and
-``/var/www`` to a remote server. The script also uses the
-:ref:`attic_prune` subcommand to maintain a certain number
-of old archives::
+The following example script backs up ``/home`` and ``/var/www`` to a remote
+server. The script also uses the :ref:`borg_prune` subcommand to maintain a
+certain number of old archives::
 
     #!/bin/sh
-    REPOSITORY=username@remoteserver.com:repository.attic
+    REPOSITORY=username@remoteserver.com:backup
 
     # Backup all of /home and /var/www except a few
     # excluded directories
-    attic create --stats                            \
-        $REPOSITORY::hostname-`date +%Y-%m-%d`      \
+    borg create --stats                             \
+        $REPOSITORY::`hostname`-`date +%Y-%m-%d`    \
         /home                                       \
         /var/www                                    \
         --exclude /home/*/.cache                    \
@@ -88,7 +87,7 @@ of old archives::
 
     # Use the `prune` subcommand to maintain 7 daily, 4 weekly
     # and 6 monthly archives.
-    attic prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
+    borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
 .. _encrypted_repos:
 
@@ -97,13 +96,13 @@ Repository encryption
 
 Repository encryption is enabled at repository creation time::
 
-    $ attic init --encryption=passphrase|keyfile PATH
+    $ borg init --encryption=passphrase|keyfile PATH
 
 When repository encryption is enabled all data is encrypted using 256-bit AES_
 encryption and the integrity and authenticity is verified using `HMAC-SHA256`_.
 
 All data is encrypted before being written to the repository. This means that
-an attacker that manages to compromise the host containing an encrypted
+an attacker who manages to compromise the host containing an encrypted
 archive will not be able to access any of the data.
 
 |project_name| supports two different methods to derive the AES and HMAC keys.
@@ -116,11 +115,11 @@ Passphrase based encryption
 
     .. Note::
         For automated backups the passphrase can be specified using the
-        `ATTIC_PASSPHRASE` environment variable.
+        `BORG_PASSPHRASE` environment variable.
 
 Key file based encryption
     This method generates random keys at repository initialization time that
-    are stored in a password protected file in the ``~/.attic/keys/`` directory.
+    are stored in a password protected file in the ``~/.borg/keys/`` directory.
     The key file is a printable text file. This method is secure and suitable
     for automated backups.
 
@@ -138,20 +137,25 @@ Remote repositories
 host is accessible using SSH.  This is fastest and easiest when |project_name|
 is installed on the remote host, in which case the following syntax is used::
 
-  $ attic init user@hostname:repository.attic
+  $ borg init user@hostname:/mnt/backup
 
 or::
 
-  $ attic init ssh://user@hostname:port/repository.attic
+  $ borg init ssh://user@hostname:port//mnt/backup
 
-If it is not possible to install |project_name| on the remote host, 
+Remote operations over SSH can be automated with SSH keys. You can restrict the
+use of the SSH keypair by prepending a forced command to the SSH public key in
+the remote server's authorized_keys file. Only the forced command will be run
+when the key authenticates a connection. This example will start |project_name| in server
+mode, and limit the |project_name| server to a specific filesystem path::
+
+  command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...]
+
+If it is not possible to install |project_name| on the remote host,
 it is still possible to use the remote host to store a repository by
 it is still possible to use the remote host to store a repository by
 mounting the remote filesystem, for example, using sshfs::
 
-  $ attic init /tmp/mymountpoint/repository.attic
-  $ fusermount -u /tmp/mymountpoint
+  $ sshfs user@hostname:/mnt /mnt
+  $ borg init /mnt/backup
+  $ fusermount -u /mnt
 
 
-you must be sure to not have two processes trying to access the same
-repository at the same time.

+ 34 - 0
docs/support.rst

@@ -0,0 +1,34 @@
+.. include:: global.rst.inc
+.. _support:
+
+Support
+=======
+
+Issue Tracker
+-------------
+
+If you've found a bug or have a concrete feature request, please create a new
+ticket on the project's `issue tracker`_ (after checking whether someone else
+already has reported the same thing).
+
+For more general questions or discussions, IRC or mailing list are preferred.
+
+IRC
+---
+Join us on channel #borgbackup on chat.freenode.net.
+
+As usual on IRC, just ask or tell directly and then patiently wait for replies.
+Stay connected.
+
+Mailing list
+------------
+
+There is a mailing list for Borg on librelist_ that you can use for feature
+requests and general discussions about Borg. A mailing list archive is
+available `here <http://librelist.com/browser/borgbackup/>`_.
+
+To subscribe to the list, send an email to borgbackup@librelist.com and reply
+to the confirmation mail.
+
+To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply
+to the confirmation mail.

+ 6 - 6
docs/update_usage.sh

@@ -2,12 +2,12 @@
 if [ ! -d usage ]; then
   mkdir usage
 fi
-for cmd in change-passphrase check create delete extract info init list mount prune; do
+for cmd in change-passphrase check create delete extract info init list mount prune serve; do
   FILENAME="usage/$cmd.rst.inc"
-  LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
-  echo -e ".. _attic_$cmd:\n" > $FILENAME
-  echo -e "attic $cmd\n$LINE\n::\n\n" >> $FILENAME
-  attic help $cmd --usage-only | sed -e 's/^/    /' >> $FILENAME
+  LINE=`echo -n borg $cmd | tr 'a-z- ' '-'`
+  echo -e ".. _borg_$cmd:\n" > $FILENAME
+  echo -e "borg $cmd\n$LINE\n::\n\n" >> $FILENAME
+  borg help $cmd --usage-only | sed -e 's/^/    /' >> $FILENAME
   echo -e "\nDescription\n~~~~~~~~~~~\n" >> $FILENAME
-  attic help $cmd --epilog-only >> $FILENAME
+  borg help $cmd --epilog-only >> $FILENAME
 done

+ 170 - 29
docs/usage.rst

@@ -15,6 +15,90 @@ Like most UNIX commands |project_name| is quiet by default but the ``-v`` or
 ``--verbose`` option can be used to get the program to output more status
 messages as it is processing.
 
+Return codes
+------------
+
+|project_name| can exit with the following return codes (rc):
+
+::
+
+    0      no error, normal termination
+    1      some error occurred (this can be a complete or a partial failure)
+    128+N  killed by signal N (e.g. 137 == kill -9)
+
+
+Note: we are aware that more distinct return codes might be useful, but it is
+not clear yet which return codes should be used for which precise conditions.
+
+See issue #61 for a discussion about that. Depending on the outcome of the
+discussion there, return codes may change in the future (the only thing that
+is rather certain is that 0 will always mean some sort of success and "not 0"
+will always mean some sort of warning / error / failure - but the definition
+of success might change).
+
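+For example, a wrapper script could react to the return code like this (a
+Python sketch; the repository path and the handling are just examples)::
+
+    import subprocess
+    import sys
+
+    rc = subprocess.call(['borg', 'create', '/mnt/backup::test', '/home'])
+    if rc == 0:
+        print('backup finished without errors')
+    elif rc < 0:
+        # subprocess reports "killed by signal N" as -N (a shell shows 128+N)
+        sys.exit('borg was killed by signal %d' % -rc)
+    else:
+        sys.exit('borg reported an error (rc %d)' % rc)
+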
+Environment Variables
+---------------------
+
+|project_name| uses some environment variables for automation:
+
+::
+
+    Specifying a passphrase:
+        BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories.
+
+    Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
+        BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository"
+        BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..."
+        BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss."
+
+    Directories:
+        BORG_KEYS_DIR : Defaults to '~/.borg/keys'. This directory contains keys for encrypted repositories.
+        BORG_CACHE_DIR : Defaults to '~/.cache/borg'. This directory contains the local cache (and might need a lot
+                         of space for dealing with big repositories).
+
+    Building:
+        BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py).
+
+    General:
+        TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations)
+
+
+Please note:
+
+- be very careful when using the "yes" sayers; the warnings with prompt exist for the safety of you / your data
+- also be very careful when putting your passphrase into a script, make sure it has appropriate file permissions
+  (e.g. mode 600, root:root).
+
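+In a wrapper script, the passphrase could be supplied like this (a Python
+sketch; the passphrase file and repository paths are just examples)::
+
+    import os
+    import subprocess
+
+    # read the passphrase from a file only root can read (e.g. mode 600)
+    with open('/root/.borg-passphrase') as f:
+        os.environ['BORG_PASSPHRASE'] = f.read().strip()
+
+    subprocess.check_call(['borg', 'create', '/mnt/backup::docs', '/home'])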
+
+Resource Usage
+--------------
+
+|project_name| might use a lot of resources depending on the size of the data set it is dealing with.
+
+CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
+
+Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
+
+Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
+                 deduplicated chunks used to represent them in the repository.
+
+Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes).
+
+Chunks index: proportional to the amount of data chunks in your repo. Lots of small chunks in your repo imply a big
+              chunks index. You may need to tweak the chunker params (see create options) if you have a lot of data and
+              you want to keep the chunks index at some reasonable size.
+
+Files index: proportional to the amount of files in your last backup. Can be switched off (see create options), but
+             the next backup will be much slower if you do.
+
+Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go
+         over the connection (ssh: repo url). If you use a locally mounted network filesystem, additionally some copy
+         operations used for transaction support also go over the connection. If you back up multiple sources to one
+         target repository, additional traffic happens for cache resynchronization.
+
+In case you are interested in more details, please read the internals documentation.
+
+
 .. include:: usage/init.rst.inc
 
 Examples
@@ -22,13 +106,48 @@ Examples
 ::
 
     # Local repository
-    $ attic init /data/mybackuprepo.attic
+    $ borg init /mnt/backup
+
+    # Remote repository (accesses a remote borg via ssh)
+    $ borg init user@hostname:backup
+
+    # Encrypted remote repository, store the key in the repo
+    $ borg init --encryption=repokey user@hostname:backup
+
+    # Encrypted remote repository, store the key in your home dir
+    $ borg init --encryption=keyfile user@hostname:backup
+
+Important notes about encryption:
+
+Use encryption! Repository encryption protects you e.g. against the case that
+an attacker has access to your backup repository.
+
+But be careful with the key / the passphrase:
+
+``--encryption=passphrase`` is DEPRECATED and will be removed in the next major release.
+This mode has very fundamental, unfixable problems (like you can never change
+your passphrase or the pbkdf2 iteration count for an existing repository, because
+the encryption / decryption key is directly derived from the passphrase).
 
-    # Remote repository
-    $ attic init user@hostname:mybackuprepo.attic
+If you want "passphrase-only" security, just use the ``repokey`` mode. The key will
+be stored inside the repository (in its "config" file). In the above-mentioned
+attack scenario, the attacker will have the key (but not the passphrase).
 
-    # Encrypted remote repository
-    $ attic init --encryption=passphrase user@hostname:mybackuprepo.attic
+If you want "passphrase and having-the-key" security, use the ``keyfile`` mode.
+The key will be stored in your home directory (in ``.borg/keys``). In the attack
+scenario, the attacker who only has access to your repo won't have the key (nor
+the passphrase).
+
+Make a backup copy of the key file (``keyfile`` mode) or repo config file
+(``repokey`` mode) and keep it in a safe place, so you still have the key in
+case it gets corrupted or lost.
+The backup that is encrypted with that key won't help you with that, of course.
+
+Make sure you use a good passphrase. Not too short, not too simple. The real
+encryption / decryption key is encrypted with / locked by your passphrase.
+If an attacker gets your key, he can't unlock and use it without knowing the
+passphrase. In ``repokey`` and ``keyfile`` modes, you can change your passphrase
+for existing repos.
 
 
 .. include:: usage/create.rst.inc
@@ -38,17 +157,23 @@ Examples
 ::
 
     # Backup ~/Documents into an archive named "my-documents"
-    $ attic create /data/myrepo.attic::my-documents ~/Documents
+    $ borg create /mnt/backup::my-documents ~/Documents
 
     # Backup ~/Documents and ~/src but exclude pyc files
-    $ attic create /data/myrepo.attic::my-files   \
-        ~/Documents                               \
-        ~/src                                     \
+    $ borg create /mnt/backup::my-files   \
+        ~/Documents                       \
+        ~/src                             \
         --exclude '*.pyc'
 
     # Backup the root filesystem into an archive named "root-YYYY-MM-DD"
     NAME="root-`date +%Y-%m-%d`"
-    $ attic create /data/myrepo.attic::$NAME / --do-not-cross-mountpoints
+    $ borg create /mnt/backup::$NAME / --do-not-cross-mountpoints
+
+    # Backup huge files with little chunk management overhead
+    $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs
+
+    # Backup a raw device (must not be active/in use/mounted at that time)
+    $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
 
 
 .. include:: usage/extract.rst.inc
@@ -58,16 +183,19 @@ Examples
 ::
 
     # Extract entire archive
-    $ attic extract /data/myrepo::my-files
+    $ borg extract /mnt/backup::my-files
 
     # Extract entire archive and list files while processing
-    $ attic extract -v /data/myrepo::my-files
+    $ borg extract -v /mnt/backup::my-files
 
 
     # Extract the "src" directory
-    $ attic extract /data/myrepo::my-files home/USERNAME/src
+    $ borg extract /mnt/backup::my-files home/USERNAME/src
 
 
     # Extract the "src" directory but exclude object files
-    $ attic extract /data/myrepo::my-files home/USERNAME/src --exclude '*.o'
+    $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
+
+Note: currently, extract always writes into the current working directory ("."),
+      so make sure you ``cd`` to the right place before calling ``borg extract``.
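
For example (``/tmp/restore`` is only an illustrative path)::

    $ mkdir -p /tmp/restore && cd /tmp/restore
    $ borg extract /mnt/backup::my-files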
 
 
 .. include:: usage/check.rst.inc
 
 
@@ -79,14 +207,14 @@ Examples
 ~~~~~~~~
 ::
 
 
-    $ attic list /data/myrepo
+    $ borg list /mnt/backup
     my-files            Thu Aug  1 23:33:22 2013
     my-documents        Thu Aug  1 23:35:43 2013
     root-2013-08-01     Thu Aug  1 23:43:55 2013
     root-2013-08-02     Fri Aug  2 15:18:17 2013
     ...
 
 
-    $ attic list /data/myrepo::root-2013-08-02
+    $ borg list /mnt/backup::root-2013-08-02
     drwxr-xr-x root   root          0 Jun 05 12:06 .
     lrwxrwxrwx root   root          0 May 31 20:40 bin -> usr/bin
     drwxr-xr-x root   root          0 Aug 01 22:08 etc
@@ -102,18 +230,18 @@ Examples
 ::
 
 
     # Keep 7 end of day and 4 additional end of week archives:
-    $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4
+    $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4
 
 
     # Same as above but only apply to archive names starting with "foo":
-    $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4 --prefix=foo
+    $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --prefix=foo
 
 
     # Keep 7 end of day, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4 --keep-monthly=-1
+    $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --keep-monthly=-1
 
 
     # Keep all backups in the last 10 days, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ attic prune /data/myrepo --keep-within=10d --keep-weekly=4 --keep-monthly=-1
+    $ borg prune /mnt/backup --keep-within=10d --keep-weekly=4 --keep-monthly=-1
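
    # When tuning prune rules, a dry run shows what would be removed without
    # deleting anything (a sketch; assumes your version provides the
    # --dry-run/-n option, as attic did):
    $ borg prune --dry-run /mnt/backup --keep-daily=7 --keep-weekly=4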
 
 
 
 
 .. include:: usage/info.rst.inc
@@ -122,13 +250,13 @@ Examples
 ~~~~~~~~
 ::
 
 
-    $ attic info /data/myrepo::root-2013-08-02
+    $ borg info /mnt/backup::root-2013-08-02
     Name: root-2013-08-02
     Fingerprint: bc3902e2c79b6d25f5d769b335c5c49331e6537f324d8d3badcb9a0917536dbb
     Hostname: myhostname
     Username: root
     Time: Fri Aug  2 15:18:17 2013
-    Command line: /usr/bin/attic create --stats /data/myrepo::root-2013-08-02 / --do-not-cross-mountpoints
+    Command line: /usr/bin/borg create --stats /mnt/backup::root-2013-08-02 / --do-not-cross-mountpoints
     Number of files: 147429
     Original size: 5344169493 (4.98 GB)
     Compressed size: 1748189642 (1.63 GB)
@@ -141,7 +269,7 @@ Examples
 ~~~~~~~~
 ::
 
 
-    $ attic mount /data/myrepo::root-2013-08-02 /tmp/mymountpoint
+    $ borg mount /mnt/backup::root-2013-08-02 /tmp/mymountpoint
     $ ls /tmp/mymountpoint
     bin  boot  etc  lib  lib64  mnt  opt  root  sbin  srv  usr  var
     $ fusermount -u /tmp/mymountpoint
@@ -154,16 +282,29 @@ Examples
 ::
 
 
     # Create a key file protected repository
-    $ attic init --encryption=keyfile /tmp/encrypted-repo
-    Initializing repository at "/tmp/encrypted-repo"
+    $ borg init --encryption=keyfile /mnt/backup
+    Initializing repository at "/mnt/backup"
     Enter passphrase (empty for no passphrase):
     Enter same passphrase again: 
-    Key file "/home/USER/.attic/keys/tmp_encrypted_repo" created.
+    Key file "/home/USER/.borg/keys/mnt_backup" created.
     Keep this file safe. Your data will be inaccessible without it.
 
 
     # Change key file passphrase
-    $ attic change-passphrase /tmp/encrypted-repo
-    Enter passphrase for key file /home/USER/.attic/keys/tmp_encrypted_repo: 
+    $ borg change-passphrase /mnt/backup
+    Enter passphrase for key file /home/USER/.borg/keys/mnt_backup:
     New passphrase: 
     Enter same passphrase again: 
-    Key file "/home/USER/.attic/keys/tmp_encrypted_repo" updated
+    Key file "/home/USER/.borg/keys/mnt_backup" updated
+
+
+.. include:: usage/serve.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    # Allow an SSH keypair to only run |project_name|, and only have access to /mnt/backup.
+    # This will help to secure an automated remote backup system.
+    $ cat ~/.ssh/authorized_keys
+    command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...]
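
On the client side, such a restricted repository is then addressed with the
usual ``user@host:path`` syntax; a sketch (host and paths are examples)::

    $ borg create user@hostname:/mnt/backup::my-documents ~/Documents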
+

+ 5 - 0
requirements.d/development.txt

@@ -0,0 +1,5 @@
+tox
+mock
+pytest
+pytest-cov<2.0.0
+Cython
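
These pins are consumed the usual pip way; a sketch (run inside a Python 3
virtualenv):

    $ pip install -r requirements.d/development.txt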

+ 0 - 4
scripts/attic

@@ -1,4 +0,0 @@
-#!/usr/bin/env python
-from attic.archiver import main
-main()
-
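
This removed wrapper is superseded by the ``console_scripts`` entry point
declared in ``setup.py`` below; on installation, setuptools generates an
equivalent ``borg`` executable, roughly:

    #!/usr/bin/env python
    from borg.archiver import main
    main()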

+ 8 - 0
setup.cfg

@@ -0,0 +1,8 @@
+[pytest]
+python_files = testsuite/*.py
+
+[flake8]
+ignore = E226,F403
+max-line-length = 250
+exclude = versioneer.py,docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache
+max-complexity = 100
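
With this in place, both tools read their settings from ``setup.cfg`` when
invoked from the project root; a sketch:

    $ py.test
    $ flake8 borg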

+ 54 - 38
setup.py

@@ -4,27 +4,27 @@ import sys
 from glob import glob
 
 
 import versioneer
-versioneer.versionfile_source = 'attic/_version.py'
-versioneer.versionfile_build = 'attic/_version.py'
+versioneer.VCS = 'git'
+versioneer.style = 'pep440'
+versioneer.versionfile_source = 'borg/_version.py'
+versioneer.versionfile_build = 'borg/_version.py'
 versioneer.tag_prefix = ''
-versioneer.parentdir_prefix = 'Attic-' # dirname like 'myproject-1.2.0'
+versioneer.parentdir_prefix = 'borgbackup-'  # dirname like 'myproject-1.2.0'
 
 
 min_python = (3, 2)
 if sys.version_info < min_python:
-    print("Attic requires Python %d.%d or later" % min_python)
+    print("Borg requires Python %d.%d or later" % min_python)
     sys.exit(1)
 
 
-try:
-    from setuptools import setup, Extension
-except ImportError:
-    from distutils.core import setup, Extension
 
 
-crypto_source = 'attic/crypto.pyx'
-chunker_source = 'attic/chunker.pyx'
-hashindex_source = 'attic/hashindex.pyx'
-platform_linux_source = 'attic/platform_linux.pyx'
-platform_darwin_source = 'attic/platform_darwin.pyx'
-platform_freebsd_source = 'attic/platform_freebsd.pyx'
+from setuptools import setup, Extension
+
+crypto_source = 'borg/crypto.pyx'
+chunker_source = 'borg/chunker.pyx'
+hashindex_source = 'borg/hashindex.pyx'
+platform_linux_source = 'borg/platform_linux.pyx'
+platform_darwin_source = 'borg/platform_darwin.pyx'
+platform_freebsd_source = 'borg/platform_freebsd.pyx'
 
 
 try:
     from Cython.Distutils import build_ext
@@ -32,14 +32,20 @@ try:
 
 
     class Sdist(versioneer.cmd_sdist):
         def __init__(self, *args, **kwargs):
-            for src in glob('attic/*.pyx'):
-                cython_compiler.compile(glob('attic/*.pyx'),
-                                        cython_compiler.default_options)
+            for src in glob('borg/*.pyx'):
+                cython_compiler.compile(src, cython_compiler.default_options)
             versioneer.cmd_sdist.__init__(self, *args, **kwargs)
 
 
         def make_distribution(self):
-            self.filelist.extend(['attic/crypto.c', 'attic/chunker.c', 'attic/_chunker.c', 'attic/hashindex.c', 'attic/_hashindex.c', 'attic/platform_linux.c', 'attic/platform_freebsd.c', 'attic/platform_darwin.c'])
-            super(Sdist, self).make_distribution()
+            self.filelist.extend([
+                'borg/crypto.c',
+                'borg/chunker.c', 'borg/_chunker.c',
+                'borg/hashindex.c', 'borg/_hashindex.c',
+                'borg/platform_linux.c',
+                'borg/platform_freebsd.c',
+                'borg/platform_darwin.c',
+            ])
+            super().make_distribution()
 
 
 except ImportError:
     class Sdist(versioneer.cmd_sdist):
@@ -54,7 +60,7 @@ except ImportError:
     platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
     from distutils.command.build_ext import build_ext
     if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
-        raise ImportError('The GIT version of Attic needs Cython. Install Cython or use a released version')
+        raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
 
 
 
 
 def detect_openssl(prefixes):
@@ -66,9 +72,9 @@ def detect_openssl(prefixes):
                     return prefix
 
 
 
 
-possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/attic', '/opt/local']
-if os.environ.get('ATTIC_OPENSSL_PREFIX'):
-    possible_openssl_prefixes.insert(0, os.environ.get('ATTIC_OPENSSL_PREFIX'))
+possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local']
+if os.environ.get('BORG_OPENSSL_PREFIX'):
+    possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
 ssl_prefix = detect_openssl(possible_openssl_prefixes)
 if not ssl_prefix:
     raise Exception('Unable to find OpenSSL >= 1.0 headers. (Looked here: {})'.format(', '.join(possible_openssl_prefixes)))
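
If OpenSSL lives somewhere unusual, the renamed environment variable can point
the build at it; a sketch (the prefix path is an example):

    $ BORG_OPENSSL_PREFIX=/opt/openssl python setup.py build_ext --inplace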
@@ -83,27 +89,27 @@ cmdclass = versioneer.get_cmdclass()
 cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
 
 
 ext_modules = [
-    Extension('attic.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
-    Extension('attic.chunker', [chunker_source]),
-    Extension('attic.hashindex', [hashindex_source])
+    Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
+    Extension('borg.chunker', [chunker_source]),
+    Extension('borg.hashindex', [hashindex_source])
 ]
 if sys.platform.startswith('linux'):
-    ext_modules.append(Extension('attic.platform_linux', [platform_linux_source], libraries=['acl']))
+    ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl']))
 elif sys.platform.startswith('freebsd'):
-    ext_modules.append(Extension('attic.platform_freebsd', [platform_freebsd_source]))
+    ext_modules.append(Extension('borg.platform_freebsd', [platform_freebsd_source]))
 elif sys.platform == 'darwin':
-    ext_modules.append(Extension('attic.platform_darwin', [platform_darwin_source]))
+    ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
 
 
 setup(
-    name='Attic',
+    name='borgbackup',
     version=versioneer.get_version(),
-    author='Jonas Borgstrom',
-    author_email='jonas@borgstrom.se',
-    url='https://attic-backup.org/',
-    description='Deduplicated backups',
+    author='The Borg Collective (see AUTHORS file)',
+    author_email='borgbackup@librelist.com',
+    url='https://borgbackup.github.io/',
+    description='Deduplicated, encrypted, authenticated and compressed backups',
     long_description=long_description,
     license='BSD',
-    platforms=['Linux', 'MacOS X'],
+    platforms=['Linux', 'MacOS X', 'FreeBSD', ],
     classifiers=[
         'Development Status :: 4 - Beta',
         'Environment :: Console',
@@ -113,12 +119,22 @@ setup(
         'Operating System :: MacOS :: MacOS X',
         'Operating System :: POSIX :: Linux',
         'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.2',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
         'Topic :: Security :: Cryptography',
         'Topic :: System :: Archiving :: Backup',
     ],
-    packages=['attic', 'attic.testsuite'],
-    scripts=['scripts/attic'],
+    packages=['borg', 'borg.testsuite'],
+    entry_points={
+        'console_scripts': [
+            'borg = borg.archiver:main',
+        ]
+    },
     cmdclass=cmdclass,
     ext_modules=ext_modules,
-    install_requires=['msgpack-python']
+    # msgpack pure python data corruption was fixed in 0.4.6.
+    # Also, we might use some rather recent API features.
+    install_requires=['msgpack-python>=0.4.6']
 )

+ 10 - 6
tox.ini

@@ -1,10 +1,14 @@
+# tox configuration - if you change anything here, run this to verify:
+# fakeroot -u tox --recreate
+
 [tox]
 envlist = py32, py33, py34
 
 
 [testenv]
-# Change dir to avoid import problem
-changedir = docs
-commands = {envpython} -m attic.testsuite.run -bv []
-
-[testenv:py32]
-deps = mock
+# Change dir to avoid import problem for cython code. The directory does
+# not really matter, should be just different from the toplevel dir.
+changedir = {toxworkdir}
+deps = -rrequirements.d/development.txt
+commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
+# fakeroot -u needs some env vars:
+passenv = *
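
A single environment can be selected with tox's standard ``-e`` option; a
sketch, combined with fakeroot as the comment above suggests:

    $ tox -e py34
    $ fakeroot -u tox -e py34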

File diff suppressed because it is too large
+ 618 - 350
versioneer.py


Some files were not shown because too many files changed in this diff