Browse Source

Merge pull request #1890 from ThomasWaldmann/merge-1.0-maint

Merge 1.0-maint
enkore 8 years ago
parent
commit
701e26af10

+ 5 - 3
README.rst

@@ -89,9 +89,7 @@ Initialize a new backup repository and create a backup archive::
     $ borg init /path/to/repo
     $ borg create /path/to/repo::Saturday1 ~/Documents
 
-Now doing another backup, just to show off the great deduplication:
-
-.. code-block:: none
+Now doing another backup, just to show off the great deduplication::
 
     $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
     -----------------------------------------------------------------------------
@@ -141,6 +139,8 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 Security issues should be reported to the `Security contact`_ (or
 see ``docs/support.rst`` in the source distribution).
 
+.. start-badges
+
 |doc| |build| |coverage| |bestpractices|
 
 .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
@@ -162,3 +162,5 @@ see ``docs/suppport.rst`` in the source distribution).
 .. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
         :alt: Best Practices Score
         :target: https://bestpractices.coreinfrastructure.org/projects/271
+
+.. end-badges

+ 50 - 66
Vagrantfile

@@ -65,9 +65,9 @@ def packages_darwin
     # install all the (security and other) updates
     sudo softwareupdate --install --all
     # get osxfuse 3.x release code from github:
-    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.2/osxfuse-3.5.2.dmg >osxfuse.dmg
+    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.3/osxfuse-3.5.3.dmg >osxfuse.dmg
     MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
-    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.2.pkg" -target /
+    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.3.pkg" -target /
     sudo chown -R vagrant /usr/local  # brew must be able to create stuff here
     ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     brew update
@@ -172,14 +172,14 @@ def packages_cygwin(version)
     set CYGSETUP=#{setup_exe}
     REM --- Install build version of CygWin in a subfolder
     set OURPATH=%cd%
-	    set CYGBUILD="C:\\cygwin\\CygWin"
-	    set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/
-	    set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel
+    set CYGBUILD="C:\\cygwin\\CygWin"
+    set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/
+    set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel
     %CYGSETUP% -q -B -o -n -R %CYGBUILD% -L -D -s %CYGMIRROR% -P %BUILDPKGS%
     cd /d C:\\cygwin\\CygWin\\bin
     regtool set /HKLM/SYSTEM/CurrentControlSet/Services/OpenSSHd/ImagePath "C:\\cygwin\\CygWin\\bin\\cygrunsrv.exe"
     bash -c "ssh-host-config --no"
-	    ' > /cygdrive/c/cygwin/install.bat
+    ' > /cygdrive/c/cygwin/install.bat
     cd /cygdrive/c/cygwin && cmd.exe /c install.bat
 
     echo "alias mkdir='mkdir -p'" > ~/.profile
@@ -201,7 +201,6 @@ def install_cygwin_venv
   EOF
 end
 
-
 def install_pyenv(boxname)
   return <<-EOF
     curl -s -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash
@@ -248,8 +247,8 @@ def build_pyenv_venv(boxname)
   EOF
 end
 
-def install_borg(boxname)
-  return <<-EOF
+def install_borg(fuse)
+  script = <<-EOF
     . ~/.bash_profile
     cd /vagrant/borg
     . borg-env/bin/activate
@@ -260,31 +259,24 @@ def install_borg(boxname)
     rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
     rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
     pip install -r requirements.d/development.txt
-    # by using [fuse], setup.py can handle different fuse requirements:
-    pip install -e .[fuse]
-  EOF
-end
-
-def install_borg_no_fuse(boxname)
-  return <<-EOF
-    . ~/.bash_profile
-    cd /vagrant/borg
-    . borg-env/bin/activate
-    pip install -U wheel  # upgrade wheel, too old for 3.5
-    cd borg
-    # clean up (wrong/outdated) stuff we likely got via rsync:
-    rm -f borg/*.so borg/*.cpy*
-    rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
-    rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
-    pip install -r requirements.d/development.txt
-    pip install -e .
-    # do not install llfuse into the virtualenvs built by tox:
-    sed -i.bak '/fuse.txt/d' tox.ini
   EOF
+  if fuse
+    script += <<-EOF
+      # by using [fuse], setup.py can handle different fuse requirements:
+      pip install -e .[fuse]
+    EOF
+  else
+    script += <<-EOF
+      pip install -e .
+      # do not install llfuse into the virtualenvs built by tox:
+      sed -i.bak '/fuse.txt/d' tox.ini
+    EOF
+  end
+  return script
 end
 
-def install_pyinstaller(boxname)
-  return <<-EOF
+def install_pyinstaller(bootloader)
+  script = <<-EOF
     . ~/.bash_profile
     cd /vagrant/borg
     . borg-env/bin/activate
@@ -292,25 +284,19 @@ def install_pyinstaller(boxname)
     cd pyinstaller
     # develop branch, with fixed / freshly rebuilt bootloaders
     git checkout fresh-bootloader
-    pip install -e .
   EOF
-end
-
-def install_pyinstaller_bootloader(boxname)
-  return <<-EOF
-    . ~/.bash_profile
-    cd /vagrant/borg
-    . borg-env/bin/activate
-    git clone https://github.com/thomaswaldmann/pyinstaller.git
-    cd pyinstaller
-    # develop branch, with fixed / freshly rebuilt bootloaders
-    git checkout fresh-bootloader
-    # build bootloader, if it is not included
-    cd bootloader
-    python ./waf all
-    cd ..
+  if bootloader
+    script += <<-EOF
+      # build bootloader, if it is not included
+      cd bootloader
+      python ./waf all
+      cd ..
+    EOF
+  end
+  script += <<-EOF
     pip install -e .
   EOF
+  return script
 end
 
 def build_binary_with_pyinstaller(boxname)
@@ -347,13 +333,11 @@ end
 def fix_perms
   return <<-EOF
     # . ~/.profile
-
     if id "vagrant" >/dev/null 2>&1; then
       chown -R vagrant /vagrant/borg
     else
       chown -R ubuntu /vagrant/borg
     fi
-
   EOF
 end
 
@@ -381,7 +365,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos7_64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos7_64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos7_64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos7_64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos7_64")
   end
 
@@ -391,7 +375,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32")
   end
 
@@ -404,7 +388,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64")
   end
 
@@ -415,7 +399,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("xenial64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("xenial64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("xenial64")
   end
 
@@ -426,7 +410,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64")
   end
 
@@ -437,7 +421,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("jessie64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("jessie64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("jessie64")
   end
 
@@ -448,8 +432,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy32")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy32")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy32")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy32")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32")
   end
@@ -461,8 +445,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy64")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy64")
   end
@@ -475,8 +459,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "fix pyenv", :type => :shell, :privileged => false, :inline => fix_pyenv_darwin("darwin64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("darwin64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("darwin64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("darwin64")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("darwin64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("darwin64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("darwin64")
   end
@@ -491,8 +475,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("freebsd")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("freebsd")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("freebsd")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("freebsd")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller_bootloader("freebsd")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(true)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd")
   end
@@ -504,7 +488,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64")
   end
 
@@ -515,7 +499,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64")
   end
 
@@ -542,7 +526,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision :reload
     b.vm.provision "cygwin install pip", :type => :shell, :privileged => false, :inline => install_cygwin_venv
     b.vm.provision "cygwin build env", :type => :shell, :privileged => false, :inline => build_sys_venv("windows10")    
-    b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("windows10")
+    b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "cygwin run tests", :type => :shell, :privileged => false, :inline => run_tests("windows10")
   end
 end

+ 84 - 4
docs/changes.rst

@@ -1,8 +1,25 @@
-Changelog
-=========
+Important notes
+===============
+
+This section is used for information about e.g. security and corruption issues.
+
+Pre-1.0.9 potential data loss
+-----------------------------
+
+If you have archives in your repository that were made with attic <= 0.13
+(and later migrated to borg), running borg check would report errors in these
+archives. See issue #1837.
+
+The reason for this is an invalid (and useless) metadata key that was
+always added due to a bug in these old attic versions.
+
+If you run borg check --repair, things escalate quickly: all archive items
+with invalid metadata will be killed. Due to that attic bug, that means all
+items in all archives made with these old attic versions.
 
-Important note about pre-1.0.4 potential repo corruption
---------------------------------------------------------
+
+Pre-1.0.4 potential repo corruption
+-----------------------------------
 
 Some external errors (like network or disk I/O errors) could lead to
 corruption of the backup repository due to issue #1138.
@@ -49,6 +66,69 @@ The best check that everything is ok is to run a dry-run extraction::
 
     borg extract -v --dry-run REPO::ARCHIVE
 
+.. _changelog:
+
+Changelog
+=========
+
+Version 1.0.9rc1 (2016-11-27)
+-----------------------------
+
+Bug fixes:
+
+- files cache: fix determination of newest mtime in backup set (which is
+  used in cache cleanup and led to wrong "A" [added] status for unchanged
+  files in next backup), #1860.
+
+- borg check:
+
+  - fix incorrectly reporting attic 0.13 and earlier archives as corrupt
+  - handle repo w/o objects gracefully and also bail out early if repo is
+    *completely* empty, #1815.
+- fix tox/pybuild in 1.0-maint
+- at xattr module import time, loggers are not initialized yet
+
+New features:
+
+- borg umount <mountpoint>
+  exposed already existing umount code via the CLI api, so users can use it,
+  which is more consistent than using borg to mount and fusermount -u (or
+  umount) to un-mount, #1855.
+- implement borg create --noatime --noctime, fixes #1853
+
+Other changes:
+
+- docs:
+
+  - display README correctly on PyPI
+  - improve cache / index docs, esp. files cache docs, fixes #1825
+  - different pattern matching for --exclude, #1779
+  - datetime formatting examples for {now} placeholder, #1822
+  - clarify passphrase mode attic repo upgrade, #1854
+  - clarify --umask usage, #1859
+  - clarify how to choose PR target branch
+  - clarify prune behavior for different archive contents, #1824
+  - fix PDF issues, add logo, fix authors, headings, TOC
+  - move security verification to support section
+  - fix links in standalone README (:ref: tags)
+  - add link to security contact in README
+  - add FAQ about security
+  - move fork differences to FAQ
+  - add more details about resource usage
+- tests: skip remote tests on cygwin, #1268
+- travis:
+
+  - allow OS X failures until the brew cask osxfuse issue is fixed
+  - caskroom osxfuse-beta gone, it's osxfuse now (3.5.3)
+- vagrant:
+
+  - upgrade OSXfuse / FUSE for macOS to 3.5.3
+  - remove llfuse from tox.ini at a central place
+  - do not try to install llfuse on centos6
+  - fix fuse test for darwin, #1546
+  - add windows virtual machine with cygwin
+  - Vagrantfile cleanup / code deduplication
+
 
 Version 1.1.0b2 (2016-10-01)
 ----------------------------

+ 1 - 1
docs/faq.rst

@@ -540,7 +540,7 @@ Here's a (incomplete) list of some major changes:
 * better logging, screen output, progress indication
 * tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X
 
-Please read the `ChangeLog`_ (or ``docs/changes.rst`` in the source distribution) for more
+Please read the :ref:`changelog` (or ``docs/changes.rst`` in the source distribution) for more
 information.
 
 Borg is not compatible with original attic (but there is a one-way conversion).

+ 96 - 27
docs/internals.rst

@@ -252,44 +252,94 @@ For some more general usage hints see also ``--chunker-params``.
 Indexes / Caches
 ----------------
 
-The **files cache** is stored in ``cache/files`` and is indexed on the
-``file path hash``. At backup time, it is used to quickly determine whether we
-need to chunk a given file (or whether it is unchanged and we already have all
-its pieces).
-It contains:
+The **files cache** is stored in ``cache/files`` and is used at backup time to
+quickly determine whether a given file is unchanged and we have all its chunks.
+
+The files cache is a key -> value mapping and contains:
+
+* key:
+
+  - full, absolute file path id_hash
+* value:
+
+  - file inode number
+  - file size
+  - file mtime_ns
+  - list of file content chunk id hashes
+  - age (0 [newest], 1, 2, 3, ..., BORG_FILES_CACHE_TTL - 1)
+
+To determine whether a file has not changed, cached values are looked up via
+the key in the mapping and compared to the current file attribute values.
+
+If the file's size, mtime_ns and inode number are still the same, it is
+considered to not have changed. In that case, we check that all file content
+chunks are (still) present in the repository (we check that via the chunks
+cache).
+
+If everything is matching and all chunks are present, the file is not read /
+chunked / hashed again (but still a file metadata item is written to the
+archive, made from fresh file metadata read from the filesystem). This is
+what makes borg so fast when processing unchanged files.
 
-* age
-* file inode number
-* file size
-* file mtime_ns
-* file content chunk hashes
+If there is a mismatch or a chunk is missing, the file is read / chunked /
+hashed. Chunks already present in repo won't be transferred to repo again.
 
-The inode number is stored to make sure we distinguish between
+The inode number is stored and compared to make sure we distinguish between
 different files, as a single path may not be unique across different
 archives in different setups.
 
-The files cache is stored as a python associative array storing
-python objects, which generates a lot of overhead.
+Not all filesystems have stable inode numbers. If that is the case, borg can
+be told to ignore the inode number in the check via --ignore-inode.
 
-The **chunks cache** is stored in ``cache/chunks`` and is indexed on the
-``chunk id_hash``. It is used to determine whether we already have a specific
-chunk, to count references to it and also for statistics.
-It contains:
+The age value is used for cache management. If a file is "seen" in a backup
+run, its age is reset to 0, otherwise its age is incremented by one.
+If a file was not seen in BORG_FILES_CACHE_TTL backups, its cache entry is
+removed. See also: :ref:`always_chunking` and :ref:`a_status_oddity`
 
-* reference count
-* size
-* encrypted/compressed size
+The files cache is a python dictionary, storing python objects, which
+generates a lot of overhead.
 
-The **repository index** is stored in ``repo/index.%d`` and is indexed on the
-``chunk id_hash``. It is used to determine a chunk's location in the repository.
-It contains:
+Borg can also work without using the files cache (saves memory if you have a
+lot of files or not much RAM free), then all files are assumed to have changed.
+This is usually much slower than with files cache.
+
+The **chunks cache** is stored in ``cache/chunks`` and is used to determine
+whether we already have a specific chunk, to count references to it and also
+for statistics.
+
+The chunks cache is a key -> value mapping and contains:
+
+* key:
+
+  - chunk id_hash
+* value:
 
-* segment (that contains the chunk)
-* offset (where the chunk is located in the segment)
+  - reference count
+  - size
+  - encrypted/compressed size
+
+The chunks cache is a hashindex, a hash table implemented in C and tuned for
+memory efficiency.
+
+The **repository index** is stored in ``repo/index.%d`` and is used to
+determine a chunk's location in the repository.
+
+The repo index is a key -> value mapping and contains:
+
+* key:
+
+  - chunk id_hash
+* value:
+
+  - segment (that contains the chunk)
+  - offset (where the chunk is located in the segment)
+
+The repo index is a hashindex, a hash table implemented in C and tuned for
+memory efficiency.
 
-The repository index file is random access.
 
 Hints are stored in a file (``repo/hints.%d``).
+
 It contains:
 
 * version
@@ -314,7 +364,7 @@ varies between 33% and 300%.
 Indexes / Caches memory usage
 -----------------------------
 
-Here is the estimated memory usage of |project_name|:
+Here is the estimated memory usage of |project_name| - it's complicated:
 
   chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS
 
@@ -327,6 +377,14 @@ Here is the estimated memory usage of |project_name|:
   mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage
              = chunk_count * 164 + total_file_count * 240
 
+Due to the hashtables, the best/usual/worst cases for memory allocation can
+be estimated like this:
+
+  mem_allocation = mem_usage / load_factor  # l_f = 0.25 .. 0.75
+
+  mem_allocation_peak = mem_allocation * (1 + growth_factor)  # g_f = 1.1 .. 2
+
+
 All units are Bytes.
 
 It is assuming every chunk is referenced exactly once (if you have a lot of
@@ -338,6 +396,17 @@ more chunks than estimated above, because 1 file is at least 1 chunk).
 
 If a remote repository is used the repo index will be allocated on the remote side.
 
+The chunks cache, files cache and the repo index are all implemented as hash
+tables. A hash table must have a significant amount of unused entries to be
+fast - the so-called load factor gives the ratio of used to total entries.
+
+When a hash table gets full (load factor getting too high), it needs to be
+grown (allocate new, bigger hash table, copy all elements over to it, free old
+hash table) - this will lead to short-time peaks in memory usage each time this
+happens. This usually does not happen for all hashtables at the same time, though.
+For small hash tables, we start with a growth factor of 2, which comes down to
+~1.1x for big hash tables.
+
 E.g. backing up a total count of 1 Mi (IEC binary prefix i.e. 2^20) files with a total size of 1TiB.
 
 a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic):

+ 1 - 1
docs/quickstart.rst

@@ -59,7 +59,7 @@ A step by step example
     -rw-r--r-- user   group       7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc
     ...
 
-6. Restore the *Monday* archive::
+6. Restore the *Monday* archive by extracting the files relative to the current directory::
 
     $ borg extract /path/to/repo::Monday
 

+ 102 - 20
docs/usage.rst

@@ -42,7 +42,7 @@ Note: you may also prepend a ``file://`` to a filesystem path to get URL style.
 ``user@host:~other/path/to/repo`` - path relative to other's home directory
 
 Note: giving ``user@host:/./path/to/repo`` or ``user@host:/~/path/to/repo`` or
-``user@host:/~other/path/to/repo``is also supported, but not required here.
+``user@host:/~other/path/to/repo`` is also supported, but not required here.
 
 
 **Remote repositories with relative paths, alternative syntax with port**:
@@ -220,36 +220,80 @@ Resource Usage
 
 |project_name| might use a lot of resources depending on the size of the data set it is dealing with.
 
-CPU:
+If one uses |project_name| in a client/server way (with a ssh: repository),
+the resource usage occurs in part on the client and in another part on the
+server.
+
+If one uses |project_name| as a single process (with a filesystem repo),
+all the resource usage occurs in that one process, so just add up client +
+server to get the approximate resource usage.
+
+CPU client:
+    borg create: does chunking, hashing, compression, crypto (high CPU usage)
+    chunks cache sync: quite heavy on CPU, doing lots of hashtable operations.
+    borg extract: crypto, decompression (medium to high CPU usage)
+    borg check: similar to extract, but depends on options given.
+    borg prune / borg delete archive: low to medium CPU usage
+    borg delete repo: done on the server
     It won't go beyond 100% of 1 core as the code is currently single-threaded.
     Especially higher zlib and lzma compression levels use significant amounts
-    of CPU cycles.
+    of CPU cycles. Crypto might be cheap on the CPU (if hardware accelerated) or
+    expensive (if not).
 
-Memory (RAM):
+CPU server:
+    It usually doesn't need much CPU, it just deals with the key/value store
+    (repository) and uses the repository index for that.
+
+    borg check: the repository check computes the checksums of all chunks
+    (medium CPU usage)
+    borg delete repo: low CPU usage
+
+CPU (only for client/server operation):
+    When using borg in a client/server way with a ssh:-type repo, the ssh
+    processes used for the transport layer will need some CPU on the client and
+    on the server due to the crypto they are doing - esp. if you are pumping
+    big amounts of data.
+
+Memory (RAM) client:
     The chunks index and the files index are read into memory for performance
-    reasons.
+    reasons. Might need big amounts of memory (see below).
     Compression, esp. lzma compression with high levels might need substantial
     amounts of memory.
 
-Temporary files:
-    Reading data and metadata from a FUSE mounted repository will consume about
-    the same space as the deduplicated chunks used to represent them in the
-    repository.
+Memory (RAM) server:
+    The server process will load the repository index into memory. Might need
+    considerable amounts of memory, but less than on the client (see below).
 
-Cache files:
-    Contains the chunks index and files index (plus a compressed collection of
-    single-archive chunk indexes).
-
-Chunks index:
+Chunks index (client only):
     Proportional to the amount of data chunks in your repo. Lots of chunks
     in your repo imply a big chunks index.
     It is possible to tweak the chunker params (see create options).
 
-Files index:
-    Proportional to the amount of files in your last backup. Can be switched
-    off (see create options), but next backup will be much slower if you do.
+Files index (client only):
+    Proportional to the amount of files in your last backups. Can be switched
+    off (see create options), but next backup might be much slower if you do.
+    The speed benefit of using the files cache is proportional to file size.
+
+Repository index (server only):
+    Proportional to the amount of data chunks in your repo. Lots of chunks
+    in your repo imply a big repository index.
+    It is possible to tweak the chunker params (see create options) to
+    influence the amount of chunks being created.
 
-Network:
+Temporary files (client):
+    Reading data and metadata from a FUSE mounted repository will consume up to
+    the size of all deduplicated, small chunks in the repository. Big chunks
+    won't be locally cached.
+
+Temporary files (server):
+    None.
+
+Cache files (client only):
+    Contains the chunks index and files index (plus a collection of single-
+    archive chunk indexes which might need huge amounts of disk space,
+    depending on archive count and size - see FAQ about how to reduce).
+
+Network (only for client/server operation):
     If your repository is remote, all deduplicated (and optionally compressed/
     encrypted) data of course has to go over the connection (ssh: repo url).
     If you use a locally mounted network filesystem, additionally some copy
@@ -257,7 +301,8 @@ Network:
     you backup multiple sources to one target repository, additional traffic
     happens for cache resynchronization.
 
-In case you are interested in more details, please read the internals documentation.
+In case you are interested in more details (like formulas), please see
+:ref:`internals`.
 
 File systems
 ~~~~~~~~~~~~
@@ -386,7 +431,19 @@ Examples
 
     # Use short hostname, user name and current time in archive name
     $ borg create /path/to/repo::{hostname}-{user}-{now} ~
-    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~
+    # Similar, use the same datetime format as borg 1.1 will have as default
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S} ~
+    # As above, but add nanoseconds
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S.%f} ~
+
+Notes
+~~~~~
+
+- the --exclude patterns are not like tar. In tar, --exclude .bundler/gems will
+  exclude foo/.bundler/gems. In borg it will not; you need to use --exclude
+  '\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for
+  more information.
+
 
 .. include:: usage/extract.rst.inc
 
@@ -705,6 +762,20 @@ Examples
     no key file found for repository
 
 
+Upgrading a passphrase encrypted attic repo
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+attic offered a "passphrase" encryption mode, but this was removed in borg 1.0
+and replaced by the "repokey" mode (which stores the passphrase-protected
+encryption key in the repository config).
+
+Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps
+are needed, in this order:
+
+- borg upgrade repo
+- borg migrate-to-repokey repo
+
+
 .. include:: usage/recreate.rst.inc
 
 Examples
@@ -861,6 +932,17 @@ a new repository when changing chunker params.
 
 For more details, see :ref:`chunker_details`.
 
+
+--umask
+~~~~~~~
+
+If you use ``--umask``, make sure that all repository-modifying borg commands
+(create, delete, prune) that access the repository in question use the same
+``--umask`` value.
+
+If multiple machines access the same repository, this should hold true for all
+of them.
+
 --read-special
 ~~~~~~~~~~~~~~
 

+ 10 - 4
docs/usage/check.rst.inc

@@ -23,16 +23,22 @@ optional arguments
         | attempt to repair any inconsistencies found
     ``--save-space``
         | work slower, but using less space
-    ``--last N``
-        | only check last N archives (Default: all)
-    ``-P``, ``--prefix``
-        | only consider archive names starting with this prefix
     ``-p``, ``--progress``
         | show progress display while checking
 
 `Common options`_
     |
 
+filters
+    ``-P``, ``--prefix``
+        | only consider archive names starting with this prefix
+    ``--sort-by``
+        | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp
+    ``--first N``
+        | consider first N archives after other filters were applied
+    ``--last N``
+        | consider last N archives after other filters were applied
+
 Description
 ~~~~~~~~~~~
 

+ 6 - 2
docs/usage/create.rst.inc

@@ -46,6 +46,10 @@ Filesystem options
         | stay in same file system, do not cross mount points
     ``--numeric-owner``
         | only store numeric user and group identifiers
+    ``--noatime``
+        | do not store atime into archive
+    ``--noctime``
+        | do not store ctime into archive
     ``--ignore-inode``
         | ignore inode data in the file metadata cache used to detect unchanged files.
     ``--read-special``
@@ -76,8 +80,8 @@ The archive name needs to be unique. It must not end in '.checkpoint' or
 '.checkpoint.N' (with N being a number), because these names are used for
 checkpoints and treated in special ways.
 
-In the archive name, you may use the following format tags:
-{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4}, {borgversion}
+In the archive name, you may use the following placeholders:
+{now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
 
 To speed up pulling backups over sshfs and similar network file systems which do
 not provide correct inode information the --ignore-inode flag can be used. This

+ 9 - 8
docs/usage/diff.rst.inc

@@ -6,15 +6,15 @@ borg diff
 ---------
 ::
 
-    borg diff <options> ARCHIVE1 ARCHIVE2 PATH
+    borg diff <options> REPO_ARCHIVE1 ARCHIVE2 PATH
 
 positional arguments
-    ARCHIVE1
-        archive
+    REPO_ARCHIVE1
+        repository location and ARCHIVE1 name
     ARCHIVE2
-        archive to compare with ARCHIVE1 (no repository location)
+        ARCHIVE2 name (no repository location allowed)
     PATH
-        paths to compare; patterns are supported
+        paths of items inside the archives to compare; patterns are supported
 
 optional arguments
     ``-e PATTERN``, ``--exclude PATTERN``
@@ -34,10 +34,11 @@ optional arguments
 Description
 ~~~~~~~~~~~
 
-This command finds differences in files (contents, user, group, mode) between archives.
+This command finds differences (file contents, user/group/mode) between archives.
 
-Both archives need to be in the same repository, and a repository location may only
-be specified for ARCHIVE1.
+A repository location and an archive name must be specified for REPO_ARCHIVE1.
+ARCHIVE2 is just another archive name in the same repository (no repository
+location allowed).
 
 For archives created with Borg 1.1 or newer diff automatically detects whether
 the archives are created with the same chunker params. If so, only chunk IDs

+ 7 - 7
docs/usage/help.rst.inc

@@ -130,19 +130,19 @@ placeholders:
 
 {borgversion}
 
-     The version of borg, e.g.: 1.0.8rc1
+    The version of borg, e.g.: 1.0.8rc1
 
- {borgmajor}
+{borgmajor}
 
-     The version of borg, only the major version, e.g.: 1
+    The version of borg, only the major version, e.g.: 1
 
- {borgminor}
+{borgminor}
 
-     The version of borg, only major and minor version, e.g.: 1.0
+    The version of borg, only major and minor version, e.g.: 1.0
 
- {borgpatch}
+{borgpatch}
 
-     The version of borg, only major, minor and patch version, e.g.: 1.0.8
+    The version of borg, only major, minor and patch version, e.g.: 1.0.8
 
 Examples::
 

+ 15 - 2
docs/usage/init.rst.inc

@@ -66,5 +66,18 @@ a different keyboard layout.
 You can change your passphrase for existing repos at any time, it won't affect
 the encryption/decryption key or other secrets.
 
-When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for
-authentication. Hardware acceleration will be used automatically.
+Encryption modes
+++++++++++++++++
+
+repokey and keyfile use AES-CTR-256 for encryption and HMAC-SHA256 for
+authentication in an encrypt-then-MAC (EtM) construction. The chunk ID hash
+is HMAC-SHA256 as well (with a separate key).
+
+repokey-blake2 and keyfile-blake2 use the same authenticated encryption, but
+use a keyed BLAKE2b-256 hash for the chunk ID hash.
+
+"authenticated" mode uses no encryption, but authenticates repository contents
+through the same keyed BLAKE2b-256 hash as the other blake2 modes.
+The key is stored like repokey.
+
+Hardware acceleration will be used automatically.

+ 10 - 0
docs/usage/mount.rst.inc

@@ -23,6 +23,16 @@ optional arguments
 `Common options`_
     |
 
+filters
+    ``-P``, ``--prefix``
+        | only consider archive names starting with this prefix
+    ``--sort-by``
+        | Comma-separated list of sorting keys; valid keys are: timestamp, name, id; default is: timestamp
+    ``--first N``
+        | consider first N archives after other filters were applied
+    ``--last N``
+        | consider last N archives after other filters were applied
+
 Description
 ~~~~~~~~~~~
 

+ 2 - 0
docs/usage/prune.rst.inc

@@ -61,6 +61,8 @@ If a prefix is set with -P, then only archives that start with the prefix are
 considered for deletion and only those archives count towards the totals
 specified by the rules.
 Otherwise, *all* archives in the repository are candidates for deletion!
+There is no automatic distinction between archives representing different
+contents. These need to be distinguished by specifying matching prefixes.
 
 If you have multiple sequences of archives with different data sets (e.g.
 from different machines) in one shared repository, use one prune call per

+ 5 - 11
docs/usage/recreate.rst.inc

@@ -44,24 +44,18 @@ Exclusion options
 Archive options
     ``--target TARGET``
         | create a new archive with the name ARCHIVE, do not replace existing archive (only applies for a single archive)
+    ``-c SECONDS``, ``--checkpoint-interval SECONDS``
+        | write checkpoint every SECONDS seconds (Default: 1800)
     ``--comment COMMENT``
         | add a comment text to the archive
     ``--timestamp yyyy-mm-ddThh:mm:ss``
         | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
     ``-C COMPRESSION``, ``--compression COMPRESSION``
-        | select compression algorithm (and level):
-        | none == no compression (default),
-        | auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]
-        |               being any valid compression algorithm (and optional level),
-        | lz4 == lz4,
-        | zlib == zlib (default level 6),
-        | zlib,0 .. zlib,9 == zlib (with level 0..9),
-        | lzma == lzma (default level 6),
-        | lzma,0 .. lzma,9 == lzma (with level 0..9).
+        | select compression algorithm, see the output of the "borg help compression" command for details.
     ``--always-recompress``
-        | always recompress chunks, don't skip chunks already compressed with the samealgorithm.
+        | always recompress chunks, don't skip chunks already compressed with the same algorithm.
     ``--compression-from COMPRESSIONCONFIG``
-        | read compression patterns from COMPRESSIONCONFIG, one per line
+        | read compression patterns from COMPRESSIONCONFIG, see the output of the "borg help compression" command for details.
     ``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
         | specify the chunker parameters (or "default").
 

+ 9 - 25
docs/usage/umount.rst.inc

@@ -6,31 +6,15 @@ borg umount
 -----------
 ::
 
-    usage: borg umount [-h] [--critical] [--error] [--warning] [--info] [--debug]
-                       [--lock-wait N] [--show-rc] [--no-files-cache] [--umask M]
-                       [--remote-path PATH]
-                       MOUNTPOINT
-    
-    un-mount the FUSE filesystem
-    
-    positional arguments:
-      MOUNTPOINT            mountpoint of the filesystem to umount
-    
-    optional arguments:
-      -h, --help            show this help message and exit
-      --critical            work on log level CRITICAL
-      --error               work on log level ERROR
-      --warning             work on log level WARNING (default)
-      --info, -v, --verbose
-                            work on log level INFO
-      --debug               work on log level DEBUG
-      --lock-wait N         wait for the lock, but max. N seconds (default: 1).
-      --show-rc             show/log the return code (rc)
-      --no-files-cache      do not load/update the file metadata cache used to
-                            detect unchanged files
-      --umask M             set umask to M (local and remote, default: 0077)
-      --remote-path PATH    set remote path to executable (default: "borg")
-    
+    borg umount <options> MOUNTPOINT
+
+positional arguments
+    MOUNTPOINT
+        mountpoint of the filesystem to umount
+
+`Common options`_
+    |
+
 Description
 ~~~~~~~~~~~
 

+ 6 - 0
setup.py

@@ -179,6 +179,12 @@ if libb2_prefix:
 
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
+    # remove badges
+    long_description = re.compile(r'^\.\. start-badges.*^\.\. end-badges', re.M | re.S).sub('', long_description)
+    # remove |substitutions|
+    long_description = re.compile(r'\|screencast\|').sub('', long_description)
+    # remove unknown directives
+    long_description = re.compile(r'^\.\. highlight:: \w+$', re.M).sub('', long_description)
 
 
 class build_usage(Command):

+ 10 - 3
src/borg/archive.py

@@ -241,7 +241,7 @@ class Archive:
         """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable."""
 
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
-                 checkpoint_interval=300, numeric_owner=False, progress=False,
+                 checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, progress=False,
                  chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None,
                  consider_part_files=False):
         self.cwd = os.getcwd()
@@ -255,6 +255,8 @@ class Archive:
         self.name = name
         self.checkpoint_interval = checkpoint_interval
         self.numeric_owner = numeric_owner
+        self.noatime = noatime
+        self.noctime = noctime
         if start is None:
             start = datetime.utcnow()
         self.chunker_params = chunker_params
@@ -685,10 +687,15 @@ Number of files: {0.stats.nfiles}'''.format(
             mode=st.st_mode,
             uid=st.st_uid,
             gid=st.st_gid,
-            atime=st.st_atime_ns,
-            ctime=st.st_ctime_ns,
             mtime=st.st_mtime_ns,
         )
+        # borg can work with archives only having mtime (older attic archives do not have
+        # atime/ctime). it can be useful to omit atime/ctime, if they change without the
+        # file content changing - e.g. to get better metadata deduplication.
+        if not self.noatime:
+            attrs['atime'] = st.st_atime_ns
+        if not self.noctime:
+            attrs['ctime'] = st.st_ctime_ns
         if self.numeric_owner:
             attrs['user'] = attrs['group'] = None
         else:

+ 8 - 1
src/borg/archiver.py

@@ -344,7 +344,8 @@ class Archiver:
             with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
-                                  numeric_owner=args.numeric_owner, progress=args.progress,
+                                  numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
+                                  progress=args.progress,
                                   chunker_params=args.chunker_params, start=t0,
                                   compression=args.compression, compression_files=args.compression_files)
                 create_inner(archive, cache)
@@ -1880,6 +1881,12 @@ class Archiver:
         fs_group.add_argument('--numeric-owner', dest='numeric_owner',
                               action='store_true', default=False,
                               help='only store numeric user and group identifiers')
+        fs_group.add_argument('--noatime', dest='noatime',
+                              action='store_true', default=False,
+                              help='do not store atime into archive')
+        fs_group.add_argument('--noctime', dest='noctime',
+                              action='store_true', default=False,
+                              help='do not store ctime into archive')
         fs_group.add_argument('--ignore-inode', dest='ignore_inode',
                               action='store_true', default=False,
                               help='ignore inode data in the file metadata cache used to detect unchanged files.')

+ 6 - 3
src/borg/cache.py

@@ -217,7 +217,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
     def _read_files(self):
         self.files = {}
-        self._newest_mtime = 0
+        self._newest_mtime = None
         logger.debug('Reading files cache ...')
         with open(os.path.join(self.path, 'files'), 'rb') as fd:
             u = msgpack.Unpacker(use_list=True)
@@ -254,8 +254,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return
         pi = ProgressIndicatorMessage()
         if self.files is not None:
-            pi.output('Saving files cache')
+            if self._newest_mtime is None:
+                # was never set because no files were modified/added
+                self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
+            pi.output('Saving files cache')
             with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest mtime seen in this backup -
@@ -484,4 +487,4 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
-        self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns)
+        self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)