Преглед изворни кода

Merge branch '1.0-maint' (into master)

Thomas Waldmann пре 9 година
родитељ
комит
c83a124e65
11 измењених фајлова са 369 додато и 129 уклоњено
  1. 5 3
      README.rst
  2. 50 66
      Vagrantfile
  3. 84 4
      docs/changes.rst
  4. 1 1
      docs/faq.rst
  5. 96 27
      docs/internals.rst
  6. 1 1
      docs/quickstart.rst
  7. 102 20
      docs/usage.rst
  8. 6 0
      setup.py
  9. 10 3
      src/borg/archive.py
  10. 8 1
      src/borg/archiver.py
  11. 6 3
      src/borg/cache.py

+ 5 - 3
README.rst

@@ -89,9 +89,7 @@ Initialize a new backup repository and create a backup archive::
     $ borg init /path/to/repo
     $ borg create /path/to/repo::Saturday1 ~/Documents
 
-Now doing another backup, just to show off the great deduplication:
-
-.. code-block:: none
+Now doing another backup, just to show off the great deduplication::
 
     $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
     -----------------------------------------------------------------------------
@@ -141,6 +139,8 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
 Security issues should be reported to the `Security contact`_ (or
 see ``docs/suppport.rst`` in the source distribution).
 
+.. start-badges
+
 |doc| |build| |coverage| |bestpractices|
 
 .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable
@@ -162,3 +162,5 @@ see ``docs/suppport.rst`` in the source distribution).
 .. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge
         :alt: Best Practices Score
         :target: https://bestpractices.coreinfrastructure.org/projects/271
+
+.. end-badges

+ 50 - 66
Vagrantfile

@@ -65,9 +65,9 @@ def packages_darwin
     # install all the (security and other) updates
     sudo softwareupdate --install --all
     # get osxfuse 3.x release code from github:
-    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.2/osxfuse-3.5.2.dmg >osxfuse.dmg
+    curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.3/osxfuse-3.5.3.dmg >osxfuse.dmg
     MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
-    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.2.pkg" -target /
+    && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.3.pkg" -target /
     sudo chown -R vagrant /usr/local  # brew must be able to create stuff here
     ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
     brew update
@@ -172,14 +172,14 @@ def packages_cygwin(version)
     set CYGSETUP=#{setup_exe}
     REM --- Install build version of CygWin in a subfolder
     set OURPATH=%cd%
-	    set CYGBUILD="C:\\cygwin\\CygWin"
-	    set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/
-	    set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel
+    set CYGBUILD="C:\\cygwin\\CygWin"
+    set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/
+    set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel
     %CYGSETUP% -q -B -o -n -R %CYGBUILD% -L -D -s %CYGMIRROR% -P %BUILDPKGS%
     cd /d C:\\cygwin\\CygWin\\bin
     regtool set /HKLM/SYSTEM/CurrentControlSet/Services/OpenSSHd/ImagePath "C:\\cygwin\\CygWin\\bin\\cygrunsrv.exe"
     bash -c "ssh-host-config --no"
-	    ' > /cygdrive/c/cygwin/install.bat
+    ' > /cygdrive/c/cygwin/install.bat
     cd /cygdrive/c/cygwin && cmd.exe /c install.bat
 
     echo "alias mkdir='mkdir -p'" > ~/.profile
@@ -201,7 +201,6 @@ def install_cygwin_venv
   EOF
 end
 
-
 def install_pyenv(boxname)
   return <<-EOF
     curl -s -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash
@@ -248,8 +247,8 @@ def build_pyenv_venv(boxname)
   EOF
 end
 
-def install_borg(boxname)
-  return <<-EOF
+def install_borg(fuse)
+  script = <<-EOF
     . ~/.bash_profile
     cd /vagrant/borg
     . borg-env/bin/activate
@@ -260,31 +259,24 @@ def install_borg(boxname)
     rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
     rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
     pip install -r requirements.d/development.txt
-    # by using [fuse], setup.py can handle different fuse requirements:
-    pip install -e .[fuse]
-  EOF
-end
-
-def install_borg_no_fuse(boxname)
-  return <<-EOF
-    . ~/.bash_profile
-    cd /vagrant/borg
-    . borg-env/bin/activate
-    pip install -U wheel  # upgrade wheel, too old for 3.5
-    cd borg
-    # clean up (wrong/outdated) stuff we likely got via rsync:
-    rm -f borg/*.so borg/*.cpy*
-    rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
-    rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
-    pip install -r requirements.d/development.txt
-    pip install -e .
-    # do not install llfuse into the virtualenvs built by tox:
-    sed -i.bak '/fuse.txt/d' tox.ini
   EOF
+  if fuse
+    script += <<-EOF
+      # by using [fuse], setup.py can handle different fuse requirements:
+      pip install -e .[fuse]
+    EOF
+  else
+    script += <<-EOF
+      pip install -e .
+      # do not install llfuse into the virtualenvs built by tox:
+      sed -i.bak '/fuse.txt/d' tox.ini
+    EOF
+  end
+  return script
 end
 
-def install_pyinstaller(boxname)
-  return <<-EOF
+def install_pyinstaller(bootloader)
+  script = <<-EOF
     . ~/.bash_profile
     cd /vagrant/borg
     . borg-env/bin/activate
@@ -292,25 +284,19 @@ def install_pyinstaller(boxname)
     cd pyinstaller
     # develop branch, with fixed / freshly rebuilt bootloaders
     git checkout fresh-bootloader
-    pip install -e .
   EOF
-end
-
-def install_pyinstaller_bootloader(boxname)
-  return <<-EOF
-    . ~/.bash_profile
-    cd /vagrant/borg
-    . borg-env/bin/activate
-    git clone https://github.com/thomaswaldmann/pyinstaller.git
-    cd pyinstaller
-    # develop branch, with fixed / freshly rebuilt bootloaders
-    git checkout fresh-bootloader
-    # build bootloader, if it is not included
-    cd bootloader
-    python ./waf all
-    cd ..
+  if bootloader
+    script += <<-EOF
+      # build bootloader, if it is not included
+      cd bootloader
+      python ./waf all
+      cd ..
+    EOF
+  end
+  script += <<-EOF
     pip install -e .
   EOF
+  return script
 end
 
 def build_binary_with_pyinstaller(boxname)
@@ -347,13 +333,11 @@ end
 def fix_perms
   return <<-EOF
     # . ~/.profile
-
     if id "vagrant" >/dev/null 2>&1; then
       chown -R vagrant /vagrant/borg
     else
       chown -R ubuntu /vagrant/borg
     fi
-
   EOF
 end
 
@@ -381,7 +365,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos7_64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos7_64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos7_64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos7_64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos7_64")
   end
 
@@ -391,7 +375,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32")
   end
 
@@ -404,7 +388,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64")
   end
 
@@ -415,7 +399,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("xenial64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("xenial64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("xenial64")
   end
 
@@ -426,7 +410,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64")
   end
 
@@ -437,7 +421,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("jessie64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("jessie64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("jessie64")
   end
 
@@ -448,8 +432,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy32")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy32")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy32")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy32")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32")
   end
@@ -461,8 +445,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy64")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy64")
   end
@@ -475,8 +459,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "fix pyenv", :type => :shell, :privileged => false, :inline => fix_pyenv_darwin("darwin64")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("darwin64")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("darwin64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("darwin64")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("darwin64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("darwin64")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("darwin64")
   end
@@ -491,8 +475,8 @@ Vagrant.configure(2) do |config|
     b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("freebsd")
     b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("freebsd")
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("freebsd")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("freebsd")
-    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller_bootloader("freebsd")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true)
+    b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(true)
     b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd")
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd")
   end
@@ -504,7 +488,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64")
   end
 
@@ -515,7 +499,7 @@ Vagrant.configure(2) do |config|
     end
     b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd
     b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64")
-    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64")
+    b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64")
   end
 
@@ -542,7 +526,7 @@ Vagrant.configure(2) do |config|
     b.vm.provision :reload
     b.vm.provision "cygwin install pip", :type => :shell, :privileged => false, :inline => install_cygwin_venv
     b.vm.provision "cygwin build env", :type => :shell, :privileged => false, :inline => build_sys_venv("windows10")    
-    b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("windows10")
+    b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg(false)
     b.vm.provision "cygwin run tests", :type => :shell, :privileged => false, :inline => run_tests("windows10")
   end
 end

+ 84 - 4
docs/changes.rst

@@ -1,8 +1,25 @@
-Changelog
-=========
+Important notes
+===============
+
+This section is used for infos about e.g. security and corruption issues.
+
+Pre-1.0.9 potential data loss
+-----------------------------
+
+If you have archives in your repository that were made with attic <= 0.13
+(and later migrated to borg), running borg check would report errors in these
+archives. See issue #1837.
+
+The reason for this is a invalid (and useless) metadata key that was
+always added due to a bug in these old attic versions.
+
+If you run borg check --repair, things escalate quickly: all archive items
+with invalid metadata will be killed. Due to that attic bug, that means all
+items in all archives made with these old attic versions.
 
-Important note about pre-1.0.4 potential repo corruption
---------------------------------------------------------
+
+Pre-1.0.4 potential repo corruption
+-----------------------------------
 
 Some external errors (like network or disk I/O errors) could lead to
 corruption of the backup repository due to issue #1138.
@@ -49,6 +66,69 @@ The best check that everything is ok is to run a dry-run extraction::
 
     borg extract -v --dry-run REPO::ARCHIVE
 
+.. _changelog:
+
+Changelog
+=========
+
+Version 1.0.9rc1 (2016-11-27)
+-----------------------------
+
+Bug fixes:
+
+- files cache: fix determination of newest mtime in backup set (which is
+  used in cache cleanup and led to wrong "A" [added] status for unchanged
+  files in next backup), #1860.
+
+- borg check:
+
+  - fix incorrectly reporting attic 0.13 and earlier archives as corrupt
+  - handle repo w/o objects gracefully and also bail out early if repo is
+    *completely* empty, #1815.
+- fix tox/pybuild in 1.0-maint
+- at xattr module import time, loggers are not initialized yet
+
+New features:
+
+- borg umount <mountpoint>
+  exposed already existing umount code via the CLI api, so users can use it,
+  which is more consistent than using borg to mount and fusermount -u (or
+  umount) to un-mount, #1855.
+- implement borg create --noatime --noctime, fixes #1853
+
+Other changes:
+
+- docs:
+
+  - display README correctly on PyPI
+  - improve cache / index docs, esp. files cache docs, fixes #1825
+  - different pattern matching for --exclude, #1779
+  - datetime formatting examples for {now} placeholder, #1822
+  - clarify passphrase mode attic repo upgrade, #1854
+  - clarify --umask usage, #1859
+  - clarify how to choose PR target branch
+  - clarify prune behavior for different archive contents, #1824
+  - fix PDF issues, add logo, fix authors, headings, TOC
+  - move security verification to support section
+  - fix links in standalone README (:ref: tags)
+  - add link to security contact in README
+  - add FAQ about security
+  - move fork differences to FAQ
+  - add more details about resource usage
+- tests: skip remote tests on cygwin, #1268
+- travis:
+
+  - allow OS X failures until the brew cask osxfuse issue is fixed
+  - caskroom osxfuse-beta gone, it's osxfuse now (3.5.3)
+- vagrant:
+
+  - upgrade OSXfuse / FUSE for macOS to 3.5.3
+  - remove llfuse from tox.ini at a central place
+  - do not try to install llfuse on centos6
+  - fix fuse test for darwin, #1546
+  - add windows virtual machine with cygwin
+  - Vagrantfile cleanup / code deduplication
+
 
 Version 1.1.0b2 (2016-10-01)
 ----------------------------

+ 1 - 1
docs/faq.rst

@@ -540,7 +540,7 @@ Here's a (incomplete) list of some major changes:
 * better logging, screen output, progress indication
 * tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X
 
-Please read the `ChangeLog`_ (or ``docs/changes.rst`` in the source distribution) for more
+Please read the :ref:`changelog` (or ``docs/changes.rst`` in the source distribution) for more
 information.
 
 Borg is not compatible with original attic (but there is a one-way conversion).

+ 96 - 27
docs/internals.rst

@@ -252,44 +252,94 @@ For some more general usage hints see also ``--chunker-params``.
 Indexes / Caches
 ----------------
 
-The **files cache** is stored in ``cache/files`` and is indexed on the
-``file path hash``. At backup time, it is used to quickly determine whether we
-need to chunk a given file (or whether it is unchanged and we already have all
-its pieces).
-It contains:
+The **files cache** is stored in ``cache/files`` and is used at backup time to
+quickly determine whether a given file is unchanged and we have all its chunks.
+
+The files cache is a key -> value mapping and contains:
+
+* key:
+
+  - full, absolute file path id_hash
+* value:
+
+  - file inode number
+  - file size
+  - file mtime_ns
+  - list of file content chunk id hashes
+  - age (0 [newest], 1, 2, 3, ..., BORG_FILES_CACHE_TTL - 1)
+
+To determine whether a file has not changed, cached values are looked up via
+the key in the mapping and compared to the current file attribute values.
+
+If the file's size, mtime_ns and inode number is still the same, it is
+considered to not have changed. In that case, we check that all file content
+chunks are (still) present in the repository (we check that via the chunks
+cache).
+
+If everything is matching and all chunks are present, the file is not read /
+chunked / hashed again (but still a file metadata item is written to the
+archive, made from fresh file metadata read from the filesystem). This is
+what makes borg so fast when processing unchanged files.
 
-* age
-* file inode number
-* file size
-* file mtime_ns
-* file content chunk hashes
+If there is a mismatch or a chunk is missing, the file is read / chunked /
+hashed. Chunks already present in repo won't be transferred to repo again.
 
-The inode number is stored to make sure we distinguish between
+The inode number is stored and compared to make sure we distinguish between
 different files, as a single path may not be unique across different
 archives in different setups.
 
-The files cache is stored as a python associative array storing
-python objects, which generates a lot of overhead.
+Not all filesystems have stable inode numbers. If that is the case, borg can
+be told to ignore the inode number in the check via --ignore-inode.
 
-The **chunks cache** is stored in ``cache/chunks`` and is indexed on the
-``chunk id_hash``. It is used to determine whether we already have a specific
-chunk, to count references to it and also for statistics.
-It contains:
+The age value is used for cache management. If a file is "seen" in a backup
+run, its age is reset to 0, otherwise its age is incremented by one.
+If a file was not seen in BORG_FILES_CACHE_TTL backups, its cache entry is
+removed. See also: :ref:`always_chunking` and :ref:`a_status_oddity`
 
-* reference count
-* size
-* encrypted/compressed size
+The files cache is a python dictionary, storing python objects, which
+generates a lot of overhead.
 
-The **repository index** is stored in ``repo/index.%d`` and is indexed on the
-``chunk id_hash``. It is used to determine a chunk's location in the repository.
-It contains:
+Borg can also work without using the files cache (saves memory if you have a
+lot of files or not much RAM free), then all files are assumed to have changed.
+This is usually much slower than with files cache.
+
+The **chunks cache** is stored in ``cache/chunks`` and is used to determine
+whether we already have a specific chunk, to count references to it and also
+for statistics.
+
+The chunks cache is a key -> value mapping and contains:
+
+* key:
+
+  - chunk id_hash
+* value:
 
-* segment (that contains the chunk)
-* offset (where the chunk is located in the segment)
+  - reference count
+  - size
+  - encrypted/compressed size
+
+The chunks cache is a hashindex, a hash table implemented in C and tuned for
+memory efficiency.
+
+The **repository index** is stored in ``repo/index.%d`` and is used to
+determine a chunk's location in the repository.
+
+The repo index is a key -> value mapping and contains:
+
+* key:
+
+  - chunk id_hash
+* value:
+
+  - segment (that contains the chunk)
+  - offset (where the chunk is located in the segment)
+
+The repo index is a hashindex, a hash table implemented in C and tuned for
+memory efficiency.
 
-The repository index file is random access.
 
 Hints are stored in a file (``repo/hints.%d``).
+
 It contains:
 
 * version
@@ -314,7 +364,7 @@ varies between 33% and 300%.
 Indexes / Caches memory usage
 -----------------------------
 
-Here is the estimated memory usage of |project_name|:
+Here is the estimated memory usage of |project_name| - it's complicated:
 
   chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS
 
@@ -327,6 +377,14 @@ Here is the estimated memory usage of |project_name|:
   mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage
              = chunk_count * 164 + total_file_count * 240
 
+Due to the hashtables, the best/usual/worst cases for memory allocation can
+be estimated like that:
+
+  mem_allocation = mem_usage / load_factor  # l_f = 0.25 .. 0.75
+
+  mem_allocation_peak = mem_allocation * (1 + growth_factor)  # g_f = 1.1 .. 2
+
+
 All units are Bytes.
 
 It is assuming every chunk is referenced exactly once (if you have a lot of
@@ -338,6 +396,17 @@ more chunks than estimated above, because 1 file is at least 1 chunk).
 
 If a remote repository is used the repo index will be allocated on the remote side.
 
+The chunks cache, files cache and the repo index are all implemented as hash
+tables. A hash table must have a significant amount of unused entries to be
+fast - the so-called load factor gives the used/unused elements ratio.
+
+When a hash table gets full (load factor getting too high), it needs to be
+grown (allocate new, bigger hash table, copy all elements over to it, free old
+hash table) - this will lead to short-time peaks in memory usage each time this
+happens. Usually does not happen for all hashtables at the same time, though.
+For small hash tables, we start with a growth factor of 2, which comes down to
+~1.1x for big hash tables.
+
 E.g. backing up a total count of 1 Mi (IEC binary prefix i.e. 2^20) files with a total size of 1TiB.
 
 a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic):

+ 1 - 1
docs/quickstart.rst

@@ -59,7 +59,7 @@ A step by step example
     -rw-r--r-- user   group       7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc
     ...
 
-6. Restore the *Monday* archive::
+6. Restore the *Monday* archive by extracting the files relative to the current directory::
 
     $ borg extract /path/to/repo::Monday
 

+ 102 - 20
docs/usage.rst

@@ -42,7 +42,7 @@ Note: you may also prepend a ``file://`` to a filesystem path to get URL style.
 ``user@host:~other/path/to/repo`` - path relative to other's home directory
 
 Note: giving ``user@host:/./path/to/repo`` or ``user@host:/~/path/to/repo`` or
-``user@host:/~other/path/to/repo``is also supported, but not required here.
+``user@host:/~other/path/to/repo`` is also supported, but not required here.
 
 
 **Remote repositories with relative pathes, alternative syntax with port**:
@@ -220,36 +220,80 @@ Resource Usage
 
 |project_name| might use a lot of resources depending on the size of the data set it is dealing with.
 
-CPU:
+If one uses |project_name| in a client/server way (with a ssh: repository),
+the resource usage occurs in part on the client and in another part on the
+server.
+
+If one uses |project_name| as a single process (with a filesystem repo),
+all the resource usage occurs in that one process, so just add up client +
+server to get the approximate resource usage.
+
+CPU client:
+    borg create: does chunking, hashing, compression, crypto (high CPU usage)
+    chunks cache sync: quite heavy on CPU, doing lots of hashtable operations.
+    borg extract: crypto, decompression (medium to high CPU usage)
+    borg check: similar to extract, but depends on options given.
+    borg prune / borg delete archive: low to medium CPU usage
+    borg delete repo: done on the server
     It won't go beyond 100% of 1 core as the code is currently single-threaded.
     Especially higher zlib and lzma compression levels use significant amounts
-    of CPU cycles.
+    of CPU cycles. Crypto might be cheap on the CPU (if hardware accelerated) or
+    expensive (if not).
 
-Memory (RAM):
+CPU server:
+    It usually doesn't need much CPU, it just deals with the key/value store
+    (repository) and uses the repository index for that.
+
+    borg check: the repository check computes the checksums of all chunks
+    (medium CPU usage)
+    borg delete repo: low CPU usage
+
+CPU (only for client/server operation):
+    When using borg in a client/server way with a ssh:-type repo, the ssh
+    processes used for the transport layer will need some CPU on the client and
+    on the server due to the crypto they are doing - esp. if you are pumping
+    big amounts of data.
+
+Memory (RAM) client:
     The chunks index and the files index are read into memory for performance
-    reasons.
+    reasons. Might need big amounts of memory (see below).
     Compression, esp. lzma compression with high levels might need substantial
     amounts of memory.
 
-Temporary files:
-    Reading data and metadata from a FUSE mounted repository will consume about
-    the same space as the deduplicated chunks used to represent them in the
-    repository.
+Memory (RAM) server:
+    The server process will load the repository index into memory. Might need
+    considerable amounts of memory, but less than on the client (see below).
 
-Cache files:
-    Contains the chunks index and files index (plus a compressed collection of
-    single-archive chunk indexes).
-
-Chunks index:
+Chunks index (client only):
     Proportional to the amount of data chunks in your repo. Lots of chunks
     in your repo imply a big chunks index.
     It is possible to tweak the chunker params (see create options).
 
-Files index:
-    Proportional to the amount of files in your last backup. Can be switched
-    off (see create options), but next backup will be much slower if you do.
+Files index (client only):
+    Proportional to the amount of files in your last backups. Can be switched
+    off (see create options), but next backup might be much slower if you do.
+    The speed benefit of using the files cache is proportional to file size.
+
+Repository index (server only):
+    Proportional to the amount of data chunks in your repo. Lots of chunks
+    in your repo imply a big repository index.
+    It is possible to tweak the chunker params (see create options) to
+    influence the amount of chunks being created.
 
-Network:
+Temporary files (client):
+    Reading data and metadata from a FUSE mounted repository will consume up to
+    the size of all deduplicated, small chunks in the repository. Big chunks
+    won't be locally cached.
+
+Temporary files (server):
+    None.
+
+Cache files (client only):
+    Contains the chunks index and files index (plus a collection of single-
+    archive chunk indexes which might need huge amounts of disk space,
+    depending on archive count and size - see FAQ about how to reduce).
+
+Network (only for client/server operation):
     If your repository is remote, all deduplicated (and optionally compressed/
     encrypted) data of course has to go over the connection (ssh: repo url).
     If you use a locally mounted network filesystem, additionally some copy
@@ -257,7 +301,8 @@ Network:
     you backup multiple sources to one target repository, additional traffic
     happens for cache resynchronization.
 
-In case you are interested in more details, please read the internals documentation.
+In case you are interested in more details (like formulas), please see
+:ref:`internals`.
 
 File systems
 ~~~~~~~~~~~~
@@ -386,7 +431,19 @@ Examples
 
     # Use short hostname, user name and current time in archive name
     $ borg create /path/to/repo::{hostname}-{user}-{now} ~
-    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~
+    # Similar, use the same datetime format as borg 1.1 will have as default
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S} ~
+    # As above, but add nanoseconds
+    $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S.%f} ~
+
+Notes
+~~~~~
+
+- the --exclude patterns are not like tar. In tar --exclude .bundler/gems will
+  exclude foo/.bundler/gems. In borg it will not, you need to use --exclude
+  '\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for
+  more information.
+
 
 .. include:: usage/extract.rst.inc
 
@@ -705,6 +762,20 @@ Examples
     no key file found for repository
 
 
+Upgrading a passphrase encrypted attic repo
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+attic offered a "passphrase" encryption mode, but this was removed in borg 1.0
+and replaced by the "repokey" mode (which stores the passphrase-protected
+encryption key into the repository config).
+
+Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps
+are needed, in this order:
+
+- borg upgrade repo
+- borg migrate-to-repokey repo
+
+
 .. include:: usage/recreate.rst.inc
 
 Examples
@@ -861,6 +932,17 @@ a new repository when changing chunker params.
 
 For more details, see :ref:`chunker_details`.
 
+
+--umask
+~~~~~~~
+
+If you use ``--umask``, make sure that all repository-modifying borg commands
+(create, delete, prune) that access the repository in question use the same
+``--umask`` value.
+
+If multiple machines access the same repository, this should hold true for all
+of them.
+
 --read-special
 ~~~~~~~~~~~~~~
 

+ 6 - 0
setup.py

@@ -179,6 +179,12 @@ if libb2_prefix:
 
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
+    # remove badges
+    long_description = re.compile(r'^\.\. start-badges.*^\.\. end-badges', re.M | re.S).sub('', long_description)
+    # remove |substitutions|
+    long_description = re.compile(r'\|screencast\|').sub('', long_description)
+    # remove unknown directives
+    long_description = re.compile(r'^\.\. highlight:: \w+$', re.M).sub('', long_description)
 
 
 class build_usage(Command):

+ 10 - 3
src/borg/archive.py

@@ -241,7 +241,7 @@ class Archive:
         """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable."""
 
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
-                 checkpoint_interval=300, numeric_owner=False, progress=False,
+                 checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, progress=False,
                  chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None,
                  consider_part_files=False):
         self.cwd = os.getcwd()
@@ -255,6 +255,8 @@ class Archive:
         self.name = name
         self.checkpoint_interval = checkpoint_interval
         self.numeric_owner = numeric_owner
+        self.noatime = noatime
+        self.noctime = noctime
         if start is None:
             start = datetime.utcnow()
         self.chunker_params = chunker_params
@@ -685,10 +687,15 @@ Number of files: {0.stats.nfiles}'''.format(
             mode=st.st_mode,
             uid=st.st_uid,
             gid=st.st_gid,
-            atime=st.st_atime_ns,
-            ctime=st.st_ctime_ns,
             mtime=st.st_mtime_ns,
         )
+        # borg can work with archives only having mtime (older attic archives do not have
+        # atime/ctime). it can be useful to omit atime/ctime, if they change without the
+        # file content changing - e.g. to get better metadata deduplication.
+        if not self.noatime:
+            attrs['atime'] = st.st_atime_ns
+        if not self.noctime:
+            attrs['ctime'] = st.st_ctime_ns
         if self.numeric_owner:
             attrs['user'] = attrs['group'] = None
         else:

+ 8 - 1
src/borg/archiver.py

@@ -344,7 +344,8 @@ class Archiver:
             with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
-                                  numeric_owner=args.numeric_owner, progress=args.progress,
+                                  numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
+                                  progress=args.progress,
                                   chunker_params=args.chunker_params, start=t0,
                                   compression=args.compression, compression_files=args.compression_files)
                 create_inner(archive, cache)
@@ -1880,6 +1881,12 @@ class Archiver:
         fs_group.add_argument('--numeric-owner', dest='numeric_owner',
                               action='store_true', default=False,
                               help='only store numeric user and group identifiers')
+        fs_group.add_argument('--noatime', dest='noatime',
+                              action='store_true', default=False,
+                              help='do not store atime into archive')
+        fs_group.add_argument('--noctime', dest='noctime',
+                              action='store_true', default=False,
+                              help='do not store ctime into archive')
         fs_group.add_argument('--ignore-inode', dest='ignore_inode',
                               action='store_true', default=False,
                               help='ignore inode data in the file metadata cache used to detect unchanged files.')

+ 6 - 3
src/borg/cache.py

@@ -217,7 +217,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
     def _read_files(self):
         self.files = {}
-        self._newest_mtime = 0
+        self._newest_mtime = None
         logger.debug('Reading files cache ...')
         with open(os.path.join(self.path, 'files'), 'rb') as fd:
             u = msgpack.Unpacker(use_list=True)
@@ -254,8 +254,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return
         pi = ProgressIndicatorMessage()
         if self.files is not None:
-            pi.output('Saving files cache')
+            if self._newest_mtime is None:
+                # was never set because no files were modified/added
+                self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
+            pi.output('Saving files cache')
             with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest mtime seen in this backup -
@@ -484,4 +487,4 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
-        self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns)
+        self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)