Browse Source

Merge branch 'master' into multithreading

Note: there is a failing archiver test on py33-only now.
It appears to be related to the __del__ method usage in Cache
and/or the locking code; the exact cause of this behavior
could not be determined.
Thomas Waldmann 10 years ago
parent
commit
322a87cbfd

+ 1 - 0
.gitignore

@@ -18,6 +18,7 @@ platform_linux.c
 docs/usage/*.inc
 .idea/
 .cache/
+borg/_version.py
 borg.build/
 borg.dist/
 borg.exe

+ 2 - 1
.travis/install.sh

@@ -43,5 +43,6 @@ fi
 
 python -m virtualenv ~/.venv
 source ~/.venv/bin/activate
-pip install tox pytest pytest-cov codecov Cython
+pip install -r requirements.d/development.txt
+pip install codecov
 pip install -e .

+ 49 - 4
CHANGES.rst

@@ -2,11 +2,33 @@ Borg Changelog
 ==============
 
 
-Version 0.25.0 (not released yet)
+Version 0.26.0 (not released yet)
 ---------------------------------
 
+New features:
+
+- BORG_REPO env var to specify the default repo, #168
+- read special files as if they were regular files, #79
+
+Bug fixes:
+
+- borg mount repo: use absolute path, attic #200, attic #137
+- chunker: use off_t to get 64bit on 32bit platform, #178
+- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0)
+- fix reaction to "no" answer at delete repo prompt, #182
+
+Other changes:
+
+- detect inconsistency / corruption / hash collision, #170
+- replace versioneer with setuptools_scm, #106
+
+
+Version 0.25.0
+--------------
+
 Compatibility notes:
 
+- lz4 compression library (liblz4) is a new requirement (#156)
 - the new compression code is very compatible: as long as you stay with zlib
   compression, older borg releases will still be able to read data from a
   repo/archive made with the new code (note: this is not the case for the
@@ -25,24 +47,47 @@ Deprecations:
   --compression 1 (in 0.24) is the same as --compression zlib,1 (now)
   --compression 9 (in 0.24) is the same as --compression zlib,9 (now)
 
-
 New features:
 
 - create --compression none (default, means: do not compress, just pass through
   data "as is". this is more efficient than zlib level 0 as used in borg 0.24)
 - create --compression lz4 (super-fast, but not very high compression)
-  Please note that borgbackup needs lz4 library as additional requirement.
 - create --compression zlib,N (slower, higher compression, default for N is 6)
 - create --compression lzma,N (slowest, highest compression, default N is 6)
 - honor the nodump flag (UF_NODUMP) and do not backup such items
+- list --short just outputs a simple list of the files/directories in an archive
 
 Bug fixes:
 
+- fixed --chunker-params parameter order confusion / malfunction, fixes #154
 - close fds of segments we delete (during compaction)
+- close files which fell out of the lrucache
+- fadvise DONTNEED now is only called for the byte range actually read, not for
+  the whole file, fixes #158.
+- fix issue with negative "all archives" size, fixes #165
+- restore_xattrs: ignore if setxattr fails with EACCES, fixes #162
 
 Other changes:
 
-- none yet
+- remove fakeroot requirement for tests, tests run faster without fakeroot
+  (test setup does not fail any more without fakeroot, so you can run with or
+  without fakeroot), fixes #151 and #91.
+- more tests for archiver
+- recover_segment(): don't assume we have an fd for segment
+- lrucache refactoring / cleanup, add dispose function, py.test tests
+- generalize hashindex code for any key length (less hardcoding)
+- lock roster: catch file not found in remove() method and ignore it
+- travis CI: use requirements file
+- improved docs:
+
+  - replace hack for llfuse with proper solution (install libfuse-dev)
+  - update docs about compression
+  - update development docs about fakeroot
+  - internals: add some words about lock files / locking system
+  - support: mention BountySource and for what it can be used
+  - theme: use a lighter green
+  - add pypi, wheel, dist package based install docs
+  - split install docs into system-specific preparations and generic instructions
 
 
 Version 0.24.0

+ 1 - 3
borg/__init__.py

@@ -1,5 +1,3 @@
 # This is a python package
 
-from ._version import get_versions
-__version__ = get_versions()['version']
-del get_versions
+from ._version import version as __version__

+ 11 - 4
borg/_chunker.c

@@ -83,7 +83,8 @@ typedef struct {
     PyObject *fd;
     int fh;
     int done, eof;
-    size_t remaining, bytes_read, bytes_yielded, position, last;
+    size_t remaining, position, last;
+    off_t bytes_read, bytes_yielded;
 } Chunker;
 
 static Chunker *
@@ -96,6 +97,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32
     c->table = buzhash_init_table(seed);
     c->buf_size = max_size;
     c->data = malloc(c->buf_size);
+    c->fh = -1;
     return c;
 }
 
@@ -128,6 +130,7 @@ static int
 chunker_fill(Chunker *c, PyThreadState **tstatep)
 {
     ssize_t n;
+    off_t offset, length;
     PyObject *data;
     memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
     c->position -= c->last;
@@ -137,6 +140,7 @@ chunker_fill(Chunker *c, PyThreadState **tstatep)
         return 1;
     }
     if(c->fh >= 0) {
+        offset = c->bytes_read;
         // if we have a os-level file descriptor, use os-level API
         n = read(c->fh, c->data + c->position + c->remaining, n);
         if(n > 0) {
@@ -151,13 +155,16 @@ chunker_fill(Chunker *c, PyThreadState **tstatep)
             // some error happened
             return 0;
         }
+        length = c->bytes_read - offset;
         #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L )
-        // We tell the OS that we do not need the data of this file any more
-        // that it maybe has in the cache. This avoids that we spoil the
+        // We tell the OS that we do not need the data that we just have read any
+        // more (that it maybe has in the cache). This avoids that we spoil the
         // complete cache with data that we only read once and (due to cache
         // size limit) kick out data from the cache that might be still useful
         // for the OS or other processes.
-        posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED);
+        if (length > 0) {
+            posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED);
+        }
         #endif
     }
     else {

+ 2 - 2
borg/_hashindex.c

@@ -380,8 +380,8 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
         chunks += values[0];
         unique_size += values[1];
         unique_csize += values[2];
-        size += values[0] * values[1];
-        csize += values[0] * values[2];
+        size += (int64_t) values[0] * values[1];
+        csize += (int64_t) values[0] * values[2];
     }
     *total_size = size;
     *total_csize = csize;

+ 0 - 239
borg/_version.py

@@ -1,239 +0,0 @@
-
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (built by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain. Generated by
-# versioneer-0.14 (https://github.com/warner/python-versioneer)
-
-import errno
-import os
-import re
-import subprocess
-import sys
-
-# these strings will be replaced by git during git-archive
-git_refnames = "$Format:%d$"
-git_full = "$Format:%H$"
-
-# these strings are filled in when 'setup.py versioneer' creates _version.py
-tag_prefix = ""
-parentdir_prefix = "borgbackup-"
-versionfile_source = "borg/_version.py"
-
-
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
-    assert isinstance(commands, list)
-    p = None
-    for c in commands:
-        try:
-            # remember shell=False, so use git.cmd on windows, not just git
-            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
-                                 stderr=(subprocess.PIPE if hide_stderr
-                                         else None))
-            break
-        except EnvironmentError:
-            e = sys.exc_info()[1]
-            if e.errno == errno.ENOENT:
-                continue
-            if verbose:
-                print("unable to run %s" % args[0])
-                print(e)
-            return None
-    else:
-        if verbose:
-            print("unable to find command, tried %s" % (commands,))
-        return None
-    stdout = p.communicate()[0].strip()
-    if sys.version_info[0] >= 3:
-        stdout = stdout.decode()
-    if p.returncode != 0:
-        if verbose:
-            print("unable to run %s (error)" % args[0])
-        return None
-    return stdout
-
-
-def versions_from_parentdir(parentdir_prefix, root, verbose=False):
-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with "
-                  "prefix '%s'" % (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
-
-
-def git_get_keywords(versionfile_abs):
-    # the code embedded in _version.py can just fetch the value of these
-    # keywords. When used from setup.py, we don't want to import _version.py,
-    # so we do it with a regexp instead. This function is not used from
-    # _version.py.
-    keywords = {}
-    try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-        f.close()
-    except EnvironmentError:
-        pass
-    return keywords
-
-
-def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
-    if not keywords:
-        return {}  # keyword-finding function failed to find keywords
-    refnames = keywords["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("keywords are unexpanded, not using")
-        return {}  # unexpanded, so not in an unpacked git-archive tarball
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
-    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
-    TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
-    if not tags:
-        # Either we're using git < 1.8.3, or there really are no tags. We use
-        # a heuristic: assume all version tags have a digit. The old git %d
-        # expansion behaves like git log --decorate=short and strips out the
-        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
-        # between branches and tags. By ignoring refnames without digits, we
-        # filter out many common branch names like "release" and
-        # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r'\d', r)])
-        if verbose:
-            print("discarding '%s', no digits" % ",".join(refs-tags))
-    if verbose:
-        print("likely tags: %s" % ",".join(sorted(tags)))
-    for ref in sorted(tags):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            if verbose:
-                print("picking %s" % r)
-            return {"version": r,
-                    "full": keywords["full"].strip()}
-    # no suitable tags, so version is "0+unknown", but full hex is still there
-    if verbose:
-        print("no suitable tags, using unknown + full revision id")
-    return {"version": "0+unknown",
-            "full": keywords["full"].strip()}
-
-
-def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
-    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
-
-    # dirty
-    dirty = git_describe.endswith("-dirty")
-    if dirty:
-        git_describe = git_describe[:git_describe.rindex("-dirty")]
-    dirty_suffix = ".dirty" if dirty else ""
-
-    # now we have TAG-NUM-gHEX or HEX
-
-    if "-" not in git_describe:  # just HEX
-        return "0+untagged.g"+git_describe+dirty_suffix, dirty
-
-    # just TAG-NUM-gHEX
-    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
-    if not mo:
-        # unparseable. Maybe git-describe is misbehaving?
-        return "0+unparseable"+dirty_suffix, dirty
-
-    # tag
-    full_tag = mo.group(1)
-    if not full_tag.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%s' doesn't start with prefix '%s'"
-            print(fmt % (full_tag, tag_prefix))
-        return None, dirty
-    tag = full_tag[len(tag_prefix):]
-
-    # distance: number of commits since tag
-    distance = int(mo.group(2))
-
-    # commit: short hex revision ID
-    commit = mo.group(3)
-
-    # now build up version string, with post-release "local version
-    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
-    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
-    # can always test version.endswith(".dirty").
-    version = tag
-    if distance or dirty:
-        version += "+%d.g%s" % (distance, commit) + dirty_suffix
-
-    return version, dirty
-
-
-def git_versions_from_vcs(tag_prefix, root, verbose=False):
-    # this runs 'git' from the root of the source tree. This only gets called
-    # if the git-archive 'subst' keywords were *not* expanded, and
-    # _version.py hasn't already been rewritten with a short version string,
-    # meaning we're inside a checked out source tree.
-
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %s" % root)
-        return {}  # get_versions() will try next method
-
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
-    # if there are no tags, this yields HEX[-dirty] (no NUM)
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
-                                "--always", "--long"],
-                         cwd=root)
-    # --long was added in git-1.5.5
-    if stdout is None:
-        return {}  # try next method
-    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
-
-    # build "full", which is FULLHEX[.dirty]
-    stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if dirty:
-        full += ".dirty"
-
-    return {"version": version, "full": full}
-
-
-def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
-    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
-    # __file__, we can work backwards from there to the root. Some
-    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
-    # case we can only use expanded keywords.
-
-    keywords = {"refnames": git_refnames, "full": git_full}
-    ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
-    if ver:
-        return ver
-
-    try:
-        root = os.path.realpath(__file__)
-        # versionfile_source is the relative path from the top of the source
-        # tree (where the .git directory might live) to this file. Invert
-        # this to find the root from __file__.
-        for i in versionfile_source.split('/'):
-            root = os.path.dirname(root)
-    except NameError:
-        return default
-
-    return (git_versions_from_vcs(tag_prefix, root, verbose)
-            or versions_from_parentdir(parentdir_prefix, root, verbose)
-            or default)

+ 12 - 9
borg/archive.py

@@ -533,14 +533,17 @@ class Archive:
             raise Exception('Unknown archive item type %r' % item[b'mode'])
 
     def restore_attrs(self, path, item, symlink=False, fd=None):
-        xattrs = item.get(b'xattrs')
-        if xattrs:
-                for k, v in xattrs.items():
-                    try:
-                        xattr.setxattr(fd or path, k, v, follow_symlinks=False)
-                    except OSError as e:
-                        if e.errno != errno.ENOTSUP:
-                            raise
+        xattrs = item.get(b'xattrs', {})
+        for k, v in xattrs.items():
+            try:
+                xattr.setxattr(fd or path, k, v, follow_symlinks=False)
+            except OSError as e:
+                if e.errno not in (errno.ENOTSUP, errno.EACCES, ):
+                    # only raise if the errno is not on our ignore list:
+                    # ENOTSUP == xattrs not supported here
+                    # EACCES == permission denied to set this specific xattr
+                    #           (this may happen related to security.* keys)
+                    raise
         uid = gid = None
         if not self.numeric_owner:
             uid = user2uid(item[b'user'])
@@ -657,7 +660,7 @@ class Archive:
             b'mtime': int_to_bigint(int(time.time()) * 1000000000)
         }
         self.add_item_queued(item)
-        return 'A'
+        return 'i'  # stdin
 
     def process_file(self, path, st, cache):
         status = None

+ 84 - 53
borg/archiver.py

@@ -102,17 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
 
     def do_create(self, args):
         """Create new archive"""
+        dry_run = args.dry_run
         t0 = datetime.now()
-        repository = self.open_repository(args.archive, exclusive=True)
-        manifest, key = Manifest.load(repository)
-        compr_args = dict(buffer=COMPR_BUFFER)
-        compr_args.update(args.compression)
-        key.compressor = Compressor(**compr_args)
-        cache = Cache(repository, key, manifest, do_files=args.cache_files)
-        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
-                          create=True, checkpoint_interval=args.checkpoint_interval,
-                          numeric_owner=args.numeric_owner, progress=args.progress,
-                          chunker_params=args.chunker_params)
+        if not dry_run:
+            repository = self.open_repository(args.archive, exclusive=True)
+            manifest, key = Manifest.load(repository)
+            compr_args = dict(buffer=COMPR_BUFFER)
+            compr_args.update(args.compression)
+            key.compressor = Compressor(**compr_args)
+            cache = Cache(repository, key, manifest, do_files=args.cache_files)
+            archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
+                              create=True, checkpoint_interval=args.checkpoint_interval,
+                              numeric_owner=args.numeric_owner, progress=args.progress,
+                              chunker_params=args.chunker_params)
+        else:
+            archive = cache = None
         try:
             # Add cache dir to inode_skip list
             skip_inodes = set()
@@ -131,11 +135,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             for path in args.paths:
                 if path == '-':  # stdin
                     path = 'stdin'
-                    self.print_verbose(path)
-                    try:
-                        archive.process_stdin(path, cache)
-                    except IOError as e:
-                        self.print_error('%s: %s', path, e)
+                    if not dry_run:
+                        try:
+                            status = archive.process_stdin(path, cache)
+                        except IOError as e:
+                            self.print_error('%s: %s', path, e)
+                    else:
+                        status = '-'
+                    self.print_verbose("%1s %s", status, path)
                     continue
                 path = os.path.normpath(path)
                 if args.dontcross:
@@ -146,27 +153,31 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                         continue
                 else:
                     restrict_dev = None
-                self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
-            archive.save(timestamp=args.timestamp)
-            if args.progress:
-                archive.stats.show_progress(final=True)
-            if args.stats:
-                t = datetime.now()
-                diff = t - t0
-                print('-' * 78)
-                print('Archive name: %s' % args.archive.archive)
-                print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
-                print('Start time: %s' % t0.strftime('%c'))
-                print('End time: %s' % t.strftime('%c'))
-                print('Duration: %s' % format_timedelta(diff))
-                print('Number of files: %d' % archive.stats.nfiles)
-                archive.stats.print_('This archive:', cache)
-                print('-' * 78)
+                self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev,
+                              read_special=args.read_special, dry_run=dry_run)
+            if not dry_run:
+                archive.save(timestamp=args.timestamp)
+                if args.progress:
+                    archive.stats.show_progress(final=True)
+                if args.stats:
+                    t = datetime.now()
+                    diff = t - t0
+                    print('-' * 78)
+                    print('Archive name: %s' % args.archive.archive)
+                    print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+                    print('Start time: %s' % t0.strftime('%c'))
+                    print('End time: %s' % t.strftime('%c'))
+                    print('Duration: %s' % format_timedelta(diff))
+                    print('Number of files: %d' % archive.stats.nfiles)
+                    archive.stats.print_('This archive:', cache)
+                    print('-' * 78)
         finally:
-            archive.close()
+            if not dry_run:
+                archive.close()
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
+    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev,
+                 read_special=False, dry_run=False):
         if exclude_path(path, excludes):
             return
         try:
@@ -183,15 +194,18 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # Ignore if nodump flag is set
         if has_lchflags and (st.st_flags & stat.UF_NODUMP):
             return
-        if stat.S_ISREG(st.st_mode):
-            try:
-                status = archive.process_file(path, st, cache)
-            except IOError as e:
-                self.print_error('%s: %s', path, e)
+        if (stat.S_ISREG(st.st_mode) or
+            read_special and not stat.S_ISDIR(st.st_mode)):
+            if not dry_run:
+                try:
+                    status = archive.process_file(path, st, cache)
+                except IOError as e:
+                    self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
             if exclude_caches and is_cachedir(path):
                 return
-            status = archive.process_dir(path, st)
+            if not dry_run:
+                status = archive.process_dir(path, st)
             try:
                 entries = os.listdir(path)
             except OSError as e:
@@ -200,13 +214,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 for filename in sorted(entries):
                     entry_path = os.path.normpath(os.path.join(path, filename))
                     self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  entry_path, restrict_dev)
+                                  entry_path, restrict_dev, read_special=read_special,
+                                  dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
-            status = archive.process_symlink(path, st)
+            if not dry_run:
+                status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
-            status = archive.process_fifo(path, st)
+            if not dry_run:
+                status = archive.process_fifo(path, st)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
-            status = archive.process_dev(path, st)
+            if not dry_run:
+                status = archive.process_dev(path, st)
         elif stat.S_ISSOCK(st.st_mode):
             # Ignore unix sockets
             return
@@ -222,7 +240,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # Note: A/M/U is relative to the "files" cache, not to the repo.
         # This would be an issue if the files cache is not used.
         if status is None:
-            status = '?'  # need to add a status code somewhere
+            if not dry_run:
+                status = '?'  # need to add a status code somewhere
+            else:
+                status = '-'  # dry run, item was not backed up
         # output ALL the stuff - it can be easily filtered using grep.
         # even stuff considered unchanged might be interesting.
         self.print_verbose("%1s %s", status, remove_surrogates(path))
@@ -299,10 +320,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             print("You requested to completely DELETE the repository *including* all archives it contains:")
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 print(format_archive(archive_info))
-            while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+            if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                 print("""Type "YES" if you understand this and want to continue.\n""")
-                if input('Do you want to continue? ') == 'YES':
-                    break
+                if input('Do you want to continue? ') != 'YES':
+                    self.exit_code = 1
+                    return self.exit_code
             repository.destroy()
             cache.destroy()
             print("Repository and corresponding cache were deleted.")
@@ -559,7 +581,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           description=self.do_init.__doc__, epilog=init_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_init)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                help='repository to create')
         subparser.add_argument('-e', '--encryption', dest='encryption',
@@ -570,6 +592,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         The check command verifies the consistency of a repository and the corresponding archives.
 
         First, the underlying repository data files are checked:
+
         - For all segments the segment magic (header) is checked
         - For all objects stored in the segments, all metadata (e.g. crc and size) and
           all data is read. The read data is checked by size and CRC. Bit rot and other
@@ -583,6 +606,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         - The repository check can be skipped using the --archives-only option.
 
         Second, the consistency and correctness of the archive metadata is verified:
+
         - Is the repo manifest present? If not, it is rebuilt from archive metadata
           chunks (this requires reading and decrypting of all metadata and data).
         - Check if archive metadata chunk is present. if not, remove archive from
@@ -605,7 +629,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           epilog=check_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_check)
-        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
+        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                help='repository or archive to check consistency of')
         subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
@@ -630,7 +654,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           epilog=change_passphrase_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_change_passphrase)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False))
 
         create_epilog = textwrap.dedent("""
@@ -688,6 +712,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                     'zlib,0 .. zlib,9 == zlib (with level 0..9), '
                                     'lzma == lzma (default level 6), '
                                     'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        subparser.add_argument('--read-special', dest='read_special',
+                               action='store_true', default=False,
+                               help='open and read special files as if they were regular files')
+        subparser.add_argument('-n', '--dry-run', dest='dry_run',
+                               action='store_true', default=False,
+                               help='do not create a backup archive')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
@@ -761,7 +791,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('-s', '--stats', dest='stats',
                                action='store_true', default=False,
                                help='print statistics for the deleted archive')
-        subparser.add_argument('target', metavar='TARGET',
+        subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
                                type=location_validator(),
                                help='archive or repository to delete')
 
@@ -776,7 +806,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--short', dest='short',
                                action='store_true', default=False,
                                help='only print file/directory names, nothing else')
-        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
+        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
+                               type=location_validator(),
                                help='repository/archive to list contents of')
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for
@@ -859,7 +890,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                help='number of yearly archives to keep')
         subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
                                help='only consider archive names starting with this prefix')
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                help='repository to prune')
 

+ 16 - 8
borg/cache.py

@@ -3,6 +3,7 @@ from .remote import cache_if_remote
 import errno
 import msgpack
 import os
+import stat
 import sys
 import threading
 from binascii import hexlify
@@ -40,7 +41,6 @@ class Cache:
         self.lock = None
         self.timestamp = None
         self.thread_lock = threading.Lock()
-        self.lock = None
         self.txn_active = False
         self.repository = repository
         self.key = key
@@ -352,9 +352,9 @@ class Cache:
     def add_chunk(self, id, data, stats):
         if not self.txn_active:
             self.begin_txn()
-        if self.seen_chunk(id):
-            return self.chunk_incref(id, stats)
         size = len(data)
+        if self.seen_chunk(id, size):
+            return self.chunk_incref(id, stats)
         data = self.key.encrypt(data)
         csize = len(data)
         self.repository.put(id, data, wait=False)
@@ -425,8 +425,14 @@ class Cache:
             results.append((id, size, csize))
         return results
 
-    def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0, 0))[0]
+    def seen_chunk(self, id, size=None):
+        refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
+        if size is not None and stored_size is not None and size != stored_size:
+            # we already have a chunk with that id, but different size.
+            # this is either a hash collision (unlikely) or corruption or a bug.
+            raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
+                            id, stored_size, size))
+        return refcount
 
     def seen_or_announce_chunk(self, id, size):
         """return True if we have seen the chunk <id> already (thus, we already have it or will have it soon).
@@ -437,7 +443,9 @@ class Cache:
             try:
                 # did we see this id already (and is count > 0)?
                 count, _size, _csize = self.chunks[id]
-                assert size == _size
+                if size != _size:
+                    raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
+                        id, _size, size))
                 return count > 0
             except KeyError:
                 # announce that we will put this chunk soon,
@@ -466,7 +474,7 @@ class Cache:
             stats.update(-size, -csize, False)
 
     def file_known_and_unchanged(self, path_hash, st):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return None
         if self.files is None:
             self._read_files()
@@ -483,7 +491,7 @@ class Cache:
             return None
 
     def memorize_file(self, path_hash, st, ids):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
         # Entry: Age, inode, size, mtime, chunk ids
         mtime_ns = st_mtime_ns(st)

+ 24 - 19
borg/hashindex.pyx

@@ -32,9 +32,10 @@ cimport cython
 @cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
         if path:
             self.index = hashindex_read(os.fsencode(path))
             if not self.index:
@@ -67,7 +68,7 @@ cdef class IndexBase:
             self[key] = value
 
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
 
@@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
 
@@ -132,9 +133,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -143,8 +146,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 cdef class ChunkIndex(IndexBase):
@@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
     def summarize(self):
@@ -199,9 +202,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -210,5 +215,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))

+ 26 - 5
borg/helpers.py

@@ -280,12 +280,12 @@ def timestamp(s):
 
 
 def ChunkerParams(s):
-    window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
+    chunk_min, chunk_max, chunk_mask, window_size = s.split(',')
     if int(chunk_max) > 23:
         # do not go beyond 2**23 (8MB) chunk size now,
         # COMPR_BUFFER can only cope with up to this size
-        raise ValueError
-    return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
+        raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
+    return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size)
 
 
 def CompressionSpec(s):
@@ -469,13 +469,34 @@ class Location:
                          r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
     scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                         r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
+    # get the repo from the BORG_REPO env var and the optional archive from param.
+    # if the syntax requires giving REPOSITORY (see "borg mount"),
+    # use "::" to let it use the env var.
+    # if REPOSITORY argument is optional, it'll automatically use the env.
+    env_re = re.compile(r'(?:::(?P<archive>.+)?)?$')
 
-    def __init__(self, text):
+    def __init__(self, text=''):
         self.orig = text
-        if not self.parse(text):
+        if not self.parse(self.orig):
             raise ValueError
 
     def parse(self, text):
+        valid = self._parse(text)
+        if valid:
+            return True
+        m = self.env_re.match(text)
+        if not m:
+            return False
+        repo = os.environ.get('BORG_REPO')
+        if repo is None:
+            return False
+        valid = self._parse(repo)
+        if not valid:
+            return False
+        self.archive = m.group('archive')
+        return True
+
+    def _parse(self, text):
         m = self.ssh_re.match(text)
         if m:
             self.proto = m.group('proto')

+ 5 - 1
borg/locking.py

@@ -176,7 +176,11 @@ class LockRoster:
             json.dump(data, f)
 
     def remove(self):
-        os.unlink(self.path)
+        try:
+            os.unlink(self.path)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise
 
     def get(self, key):
         roster = self.load()

+ 3 - 3
borg/repository.py

@@ -50,14 +50,14 @@ class Repository:
         """Object with key {} not found in repository {}."""
 
     def __init__(self, path, create=False, exclusive=False):
-        self.path = path
+        self.path = os.path.abspath(path)
         self.io = None
         self.lock = None
         self.index = None
         self._active_txn = False
         if create:
-            self.create(path)
-        self.open(path, exclusive)
+            self.create(self.path)
+        self.open(self.path, exclusive)
 
     def __del__(self):
         self.close()

+ 8 - 0
borg/testsuite/archiver.py

@@ -485,6 +485,14 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         mode = os.stat(self.repository_path).st_mode
         self.assertEqual(stat.S_IMODE(mode), 0o700)
 
+    def test_create_dry_run(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', '--dry-run', self.repository_location + '::test', 'input')
+        # Make sure no archive has been created
+        repository = Repository(self.repository_path)
+        manifest, key = Manifest.load(repository)
+        self.assert_equal(len(manifest.archives), 0)
+
     def test_cmdline_compatibility(self):
         self.create_regular_file('file1', size=1024 * 80)
         self.cmd('init', self.repository_location)

+ 113 - 33
borg/testsuite/helpers.py

@@ -7,7 +7,7 @@ import msgpack
 
 from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
     prune_within, prune_split, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec
+    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
 from . import BaseTestCase
 
 
@@ -23,42 +23,115 @@ class BigIntTestCase(BaseTestCase):
         self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
 
 
-class LocationTestCase(BaseTestCase):
-
-    def test(self):
-        self.assert_equal(
-            repr(Location('ssh://user@host:1234/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('file:///some/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('user@host:/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('/some/absolute/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('some/relative/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
-        )
-        self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive'))
-
-    def test_canonical_path(self):
+class TestLocationWithoutEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('ssh://user@host:1234/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location('ssh://user@host:1234/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
+
+    def test_file(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('file:///some/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location('file:///some/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
+
+    def test_scp(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('user@host:/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location('user@host:/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location('path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('/some/absolute/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location('/some/absolute/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('some/relative/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location('some/relative/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
+
+    def test_underspecified(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('::archive')
+        with pytest.raises(ValueError):
+            Location('::')
+        with pytest.raises(ValueError):
+            Location()
+
+    def test_no_double_colon(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('ssh://localhost:22/path:archive')
+
+    def test_canonical_path(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
         locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive',
                      'host:~user/some/path::archive', 'ssh://host/some/path::archive',
                      'ssh://user@host:1234/some/path::archive']
         for location in locations:
-            self.assert_equal(Location(location).canonical_path(),
-                              Location(Location(location).canonical_path()).canonical_path())
+            assert Location(location).canonical_path() == \
+                   Location(Location(location).canonical_path()).canonical_path()
+
+
+class TestLocationWithEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
+
+    def test_file(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'file:///some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
+
+    def test_scp(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'user@host:/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', '/some/absolute/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'some/relative/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
 
 
 class FormatTimedeltaTestCase(BaseTestCase):
@@ -129,6 +202,13 @@ def test_compression_specs():
         CompressionSpec('invalid')
 
 
+def test_chunkerparams():
+    assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
+    assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)
+    with pytest.raises(ValueError):
+        ChunkerParams('19,24,21,4095')
+
+
 class MakePathSafeTestCase(BaseTestCase):
 
     def test(self):

BIN
docs/_themes/local/static/favicon.ico


+ 4 - 5
docs/_themes/local/static/local.css_t

@@ -31,7 +31,7 @@ div.documentwrapper {
   float: right;
   width: 760px;
   padding: 0 20px 20px 20px;
-  color: #00aa00;
+  color: #00cc00;
   background-color: #000000;
   margin-bottom: 2em;
 }
@@ -48,7 +48,7 @@ div.sphinxsidebar {
 
 h1, h2, h3 {
   font-weight: normal;
-  color: #33dd33;
+  color: #33ff33;
 }
 
 h1 {
@@ -99,12 +99,12 @@ div.sphinxsidebar a:link, div.sphinxsidebar a:visited {
 }
 
 div.sphinxsidebar {
-  color: #00aa00;
+  color: #00cc00;
   background: 0000000;
 }
 
 div.sphinxsidebar input {
-  color: #00cc00;
+  color: #00ff00;
   background: 0000000;
   border: 1px solid #444444;
 }
@@ -171,4 +171,3 @@ div.seealso {
   border-radius: .4em;
   box-shadow: 2px 2px #dd6;
 }
-

+ 2 - 2
docs/conf.py

@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = 'Borg - Deduplicating Archiver'
-copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
+copyright = '2010-2014 Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -116,7 +116,7 @@ html_theme_path = ['_themes']
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+html_favicon = 'favicon.ico'
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,

+ 3 - 2
docs/development.rst

@@ -26,7 +26,9 @@ Running the tests
 
 The tests are in the borg/testsuite package.
 
-To run them, you need to have fakeroot, tox and pytest installed.
+To run all the tests, you need to have fakeroot installed. If you do not have
+fakeroot, you still will be able to run most tests, just leave away the
+`fakeroot -u` from the given command lines.
 
 To run the test suite use the following command::
 
@@ -47,7 +49,6 @@ Some more advanced examples::
 
 Important notes:
 
-- Without fakeroot -u some tests will fail.
 - When using -- to give options to py.test, you MUST also give borg.testsuite[.module].
 
 Building the docs with Sphinx

+ 14 - 1
docs/faq.rst

@@ -86,6 +86,18 @@ If it crashes with a UnicodeError, what can I do?
 
         export LANG=en_US.UTF-8  # or similar, important is correct charset
 
+I can't extract non-ascii filenames by giving them on the commandline on OS X!?
+    This is due to different ways to represent some characters in unicode.
+    HFS+ likes the decomposed form while the commandline seems to be the composed
+    form usually. If you run into that, for now maybe just try:
+
+    - avoiding the non-ascii characters on the commandline by e.g. extracting
+      the parent directory (or even everything)
+    - try to enter the composed form on the commandline
+    - mount the repo using FUSE and use some file manager
+
+    See issue #143 on the issue tracker for more about this.
+
 If I want to run |project_name| on a ARM CPU older than ARM v6?
     You need to enable the alignment trap handler to fixup misaligned accesses::
     
@@ -97,7 +109,8 @@ Can |project_name| add redundancy to the backup data to deal with hardware malfu
     of low-level storage layout information and control which we do not have (and also can't
     get, even if we wanted).
 
-    So, if you need that, consider RAID1 or a filesystems that offers redundant storage.
+    So, if you need that, consider RAID1 or a filesystem that offers redundant storage
+    or just make 2 backups to different locations / different hardware.
 
 Can |project_name| verify data integrity of a backup archive?
     Yes, if you want to detect accidental data damage (like bit rot), use the ``check``

+ 124 - 67
docs/installation.rst

@@ -14,22 +14,39 @@ Installation
 
 General notes
 -------------
-Even though Python 3 is not the default Python version on many systems, it is
-usually available as an optional install.
+You need to do some platform specific preparation steps (to install libraries
+and tools) followed by the generic installation of |project_name| itself:
+
+Below, we describe different ways to install |project_name|.
+
+- **dist package** - easy and fast, needs a distribution and platform specific
+  binary package (for your Linux/*BSD/OS X/... distribution).
+- **wheel** - easy and fast, needs a platform specific borgbackup binary wheel
+  which matches your platform (OS and CPU).
+- **pypi** - installing a source package from pypi needs more installation steps
+  and will compile stuff - try this if there is no binary wheel that works for
+  you.
+- **git** - for developers and power users who want to have the latest code or
+  use revision control (each release is tagged).
+
+**Python 3**: Even though this is not the default Python version on many systems,
+it is usually available as an optional install.
 
 Virtualenv_ can be used to build and install |project_name| without affecting
 the system Python or requiring root access.
 
 Important:
-if you install into a virtual environment, you need to activate
+If you install into a virtual environment, you need to **activate**
 the virtual env first (``source borg-env/bin/activate``).
 Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some
 directory that is in your PATH so you can just run ``borg``).
+Using a virtual environment is optional, but recommended except for the most
+simple use cases.
 
 The llfuse_ python package is also required if you wish to mount an
 archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse.
 
-You only need Cython to compile the .pyx files to the respective .c files
+You only need **Cython** to compile the .pyx files to the respective .c files
 when using |project_name| code from git. For |project_name| releases, the .c
 files will be bundled, so you won't need Cython to install a release.
 
@@ -42,17 +59,57 @@ Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_.
 Mac OS X: You need OS X FUSE >= 3.0.
 
 
-Debian / Ubuntu installation (from git)
----------------------------------------
-Note: this uses latest, unreleased development code from git.
-While we try not to break master, there are no guarantees on anything.
+Installation (dist package)
+---------------------------
+Some Linux, BSD and OS X distributions might offer a ready-to-use
+`borgbackup` package (which can be easily installed in the usual way).
+
+As |project_name| is still relatively new, such a package might be not
+available for your system yet. Please ask package maintainers to build a
+package or, if you can package / submit it yourself, please help us with
+that!
+
+If a package is available, it might be interesting for you to check its version
+and compare that to our latest release and review the change log (see links on
+our web site).
+
+
+Debian Jessie / Ubuntu 14.04 preparations (wheel)
+-------------------------------------------------
+
+.. parsed-literal::
+
+    # Python stuff we need
+    apt-get install python3 python3-pip
 
-Some of the steps detailled below might be useful also for non-git installs.
+    # Libraries we need (fuse is optional)
+    apt-get install openssl libacl1 liblz4-1 fuse
+
+
+Installation (wheel)
+--------------------
+
+This uses the latest binary wheel release.
 
 .. parsed-literal::
 
-    # Python 3.x (>= 3.2) + Headers, Py Package Installer
-    apt-get install python3 python3-dev python3-pip
+    # Check https://github.com/borgbackup/borg/issues/147 for the correct
+    # platform-specific binary wheel, download and install it:
+
+    # system-wide installation, needs sudo/root permissions:
+    sudo pip install borgbackup.whl
+
+    # home directory installation, no sudo/root needed:
+    pip install --user borgbackup.whl
+
+
+Debian Jessie / Ubuntu 14.04 preparations (git/pypi)
+----------------------------------------------------
+
+.. parsed-literal::
+
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv
+    apt-get install python3 python3-dev python3-pip python-virtualenv
 
     # we need OpenSSL + Headers for Crypto
     apt-get install libssl-dev openssl
@@ -75,35 +132,14 @@ Some of the steps detailled below might be useful also for non-git installs.
     # optional: for unit testing
     apt-get install fakeroot
 
-    # get |project_name| from github, install it
-    git clone |git_url|
-
-    apt-get install python-virtualenv
-    virtualenv --python=python3 borg-env
-    source borg-env/bin/activate   # always before using!
 
-    # install borg + dependencies into virtualenv
-    pip install cython  # compile .pyx -> .c
-    pip install tox pytest  # optional, for running unit tests
-    pip install sphinx  # optional, to build the docs
-    pip install llfuse  # optional, for FUSE support
-    cd borg
-    pip install -e .  # in-place editable mode
-
-    # optional: run all the tests, on all supported Python versions
-    fakeroot -u tox
-
-
-Korora / Fedora 21 installation (from git)
+Korora / Fedora 21 preparations (git/pypi)
 ------------------------------------------
-Note: this uses latest, unreleased development code from git.
-While we try not to break master, there are no guarantees on anything.
-
-Some of the steps detailled below might be useful also for non-git installs.
 
 .. parsed-literal::
-    # Python 3.x (>= 3.2) + Headers, Py Package Installer
-    sudo dnf install python3 python3-devel python3-pip
+
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv
+    sudo dnf install python3 python3-devel python3-pip python3-virtualenv
 
     # we need OpenSSL + Headers for Crypto
     sudo dnf install openssl-devel openssl
@@ -112,7 +148,7 @@ Some of the steps detailled below might be useful also for non-git installs.
     sudo dnf install libacl-devel libacl
 
     # lz4 super fast compression support Headers + Library
-    sudo dnf install lz4
+    sudo dnf install lz4-devel
 
     # optional: FUSE support - to mount backup archives
     sudo dnf install fuse-devel fuse
@@ -120,27 +156,10 @@ Some of the steps detailled below might be useful also for non-git installs.
     # optional: for unit testing
     sudo dnf install fakeroot
 
-    # get |project_name| from github, install it
-    git clone |git_url|
-
-    dnf install python3-virtualenv
-    virtualenv --python=python3 borg-env
-    source borg-env/bin/activate   # always before using!
-
-    # install borg + dependencies into virtualenv
-    pip install cython  # compile .pyx -> .c
-    pip install tox pytest  # optional, for running unit tests
-    pip install sphinx  # optional, to build the docs
-    pip install llfuse  # optional, for FUSE support
-    cd borg
-    pip install -e .  # in-place editable mode
 
-    # optional: run all the tests, on all supported Python versions
-    fakeroot -u tox
+Cygwin preparations (git/pypi)
+------------------------------
 
-
-Cygwin (from git)
------------------
 Please note that running under cygwin is rather experimental, stuff has been
 tested with CygWin (x86-64) v2.1.0.
 
@@ -148,17 +167,12 @@ You'll need at least (use the cygwin installer to fetch/install these):
 
 ::
 
-    python3
-    python3-setuptools
-    python3-cython
-    binutils
-    gcc-core
-    git
-    libopenssl
+    python3 python3-setuptools
+    python3-cython  # not needed for releases
+    binutils gcc-core
+    libopenssl openssl-devel
     liblz4_1 liblz4-devel  # from cygwinports.org
-    make
-    openssh
-    openssl-devel
+    git make openssh
 
 You can then install ``pip`` and ``virtualenv``:
 
@@ -167,7 +181,7 @@ You can then install ``pip`` and ``virtualenv``:
     easy_install-3.4 pip
     pip install virtualenv
 
-And now continue as for Linux (see above).
+And now continue with the generic installation (see below).
 
 In case that creation of the virtual env fails, try deleting this file:
 
@@ -175,3 +189,46 @@ In case that creation of the virtual env fails, try deleting this file:
 
     /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
 
+
+Installation (pypi)
+-------------------
+
+This uses the latest (source package) release from PyPI.
+
+.. parsed-literal::
+
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install llfuse  # optional, for FUSE support
+    pip install borgbackup
+
+Note: we install into a virtual environment here, but this is not a requirement.
+
+
+Installation (git)
+------------------
+
+This uses the latest, unreleased development code from git.
+While we try not to break master, there are no guarantees on anything.
+
+.. parsed-literal::
+
+    # get |project_name| from github, install it
+    git clone |git_url|
+
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install sphinx  # optional, to build the docs
+    pip install llfuse  # optional, for FUSE support
+    cd borg
+    pip install -r requirements.d/development.txt
+    pip install -e .  # in-place editable mode
+
+    # optional: run all the tests, on all supported Python versions
+    fakeroot -u tox
+
+Note: as a developer or power user, you always want to use a virtual environment.

+ 36 - 4
docs/internals.rst

@@ -8,7 +8,6 @@ This page documents the internal data structures and storage
 mechanisms of |project_name|. It is partly based on `mailing list
 discussion about internals`_ and also on static code analysis.
 
-It may not be exactly up to date with the current source code.
 
 Repository and Archives
 -----------------------
@@ -41,6 +40,32 @@ lock.roster and lock.exclusive/*
   used by the locking system to manage shared and exclusive locks
 
 
+Lock files
+----------
+
+|project_name| uses locks to get (exclusive or shared) access to the cache and
+the repository.
+
+The locking system is based on creating a directory `lock.exclusive` (for
+exclusive locks). Inside the lock directory, there is a file indicating the
+hostname, process id and thread id of the lock holder.
+
+There is also a json file `lock.roster` that keeps a directory of all shared
+and exclusive lockers.
+
+If the process can create the `lock.exclusive` directory for a resource, it has
+the lock for it. If creation fails (because the directory has already been
+created by some other process), lock acquisition fails.
+
+The cache lock is usually in `~/.cache/borg/REPOID/lock.*`.
+The repository lock is in `repository/lock.*`.
+
+In case you run into troubles with the locks, you can just delete the `lock.*`
+directory and file IF you first make sure that no |project_name| process is
+running on any machine that accesses this resource. Be very careful, the cache
+or repository might get damaged if multiple processes use it at the same time.
+
+
 Config file
 -----------
 
@@ -125,6 +150,9 @@ Each archive info contains:
 It is the last object stored, in the last segment, and is replaced
 each time.
 
+The Archive
+-----------
+
 The archive metadata does not contain the file items directly. Only
 references to other objects that contain that data. An archive is an
 object that contains:
@@ -137,6 +165,10 @@ object that contains:
 * username
 * time
 
+
+The Item
+--------
+
 Each item represents a file, directory or other fs item and is stored as an
 ``item`` dictionary that contains:
 
@@ -194,7 +226,7 @@ what files you have based on a specific set of chunk sizes).
 Indexes / Caches
 ----------------
 
-The files cache is stored in ``cache/files`` and is indexed on the
+The **files cache** is stored in ``cache/files`` and is indexed on the
 ``file path hash``. At backup time, it is used to quickly determine whether we
 need to chunk a given file (or whether it is unchanged and we already have all
 its pieces).
@@ -213,7 +245,7 @@ archives in different setups.
 The files cache is stored as a python associative array storing
 python objects, which generates a lot of overhead.
 
-The chunks cache is stored in ``cache/chunks`` and is indexed on the
+The **chunks cache** is stored in ``cache/chunks`` and is indexed on the
 ``chunk id_hash``. It is used to determine whether we already have a specific
 chunk, to count references to it and also for statistics.
 It contains:
@@ -222,7 +254,7 @@ It contains:
 * size
 * encrypted/compressed size
 
-The repository index is stored in ``repo/index.%d`` and is indexed on the
+The **repository index** is stored in ``repo/index.%d`` and is indexed on the
 ``chunk id_hash``. It is used to determine a chunk's location in the repository.
 It contains:
 

+ 0 - 130
docs/misc/create_compression.txt

@@ -1,130 +0,0 @@
-data compression
-================
-
-borg create --compression N repo::archive data
-
-Currently, borg only supports zlib compression. There are plans to expand this
-to other, faster or better compression algorithms in the future.
-
-N == 0 -> zlib level 0 == very quick, no compression
-N == 1 -> zlib level 1 == quick, low compression
-...
-N == 9 -> zlib level 9 == slow, high compression
-
-Measurements made on a Haswell Ultrabook, SSD storage, Linux.
-
-
-Example 1: lots of relatively small text files (linux kernel src)
------------------------------------------------------------------
-
-N == 1 does a good job here, it saves the additional time needed for
-compression because it needs to store less into storage (see N == 0).
-
-N == 6 is also quite ok, a little slower, a little less repo size.
-6 was the old default of borg.
-
-High compression levels only give a little more compression, but take a lot
-of cpu time.
-
-$ borg create --stats --compression 0
------------------------------------------------------------------------------- 
-Duration: 50.40 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB              1.18 GB              1.01 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70263                82309
------------------------------------------------------------------------------- 
-
-$ borg create --stats --compression 1
------------------------------------------------------------------------------- 
-Duration: 49.29 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            368.62 MB            295.22 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70280                82326
-------------------------------------------------------------------------------
-
-$ borg create --stats --compression 5
------------------------------------------------------------------------------- 
-Duration: 59.99 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            331.70 MB            262.20 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70290                82336
-------------------------------------------------------------------------------
-
-$ borg create --stats --compression 6
------------------------------------------------------------------------------- 
-Duration: 1 minutes 13.64 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            328.79 MB            259.56 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70279                82325
-------------------------------------------------------------------------------
-
-$ borg create --stats --compression 9
-------------------------------------------------------------------------------
-Duration: 3 minutes 1.58 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            326.57 MB            257.57 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70292                82338
-------------------------------------------------------------------------------
-
-
-Example 2: large VM disk file (sparse file)
--------------------------------------------
-
-The file's directory size is 80GB, but a lot of it is sparse (and reads as
-zeros).
-
-$ borg create --stats --compression 0
-------------------------------------------------------------------------------
-Duration: 13 minutes 48.47 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB             80.55 GB             10.87 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147307               177109
-------------------------------------------------------------------------------
-
-$ borg create --stats --compression 1
-------------------------------------------------------------------------------
-Duration: 15 minutes 31.34 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB              6.68 GB              5.67 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147309               177111
-------------------------------------------------------------------------------
-
-$ borg create --stats --compression 6
-------------------------------------------------------------------------------
-Duration: 18 minutes 57.54 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB              6.19 GB              5.44 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147307               177109
-------------------------------------------------------------------------------

+ 19 - 18
docs/quickstart.rst

@@ -121,7 +121,7 @@ Repository encryption
 
 Repository encryption is enabled at repository creation time::
 
-    $ borg init --encryption=passphrase|keyfile PATH
+    $ borg init --encryption=repokey|keyfile PATH
 
 When repository encryption is enabled all data is encrypted using 256-bit AES_
 encryption and the integrity and authenticity is verified using `HMAC-SHA256`_.
@@ -130,28 +130,29 @@ All data is encrypted before being written to the repository. This means that
 an attacker who manages to compromise the host containing an encrypted
 archive will not be able to access any of the data.
 
-|project_name| supports two different methods to derive the AES and HMAC keys.
+|project_name| supports different methods to store the AES and HMAC keys.
 
-Passphrase based encryption
-    This method uses a user supplied passphrase to derive the keys using the
-    PBKDF2_ key derivation function. This method is convenient to use since
-    there is no key file to keep track of and secure as long as a *strong*
-    passphrase is used.
+``repokey`` mode
+    The key is stored inside the repository (in its "config" file).
+    Use this mode if you trust in your good passphrase giving you enough
+    protection.
 
-    .. Note::
-        For automated backups the passphrase can be specified using the
-        `BORG_PASSPHRASE` environment variable.
+``keyfile`` mode
+    The key is stored on your local disk (in ``~/.borg/keys/``).
+    Use this mode if you want "passphrase and having-the-key" security.
 
-Key file based encryption
-    This method generates random keys at repository initialization time that
-    are stored in a password protected file in the ``~/.borg/keys/`` directory.
-    The key file is a printable text file. This method is secure and suitable
-    for automated backups.
+In both modes, the key is stored in encrypted form and can be only decrypted
+by providing the correct passphrase.
 
-    .. Note::
-        The repository data is totally inaccessible without the key file
-        so it must be kept **safe**.
+For automated backups the passphrase can be specified using the
+`BORG_PASSPHRASE` environment variable.
 
+**The repository data is totally inaccessible without the key:**
+    Make a backup copy of the key file (``keyfile`` mode) or repo config
+    file (``repokey`` mode) and keep it at a safe place, so you still have
+    the key in case it gets corrupted or lost.
+    The backup that is encrypted with that key won't help you with that,
+    of course.
 
 .. _remote_repos:
 

+ 24 - 4
docs/support.rst

@@ -4,15 +4,15 @@
 Support
 =======
 
-Please first read the docs and the FAQ section in the docs, a lot of stuff is
-documented / explained there.
+Please first read the docs and existing issue tracker issues and mailing
+list posts, a lot of stuff is already documented / explained / discussed /
+filed there.
 
 Issue Tracker
 -------------
 
 If you've found a bug or have a concrete feature request, please create a new
-ticket on the project's `issue tracker`_ (after checking whether someone else
-already has reported the same thing).
+ticket on the project's `issue tracker`_.
 
 For more general questions or discussions, IRC or mailing list are preferred.
 
@@ -35,3 +35,23 @@ to the confirmation mail.
 
 To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply
 to the confirmation mail.
+
+Bounties and Fundraisers
+------------------------
+
+We use `BountySource <https://www.bountysource.com/teams/borgbackup>`_ to allow
+monetary contributions to the project and the developers, who push it forward.
+
+There, you can give general funds to the borgbackup members (the developers will
+then spend the funds as they deem fit). If you do not have some specific bounty
+(see below), you can use this as a general way to say "Thank You!" and support
+the software / project you like.
+
+If you want to encourage developers to fix some specific issue or implement some
+specific feature suggestion, you can post a new bounty or back an existing one
+(they always refer to an issue in our `issue tracker`_).
+
+As a developer, you can become a Bounty Hunter and win bounties (earn money) by
+contributing to |project_name|, a free and open source software project.
+
+We might also use BountySource to fund raise for some bigger goals.

+ 75 - 43
docs/usage.rst

@@ -41,26 +41,34 @@ Environment Variables
 
 |project_name| uses some environment variables for automation:
 
-::
-
-    Specifying a passphrase:
-        BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories.
-
-    Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
-        BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository"
-        BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..."
-        BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss."
-
-    Directories:
-        BORG_KEYS_DIR : Default to '~/.borg/keys'. This directory contains keys for encrypted repositories.
-        BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache and might need a lot
-                         of space for dealing with big repositories).
-
-    Building:
-        BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py).
-
-    General:
-        TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations)
+General:
+    BORG_REPO
+        When set, use the value to give the default repository location. If a command needs an archive
+        parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you
+        can either leave it away or abbreviate as `::`, if a positional parameter is required.
+    BORG_PASSPHRASE
+        When set, use the value to answer the passphrase question for encrypted repositories.
+    TMPDIR
+        where temporary files are stored (might need a lot of temporary space for some operations)
+
+Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
+    BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK
+        For "Warning: Attempting to access a previously unknown unencrypted repository"
+    BORG_RELOCATED_REPO_ACCESS_IS_OK
+        For "Warning: The repository at location ... was previously located at ..."
+    BORG_CHECK_I_KNOW_WHAT_I_AM_DOING
+        For "Warning: 'check --repair' is an experimental feature that might result in data loss."
+
+Directories:
+    BORG_KEYS_DIR
+        Default to '~/.borg/keys'. This directory contains keys for encrypted repositories.
+    BORG_CACHE_DIR
+        Default to '~/.cache/borg'. This directory contains the local cache and might need a lot
+        of space for dealing with big repositories.
+
+Building:
+    BORG_OPENSSL_PREFIX
+        Adds given OpenSSL header file directory to the default locations (setup.py).
 
 
 Please note:
@@ -75,29 +83,43 @@ Resource Usage
 
 |project_name| might use a lot of resources depending on the size of the data set it is dealing with.
 
-CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
-     Especially higher zlib and lzma compression levels use significant amounts of CPU cycles.
-
-Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
-              compression, esp. lzma compression with high levels might need substantial amounts
-              of memory.
-
-Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
-                 deduplicated chunks used to represent them in the repository.
-
-Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes).
-
-Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big
-              chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and
-              you want to keep the chunks index at some reasonable size.
-
-Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but
-             next backup will be much slower if you do.
-
-Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go
-         over the connection (ssh: repo url). if you use a locally mounted network filesystem, additionally some copy
-         operations used for transaction support also go over the connection. if you backup multiple sources to one
-         target repository, additional traffic happens for cache resynchronization.
+CPU:
+    It won't go beyond 100% of 1 core as the code is currently single-threaded.
+    Especially higher zlib and lzma compression levels use significant amounts
+    of CPU cycles.
+
+Memory (RAM):
+    The chunks index and the files index are read into memory for performance
+    reasons.
+    Compression, esp. lzma compression with high levels might need substantial
+    amounts of memory.
+
+Temporary files:
+    Reading data and metadata from a FUSE mounted repository will consume about
+    the same space as the deduplicated chunks used to represent them in the
+    repository.
+
+Cache files:
+    Contains the chunks index and files index (plus a compressed collection of
+    single-archive chunk indexes).
+
+Chunks index:
+    Proportional to the amount of data chunks in your repo. Lots of small chunks
+    in your repo imply a big chunks index. You may need to tweak the chunker
+    params (see create options) if you have a lot of data and you want to keep
+    the chunks index at some reasonable size.
+
+Files index:
+    Proportional to the amount of files in your last backup. Can be switched
+    off (see create options), but next backup will be much slower if you do.
+
+Network:
+    If your repository is remote, all deduplicated (and optionally compressed/
+    encrypted) data of course has to go over the connection (ssh: repo url).
+    If you use a locally mounted network filesystem, additionally some copy
+    operations used for transaction support also go over the connection. If
+    you backup multiple sources to one target repository, additional traffic
+    happens for cache resynchronization.
 
 In case you are interested in more details, please read the internals documentation.
 
@@ -190,6 +212,11 @@ Examples
     # Even slower, even higher compression (N = 0..9)
     $ borg create --compression lzma,N /mnt/backup::repo ~
 
+    # Backup some LV snapshots (you have to create the snapshots before this
+    # and remove them afterwards). We also backup the output of lvdisplay so
+    # we can see the LV sizes at restore time. See also "borg extract" examples.
+    $ lvdisplay > lvdisplay.txt
+    $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot
 
 .. include:: usage/extract.rst.inc
 
@@ -209,6 +236,11 @@ Examples
     # Extract the "src" directory but exclude object files
     $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
 
+    # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have
+    # to be already available and will be overwritten by this command!)
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home
+
 Note: currently, extract always writes into the current working directory ("."),
       so make sure you ``cd`` to the right place before calling ``borg extract``.
 

+ 11 - 15
setup.py

@@ -3,14 +3,6 @@ import os
 import sys
 from glob import glob
 
-import versioneer
-versioneer.VCS = 'git'
-versioneer.style = 'pep440'
-versioneer.versionfile_source = 'borg/_version.py'
-versioneer.versionfile_build = 'borg/_version.py'
-versioneer.tag_prefix = ''
-versioneer.parentdir_prefix = 'borgbackup-'  # dirname like 'myproject-1.2.0'
-
 min_python = (3, 2)
 if sys.version_info < min_python:
     print("Borg requires Python %d.%d or later" % min_python)
@@ -18,6 +10,8 @@ if sys.version_info < min_python:
 
 
 from setuptools import setup, Extension
+from setuptools.command.sdist import sdist
+
 
 compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
@@ -31,11 +25,11 @@ try:
     from Cython.Distutils import build_ext
     import Cython.Compiler.Main as cython_compiler
 
-    class Sdist(versioneer.cmd_sdist):
+    class Sdist(sdist):
         def __init__(self, *args, **kwargs):
             for src in glob('borg/*.pyx'):
                 cython_compiler.compile(src, cython_compiler.default_options)
-            versioneer.cmd_sdist.__init__(self, *args, **kwargs)
+            super().__init__(*args, **kwargs)
 
         def make_distribution(self):
             self.filelist.extend([
@@ -50,7 +44,7 @@ try:
             super().make_distribution()
 
 except ImportError:
-    class Sdist(versioneer.cmd_sdist):
+    class Sdist(sdist):
         def __init__(self, *args, **kwargs):
             raise Exception('Cython is required to run sdist')
 
@@ -90,8 +84,7 @@ library_dirs = [os.path.join(ssl_prefix, 'lib')]
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
 
-cmdclass = versioneer.get_cmdclass()
-cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
+cmdclass = {'build_ext': build_ext, 'sdist': Sdist}
 
 ext_modules = [
     Extension('borg.compress', [compress_source], libraries=['lz4']),
@@ -108,7 +101,9 @@ elif sys.platform == 'darwin':
 
 setup(
     name='borgbackup',
-    version=versioneer.get_version(),
+    use_scm_version={
+        'write_to': 'borg/_version.py',
+    },
     author='The Borg Collective (see AUTHORS file)',
     author_email='borgbackup@librelist.com',
     url='https://borgbackup.github.io/',
@@ -140,7 +135,8 @@ setup(
     },
     cmdclass=cmdclass,
     ext_modules=ext_modules,
+    setup_requires=['setuptools_scm>=1.7'],
     # msgpack pure python data corruption was fixed in 0.4.6.
     # Also, we might use some rather recent API features.
-    install_requires=['msgpack-python>=0.4.6']
+    install_requires=['msgpack-python>=0.4.6'],
 )

+ 0 - 1046
versioneer.py

@@ -1,1046 +0,0 @@
-
-# Version: 0.14
-
-"""
-The Versioneer
-==============
-
-* like a rocketeer, but for versions!
-* https://github.com/warner/python-versioneer
-* Brian Warner
-* License: Public Domain
-* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy
-* [![Latest Version]
-(https://pypip.in/version/versioneer/badge.svg?style=flat)
-](https://pypi.python.org/pypi/versioneer/)
-* [![Build Status]
-(https://travis-ci.org/warner/python-versioneer.png?branch=master)
-](https://travis-ci.org/warner/python-versioneer)
-
-This is a tool for managing a recorded version number in distutils-based
-python projects. The goal is to remove the tedious and error-prone "update
-the embedded version string" step from your release process. Making a new
-release should be as easy as recording a new tag in your version-control
-system, and maybe making new tarballs.
-
-
-## Quick Install
-
-* `pip install versioneer` to somewhere to your $PATH
-* run `versioneer-installer` in your source tree: this installs `versioneer.py`
-* follow the instructions below (also in the `versioneer.py` docstring)
-
-## Version Identifiers
-
-Source trees come from a variety of places:
-
-* a version-control system checkout (mostly used by developers)
-* a nightly tarball, produced by build automation
-* a snapshot tarball, produced by a web-based VCS browser, like github's
-  "tarball from tag" feature
-* a release tarball, produced by "setup.py sdist", distributed through PyPI
-
-Within each source tree, the version identifier (either a string or a number,
-this tool is format-agnostic) can come from a variety of places:
-
-* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
-  about recent "tags" and an absolute revision-id
-* the name of the directory into which the tarball was unpacked
-* an expanded VCS keyword ($Id$, etc)
-* a `_version.py` created by some earlier build step
-
-For released software, the version identifier is closely related to a VCS
-tag. Some projects use tag names that include more than just the version
-string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
-needs to strip the tag prefix to extract the version identifier. For
-unreleased software (between tags), the version identifier should provide
-enough information to help developers recreate the same tree, while also
-giving them an idea of roughly how old the tree is (after version 1.2, before
-version 1.3). Many VCS systems can report a description that captures this,
-for example 'git describe --tags --dirty --always' reports things like
-"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
-0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
-uncommitted changes.
-
-The version identifier is used for multiple purposes:
-
-* to allow the module to self-identify its version: `myproject.__version__`
-* to choose a name and prefix for a 'setup.py sdist' tarball
-
-## Theory of Operation
-
-Versioneer works by adding a special `_version.py` file into your source
-tree, where your `__init__.py` can import it. This `_version.py` knows how to
-dynamically ask the VCS tool for version information at import time. However,
-when you use "setup.py build" or "setup.py sdist", `_version.py` in the new
-copy is replaced by a small static file that contains just the generated
-version data.
-
-`_version.py` also contains `$Revision$` markers, and the installation
-process marks `_version.py` to have this marker rewritten with a tag name
-during the "git archive" command. As a result, generated tarballs will
-contain enough information to get the proper version.
-
-
-## Installation
-
-First, decide on values for the following configuration variables:
-
-* `VCS`: the version control system you use. Currently accepts "git".
-
-* `versionfile_source`:
-
-  A project-relative pathname into which the generated version strings should
-  be written. This is usually a `_version.py` next to your project's main
-  `__init__.py` file, so it can be imported at runtime. If your project uses
-  `src/myproject/__init__.py`, this should be `src/myproject/_version.py`.
-  This file should be checked in to your VCS as usual: the copy created below
-  by `setup.py versioneer` will include code that parses expanded VCS
-  keywords in generated tarballs. The 'build' and 'sdist' commands will
-  replace it with a copy that has just the calculated version string.
-
-  This must be set even if your project does not have any modules (and will
-  therefore never import `_version.py`), since "setup.py sdist" -based trees
-  still need somewhere to record the pre-calculated version strings. Anywhere
-  in the source tree should do. If there is a `__init__.py` next to your
-  `_version.py`, the `setup.py versioneer` command (described below) will
-  append some `__version__`-setting assignments, if they aren't already
-  present.
-
-* `versionfile_build`:
-
-  Like `versionfile_source`, but relative to the build directory instead of
-  the source directory. These will differ when your setup.py uses
-  'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`,
-  then you will probably have `versionfile_build='myproject/_version.py'` and
-  `versionfile_source='src/myproject/_version.py'`.
-
-  If this is set to None, then `setup.py build` will not attempt to rewrite
-  any `_version.py` in the built tree. If your project does not have any
-  libraries (e.g. if it only builds a script), then you should use
-  `versionfile_build = None` and override `distutils.command.build_scripts`
-  to explicitly insert a copy of `versioneer.get_version()` into your
-  generated script.
-
-* `tag_prefix`:
-
-  a string, like 'PROJECTNAME-', which appears at the start of all VCS tags.
-  If your tags look like 'myproject-1.2.0', then you should use
-  tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this
-  should be an empty string.
-
-* `parentdir_prefix`:
-
-  a string, frequently the same as tag_prefix, which appears at the start of
-  all unpacked tarball filenames. If your tarball unpacks into
-  'myproject-1.2.0', this should be 'myproject-'.
-
-This tool provides one script, named `versioneer-installer`. That script does
-one thing: write a copy of `versioneer.py` into the current directory.
-
-To versioneer-enable your project:
-
-* 1: Run `versioneer-installer` to copy `versioneer.py` into the top of your
-  source tree.
-
-* 2: add the following lines to the top of your `setup.py`, with the
-  configuration values you decided earlier:
-
-  ````
-  import versioneer
-  versioneer.VCS = 'git'
-  versioneer.versionfile_source = 'src/myproject/_version.py'
-  versioneer.versionfile_build = 'myproject/_version.py'
-  versioneer.tag_prefix = '' # tags are like 1.2.0
-  versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0'
-  ````
-
-* 3: add the following arguments to the setup() call in your setup.py:
-
-        version=versioneer.get_version(),
-        cmdclass=versioneer.get_cmdclass(),
-
-* 4: now run `setup.py versioneer`, which will create `_version.py`, and will
-  modify your `__init__.py` (if one exists next to `_version.py`) to define
-  `__version__` (by calling a function from `_version.py`). It will also
-  modify your `MANIFEST.in` to include both `versioneer.py` and the generated
-  `_version.py` in sdist tarballs.
-
-* 5: commit these changes to your VCS. To make sure you won't forget,
-  `setup.py versioneer` will mark everything it touched for addition.
-
-## Post-Installation Usage
-
-Once established, all uses of your tree from a VCS checkout should get the
-current version string. All generated tarballs should include an embedded
-version string (so users who unpack them will not need a VCS tool installed).
-
-If you distribute your project through PyPI, then the release process should
-boil down to two steps:
-
-* 1: git tag 1.0
-* 2: python setup.py register sdist upload
-
-If you distribute it through github (i.e. users use github to generate
-tarballs with `git archive`), the process is:
-
-* 1: git tag 1.0
-* 2: git push; git push --tags
-
-Currently, all version strings must be based upon a tag. Versioneer will
-report "unknown" until your tree has at least one tag in its history. This
-restriction will be fixed eventually (see issue #12).
-
-## Version-String Flavors
-
-Code which uses Versioneer can learn about its version string at runtime by
-importing `_version` from your main `__init__.py` file and running the
-`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
-import the top-level `versioneer.py` and run `get_versions()`.
-
-Both functions return a dictionary with different keys for different flavors
-of the version string:
-
-* `['version']`: A condensed PEP440-compliant string, equal to the
-  un-prefixed tag name for actual releases, and containing an additional
-  "local version" section with more detail for in-between builds. For Git,
-  this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe
-  --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates
-  that the tree is like the "1076c97" commit but has uncommitted changes
-  (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11"
-  tag. For released software (exactly equal to a known tag), the identifier
-  will only contain the stripped tag, e.g. "0.11".
-
-* `['full']`: detailed revision identifier. For Git, this is the full SHA1
-  commit id, followed by ".dirty" if the tree contains uncommitted changes,
-  e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty".
-
-Some variants are more useful than others. Including `full` in a bug report
-should allow developers to reconstruct the exact code being tested (or
-indicate the presence of local changes that should be shared with the
-developers). `version` is suitable for display in an "about" box or a CLI
-`--version` output: it can be easily compared against release notes and lists
-of bugs fixed in various releases.
-
-The `setup.py versioneer` command adds the following text to your
-`__init__.py` to place a basic version in `YOURPROJECT.__version__`:
-
-    from ._version import get_versions
-    __version__ = get_versions()['version']
-    del get_versions
-
-## Updating Versioneer
-
-To upgrade your project to a new release of Versioneer, do the following:
-
-* install the new Versioneer (`pip install -U versioneer` or equivalent)
-* re-run `versioneer-installer` in your source tree to replace your copy of
-  `versioneer.py`
-* edit `setup.py`, if necessary, to include any new configuration settings
-  indicated by the release notes
-* re-run `setup.py versioneer` to replace `SRC/_version.py`
-* commit any changed files
-
-### Upgrading from 0.10 to 0.11
-
-You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running
-`setup.py versioneer`. This will enable the use of additional version-control
-systems (SVN, etc) in the future.
-
-### Upgrading from 0.11 to 0.12
-
-Nothing special.
-
-## Upgrading to 0.14
-
-0.14 changes the format of the version string. 0.13 and earlier used
-hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a
-plus-separated "local version" section strings, with dot-separated
-components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old
-format, but should be ok with the new one.
-
-## Future Directions
-
-This tool is designed to make it easily extended to other version-control
-systems: all VCS-specific components are in separate directories like
-src/git/ . The top-level `versioneer.py` script is assembled from these
-components by running make-versioneer.py . In the future, make-versioneer.py
-will take a VCS name as an argument, and will construct a version of
-`versioneer.py` that is specific to the given VCS. It might also take the
-configuration arguments that are currently provided manually during
-installation by editing setup.py . Alternatively, it might go the other
-direction and include code from all supported VCS systems, reducing the
-number of intermediate scripts.
-
-
-## License
-
-To make Versioneer easier to embed, all its code is hereby released into the
-public domain. The `_version.py` that it creates is also in the public
-domain.
-
-"""
-
-import errno
-import os
-import re
-import subprocess
-import sys
-from distutils.command.build import build as _build
-from distutils.command.sdist import sdist as _sdist
-from distutils.core import Command
-
-# these configuration settings will be overridden by setup.py after it
-# imports us
-versionfile_source = None
-versionfile_build = None
-tag_prefix = None
-parentdir_prefix = None
-VCS = None
-
-# these dictionaries contain VCS-specific tools
-LONG_VERSION_PY = {}
-
-
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
-    assert isinstance(commands, list)
-    p = None
-    for c in commands:
-        try:
-            # remember shell=False, so use git.cmd on windows, not just git
-            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
-                                 stderr=(subprocess.PIPE if hide_stderr
-                                         else None))
-            break
-        except EnvironmentError:
-            e = sys.exc_info()[1]
-            if e.errno == errno.ENOENT:
-                continue
-            if verbose:
-                print("unable to run %s" % args[0])
-                print(e)
-            return None
-    else:
-        if verbose:
-            print("unable to find command, tried %s" % (commands,))
-        return None
-    stdout = p.communicate()[0].strip()
-    if sys.version_info[0] >= 3:
-        stdout = stdout.decode()
-    if p.returncode != 0:
-        if verbose:
-            print("unable to run %s (error)" % args[0])
-        return None
-    return stdout
-LONG_VERSION_PY['git'] = '''
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (built by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain. Generated by
-# versioneer-0.14 (https://github.com/warner/python-versioneer)
-
-import errno
-import os
-import re
-import subprocess
-import sys
-
-# these strings will be replaced by git during git-archive
-git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
-git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
-
-# these strings are filled in when 'setup.py versioneer' creates _version.py
-tag_prefix = "%(TAG_PREFIX)s"
-parentdir_prefix = "%(PARENTDIR_PREFIX)s"
-versionfile_source = "%(VERSIONFILE_SOURCE)s"
-
-
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
-    assert isinstance(commands, list)
-    p = None
-    for c in commands:
-        try:
-            # remember shell=False, so use git.cmd on windows, not just git
-            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
-                                 stderr=(subprocess.PIPE if hide_stderr
-                                         else None))
-            break
-        except EnvironmentError:
-            e = sys.exc_info()[1]
-            if e.errno == errno.ENOENT:
-                continue
-            if verbose:
-                print("unable to run %%s" %% args[0])
-                print(e)
-            return None
-    else:
-        if verbose:
-            print("unable to find command, tried %%s" %% (commands,))
-        return None
-    stdout = p.communicate()[0].strip()
-    if sys.version_info[0] >= 3:
-        stdout = stdout.decode()
-    if p.returncode != 0:
-        if verbose:
-            print("unable to run %%s (error)" %% args[0])
-        return None
-    return stdout
-
-
-def versions_from_parentdir(parentdir_prefix, root, verbose=False):
-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%%s', but '%%s' doesn't start with "
-                  "prefix '%%s'" %% (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
-
-
-def git_get_keywords(versionfile_abs):
-    # the code embedded in _version.py can just fetch the value of these
-    # keywords. When used from setup.py, we don't want to import _version.py,
-    # so we do it with a regexp instead. This function is not used from
-    # _version.py.
-    keywords = {}
-    try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-        f.close()
-    except EnvironmentError:
-        pass
-    return keywords
-
-
-def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
-    if not keywords:
-        return {}  # keyword-finding function failed to find keywords
-    refnames = keywords["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("keywords are unexpanded, not using")
-        return {}  # unexpanded, so not in an unpacked git-archive tarball
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
-    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
-    TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
-    if not tags:
-        # Either we're using git < 1.8.3, or there really are no tags. We use
-        # a heuristic: assume all version tags have a digit. The old git %%d
-        # expansion behaves like git log --decorate=short and strips out the
-        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
-        # between branches and tags. By ignoring refnames without digits, we
-        # filter out many common branch names like "release" and
-        # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r'\d', r)])
-        if verbose:
-            print("discarding '%%s', no digits" %% ",".join(refs-tags))
-    if verbose:
-        print("likely tags: %%s" %% ",".join(sorted(tags)))
-    for ref in sorted(tags):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            if verbose:
-                print("picking %%s" %% r)
-            return {"version": r,
-                    "full": keywords["full"].strip()}
-    # no suitable tags, so version is "0+unknown", but full hex is still there
-    if verbose:
-        print("no suitable tags, using unknown + full revision id")
-    return {"version": "0+unknown",
-            "full": keywords["full"].strip()}
-
-
-def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
-    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
-
-    # dirty
-    dirty = git_describe.endswith("-dirty")
-    if dirty:
-        git_describe = git_describe[:git_describe.rindex("-dirty")]
-    dirty_suffix = ".dirty" if dirty else ""
-
-    # now we have TAG-NUM-gHEX or HEX
-
-    if "-" not in git_describe:  # just HEX
-        return "0+untagged.g"+git_describe+dirty_suffix, dirty
-
-    # just TAG-NUM-gHEX
-    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
-    if not mo:
-        # unparseable. Maybe git-describe is misbehaving?
-        return "0+unparseable"+dirty_suffix, dirty
-
-    # tag
-    full_tag = mo.group(1)
-    if not full_tag.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%%s' doesn't start with prefix '%%s'"
-            print(fmt %% (full_tag, tag_prefix))
-        return None, dirty
-    tag = full_tag[len(tag_prefix):]
-
-    # distance: number of commits since tag
-    distance = int(mo.group(2))
-
-    # commit: short hex revision ID
-    commit = mo.group(3)
-
-    # now build up version string, with post-release "local version
-    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
-    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
-    # can always test version.endswith(".dirty").
-    version = tag
-    if distance or dirty:
-        version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix
-
-    return version, dirty
-
-
-def git_versions_from_vcs(tag_prefix, root, verbose=False):
-    # this runs 'git' from the root of the source tree. This only gets called
-    # if the git-archive 'subst' keywords were *not* expanded, and
-    # _version.py hasn't already been rewritten with a short version string,
-    # meaning we're inside a checked out source tree.
-
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %%s" %% root)
-        return {}  # get_versions() will try next method
-
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
-    # if there are no tags, this yields HEX[-dirty] (no NUM)
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
-                                "--always", "--long"],
-                         cwd=root)
-    # --long was added in git-1.5.5
-    if stdout is None:
-        return {}  # try next method
-    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
-
-    # build "full", which is FULLHEX[.dirty]
-    stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if dirty:
-        full += ".dirty"
-
-    return {"version": version, "full": full}
-
-
-def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
-    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
-    # __file__, we can work backwards from there to the root. Some
-    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
-    # case we can only use expanded keywords.
-
-    keywords = {"refnames": git_refnames, "full": git_full}
-    ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
-    if ver:
-        return ver
-
-    try:
-        root = os.path.realpath(__file__)
-        # versionfile_source is the relative path from the top of the source
-        # tree (where the .git directory might live) to this file. Invert
-        # this to find the root from __file__.
-        for i in versionfile_source.split('/'):
-            root = os.path.dirname(root)
-    except NameError:
-        return default
-
-    return (git_versions_from_vcs(tag_prefix, root, verbose)
-            or versions_from_parentdir(parentdir_prefix, root, verbose)
-            or default)
-'''
-
-
-def git_get_keywords(versionfile_abs):
-    # the code embedded in _version.py can just fetch the value of these
-    # keywords. When used from setup.py, we don't want to import _version.py,
-    # so we do it with a regexp instead. This function is not used from
-    # _version.py.
-    keywords = {}
-    try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-        f.close()
-    except EnvironmentError:
-        pass
-    return keywords
-
-
-def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
-    if not keywords:
-        return {}  # keyword-finding function failed to find keywords
-    refnames = keywords["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("keywords are unexpanded, not using")
-        return {}  # unexpanded, so not in an unpacked git-archive tarball
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
-    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
-    TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
-    if not tags:
-        # Either we're using git < 1.8.3, or there really are no tags. We use
-        # a heuristic: assume all version tags have a digit. The old git %d
-        # expansion behaves like git log --decorate=short and strips out the
-        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
-        # between branches and tags. By ignoring refnames without digits, we
-        # filter out many common branch names like "release" and
-        # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r'\d', r)])
-        if verbose:
-            print("discarding '%s', no digits" % ",".join(refs-tags))
-    if verbose:
-        print("likely tags: %s" % ",".join(sorted(tags)))
-    for ref in sorted(tags):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            if verbose:
-                print("picking %s" % r)
-            return {"version": r,
-                    "full": keywords["full"].strip()}
-    # no suitable tags, so version is "0+unknown", but full hex is still there
-    if verbose:
-        print("no suitable tags, using unknown + full revision id")
-    return {"version": "0+unknown",
-            "full": keywords["full"].strip()}
-
-
-def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
-    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
-
-    # dirty
-    dirty = git_describe.endswith("-dirty")
-    if dirty:
-        git_describe = git_describe[:git_describe.rindex("-dirty")]
-    dirty_suffix = ".dirty" if dirty else ""
-
-    # now we have TAG-NUM-gHEX or HEX
-
-    if "-" not in git_describe:  # just HEX
-        return "0+untagged.g"+git_describe+dirty_suffix, dirty
-
-    # just TAG-NUM-gHEX
-    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
-    if not mo:
-        # unparseable. Maybe git-describe is misbehaving?
-        return "0+unparseable"+dirty_suffix, dirty
-
-    # tag
-    full_tag = mo.group(1)
-    if not full_tag.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%s' doesn't start with prefix '%s'"
-            print(fmt % (full_tag, tag_prefix))
-        return None, dirty
-    tag = full_tag[len(tag_prefix):]
-
-    # distance: number of commits since tag
-    distance = int(mo.group(2))
-
-    # commit: short hex revision ID
-    commit = mo.group(3)
-
-    # now build up version string, with post-release "local version
-    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
-    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
-    # can always test version.endswith(".dirty").
-    version = tag
-    if distance or dirty:
-        version += "+%d.g%s" % (distance, commit) + dirty_suffix
-
-    return version, dirty
-
-
-def git_versions_from_vcs(tag_prefix, root, verbose=False):
-    # this runs 'git' from the root of the source tree. This only gets called
-    # if the git-archive 'subst' keywords were *not* expanded, and
-    # _version.py hasn't already been rewritten with a short version string,
-    # meaning we're inside a checked out source tree.
-
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %s" % root)
-        return {}  # get_versions() will try next method
-
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
-    # if there are no tags, this yields HEX[-dirty] (no NUM)
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
-                                "--always", "--long"],
-                         cwd=root)
-    # --long was added in git-1.5.5
-    if stdout is None:
-        return {}  # try next method
-    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
-
-    # build "full", which is FULLHEX[.dirty]
-    stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if dirty:
-        full += ".dirty"
-
-    return {"version": version, "full": full}
-
-
-def do_vcs_install(manifest_in, versionfile_source, ipy):
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-    files = [manifest_in, versionfile_source]
-    if ipy:
-        files.append(ipy)
-    try:
-        me = __file__
-        if me.endswith(".pyc") or me.endswith(".pyo"):
-            me = os.path.splitext(me)[0] + ".py"
-        versioneer_file = os.path.relpath(me)
-    except NameError:
-        versioneer_file = "versioneer.py"
-    files.append(versioneer_file)
-    present = False
-    try:
-        f = open(".gitattributes", "r")
-        for line in f.readlines():
-            if line.strip().startswith(versionfile_source):
-                if "export-subst" in line.strip().split()[1:]:
-                    present = True
-        f.close()
-    except EnvironmentError:
-        pass
-    if not present:
-        f = open(".gitattributes", "a+")
-        f.write("%s export-subst\n" % versionfile_source)
-        f.close()
-        files.append(".gitattributes")
-    run_command(GITS, ["add", "--"] + files)
-
-
-def versions_from_parentdir(parentdir_prefix, root, verbose=False):
-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with "
-                  "prefix '%s'" % (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
-
-SHORT_VERSION_PY = """
-# This file was generated by 'versioneer.py' (0.14) from
-# revision-control system data, or from the parent directory name of an
-# unpacked source archive. Distribution tarballs contain a pre-generated copy
-# of this file.
-
-version_version = '%(version)s'
-version_full = '%(full)s'
-def get_versions(default={}, verbose=False):
-    return {'version': version_version, 'full': version_full}
-
-"""
-
-DEFAULT = {"version": "0+unknown", "full": "unknown"}
-
-
-def versions_from_file(filename):
-    versions = {}
-    try:
-        with open(filename) as f:
-            for line in f.readlines():
-                mo = re.match("version_version = '([^']+)'", line)
-                if mo:
-                    versions["version"] = mo.group(1)
-                mo = re.match("version_full = '([^']+)'", line)
-                if mo:
-                    versions["full"] = mo.group(1)
-    except EnvironmentError:
-        return {}
-
-    return versions
-
-
-def write_to_version_file(filename, versions):
-    with open(filename, "w") as f:
-        f.write(SHORT_VERSION_PY % versions)
-
-    print("set %s to '%s'" % (filename, versions["version"]))
-
-
-def get_root():
-    try:
-        return os.path.dirname(os.path.abspath(__file__))
-    except NameError:
-        return os.path.dirname(os.path.abspath(sys.argv[0]))
-
-
-def vcs_function(vcs, suffix):
-    return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None)
-
-
-def get_versions(default=DEFAULT, verbose=False):
-    # returns dict with two keys: 'version' and 'full'
-    assert versionfile_source is not None, \
-        "please set versioneer.versionfile_source"
-    assert tag_prefix is not None, "please set versioneer.tag_prefix"
-    assert parentdir_prefix is not None, \
-        "please set versioneer.parentdir_prefix"
-    assert VCS is not None, "please set versioneer.VCS"
-
-    # I am in versioneer.py, which must live at the top of the source tree,
-    # which we use to compute the root directory. py2exe/bbfreeze/non-CPython
-    # don't have __file__, in which case we fall back to sys.argv[0] (which
-    # ought to be the setup.py script). We prefer __file__ since that's more
-    # robust in cases where setup.py was invoked in some weird way (e.g. pip)
-    root = get_root()
-    versionfile_abs = os.path.join(root, versionfile_source)
-
-    # extract version from first of _version.py, VCS command (e.g. 'git
-    # describe'), parentdir. This is meant to work for developers using a
-    # source checkout, for users of a tarball created by 'setup.py sdist',
-    # and for users of a tarball/zipball created by 'git archive' or github's
-    # download-from-tag feature or the equivalent in other VCSes.
-
-    get_keywords_f = vcs_function(VCS, "get_keywords")
-    versions_from_keywords_f = vcs_function(VCS, "versions_from_keywords")
-    if get_keywords_f and versions_from_keywords_f:
-        vcs_keywords = get_keywords_f(versionfile_abs)
-        ver = versions_from_keywords_f(vcs_keywords, tag_prefix)
-        if ver:
-            if verbose:
-                print("got version from expanded keyword %s" % ver)
-            return ver
-
-    ver = versions_from_file(versionfile_abs)
-    if ver:
-        if verbose:
-            print("got version from file %s %s" % (versionfile_abs, ver))
-        return ver
-
-    versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs")
-    if versions_from_vcs_f:
-        ver = versions_from_vcs_f(tag_prefix, root, verbose)
-        if ver:
-            if verbose:
-                print("got version from VCS %s" % ver)
-            return ver
-
-    ver = versions_from_parentdir(parentdir_prefix, root, verbose)
-    if ver:
-        if verbose:
-            print("got version from parentdir %s" % ver)
-        return ver
-
-    if verbose:
-        print("got version from default %s" % default)
-    return default
-
-
-def get_version(verbose=False):
-    return get_versions(verbose=verbose)["version"]
-
-
-class cmd_version(Command):
-    description = "report generated version string"
-    user_options = []
-    boolean_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        ver = get_version(verbose=True)
-        print("Version is currently: %s" % ver)
-
-
-class cmd_build(_build):
-    def run(self):
-        versions = get_versions(verbose=True)
-        _build.run(self)
-        # now locate _version.py in the new build/ directory and replace it
-        # with an updated value
-        if versionfile_build:
-            target_versionfile = os.path.join(self.build_lib,
-                                              versionfile_build)
-            print("UPDATING %s" % target_versionfile)
-            os.unlink(target_versionfile)
-            with open(target_versionfile, "w") as f:
-                f.write(SHORT_VERSION_PY % versions)
-
-if 'cx_Freeze' in sys.modules:  # cx_freeze enabled?
-    from cx_Freeze.dist import build_exe as _build_exe
-
-    class cmd_build_exe(_build_exe):
-        def run(self):
-            versions = get_versions(verbose=True)
-            target_versionfile = versionfile_source
-            print("UPDATING %s" % target_versionfile)
-            os.unlink(target_versionfile)
-            with open(target_versionfile, "w") as f:
-                f.write(SHORT_VERSION_PY % versions)
-
-            _build_exe.run(self)
-            os.unlink(target_versionfile)
-            with open(versionfile_source, "w") as f:
-                assert VCS is not None, "please set versioneer.VCS"
-                LONG = LONG_VERSION_PY[VCS]
-                f.write(LONG % {"DOLLAR": "$",
-                                "TAG_PREFIX": tag_prefix,
-                                "PARENTDIR_PREFIX": parentdir_prefix,
-                                "VERSIONFILE_SOURCE": versionfile_source,
-                                })
-
-
-class cmd_sdist(_sdist):
-    def run(self):
-        versions = get_versions(verbose=True)
-        self._versioneer_generated_versions = versions
-        # unless we update this, the command will keep using the old version
-        self.distribution.metadata.version = versions["version"]
-        return _sdist.run(self)
-
-    def make_release_tree(self, base_dir, files):
-        _sdist.make_release_tree(self, base_dir, files)
-        # now locate _version.py in the new base_dir directory (remembering
-        # that it may be a hardlink) and replace it with an updated value
-        target_versionfile = os.path.join(base_dir, versionfile_source)
-        print("UPDATING %s" % target_versionfile)
-        os.unlink(target_versionfile)
-        with open(target_versionfile, "w") as f:
-            f.write(SHORT_VERSION_PY % self._versioneer_generated_versions)
-
-INIT_PY_SNIPPET = """
-from ._version import get_versions
-__version__ = get_versions()['version']
-del get_versions
-"""
-
-
-class cmd_update_files(Command):
-    description = ("install/upgrade Versioneer files: "
-                   "__init__.py SRC/_version.py")
-    user_options = []
-    boolean_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        print(" creating %s" % versionfile_source)
-        with open(versionfile_source, "w") as f:
-            assert VCS is not None, "please set versioneer.VCS"
-            LONG = LONG_VERSION_PY[VCS]
-            f.write(LONG % {"DOLLAR": "$",
-                            "TAG_PREFIX": tag_prefix,
-                            "PARENTDIR_PREFIX": parentdir_prefix,
-                            "VERSIONFILE_SOURCE": versionfile_source,
-                            })
-
-        ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py")
-        if os.path.exists(ipy):
-            try:
-                with open(ipy, "r") as f:
-                    old = f.read()
-            except EnvironmentError:
-                old = ""
-            if INIT_PY_SNIPPET not in old:
-                print(" appending to %s" % ipy)
-                with open(ipy, "a") as f:
-                    f.write(INIT_PY_SNIPPET)
-            else:
-                print(" %s unmodified" % ipy)
-        else:
-            print(" %s doesn't exist, ok" % ipy)
-            ipy = None
-
-        # Make sure both the top-level "versioneer.py" and versionfile_source
-        # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
-        # they'll be copied into source distributions. Pip won't be able to
-        # install the package without this.
-        manifest_in = os.path.join(get_root(), "MANIFEST.in")
-        simple_includes = set()
-        try:
-            with open(manifest_in, "r") as f:
-                for line in f:
-                    if line.startswith("include "):
-                        for include in line.split()[1:]:
-                            simple_includes.add(include)
-        except EnvironmentError:
-            pass
-        # That doesn't cover everything MANIFEST.in can do
-        # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
-        # it might give some false negatives. Appending redundant 'include'
-        # lines is safe, though.
-        if "versioneer.py" not in simple_includes:
-            print(" appending 'versioneer.py' to MANIFEST.in")
-            with open(manifest_in, "a") as f:
-                f.write("include versioneer.py\n")
-        else:
-            print(" 'versioneer.py' already in MANIFEST.in")
-        if versionfile_source not in simple_includes:
-            print(" appending versionfile_source ('%s') to MANIFEST.in" %
-                  versionfile_source)
-            with open(manifest_in, "a") as f:
-                f.write("include %s\n" % versionfile_source)
-        else:
-            print(" versionfile_source already in MANIFEST.in")
-
-        # Make VCS-specific changes. For git, this means creating/changing
-        # .gitattributes to mark _version.py for export-time keyword
-        # substitution.
-        do_vcs_install(manifest_in, versionfile_source, ipy)
-
-
-def get_cmdclass():
-    cmds = {'version': cmd_version,
-            'versioneer': cmd_update_files,
-            'build': cmd_build,
-            'sdist': cmd_sdist,
-            }
-    if 'cx_Freeze' in sys.modules:  # cx_freeze enabled?
-        cmds['build_exe'] = cmd_build_exe
-        del cmds['build']
-
-    return cmds