
Merge branch 'master' into faster-cache-sync

Thomas Waldmann 9 years ago
parent
commit
26bde96a3a
12 changed files with 375 additions and 106 deletions
  1. CHANGES.rst (+21, -0)
  2. borg/_chunker.c (+5, -3)
  3. borg/archive.py (+1, -0)
  4. borg/archiver.py (+80, -52)
  5. borg/cache.py (+13, -6)
  6. borg/chunker.pyx (+1, -1)
  7. borg/crypto.pyx (+1, -1)
  8. borg/helpers.py (+52, -2)
  9. borg/repository.py (+3, -3)
  10. borg/testsuite/archiver.py (+8, -0)
  11. borg/testsuite/helpers.py (+173, -33)
  12. docs/usage.rst (+17, -5)

+ 21 - 0
CHANGES.rst

@@ -2,6 +2,27 @@ Borg Changelog
 ==============
 
 
+Version 0.26.0 (not released yet)
+---------------------------------
+
+New features:
+
+- BORG_REPO env var to specify the default repo, #168
+- read special files as if they were regular files, #79
+
+Bug fixes:
+
+- borg mount repo: use absolute path, attic #200, attic #137
+- chunker: use off_t to get 64bit on 32bit platform, #178
+- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0)
+- fix reaction to "no" answer at delete repo prompt, #182
+
+Other changes:
+
+- detect inconsistency / corruption / hash collision, #170
+- replace versioneer with setuptools_scm, #106
+
+
 Version 0.25.0
 --------------
 

+ 5 - 3
borg/_chunker.c

@@ -83,7 +83,8 @@ typedef struct {
     PyObject *fd;
     int fh;
     int done, eof;
-    size_t remaining, bytes_read, bytes_yielded, position, last;
+    size_t remaining, position, last;
+    off_t bytes_read, bytes_yielded;
 } Chunker;
 
 static Chunker *
@@ -96,6 +97,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32
     c->table = buzhash_init_table(seed);
     c->buf_size = max_size;
     c->data = malloc(c->buf_size);
+    c->fh = -1;
     return c;
 }
 
@@ -128,7 +130,7 @@ static int
 chunker_fill(Chunker *c)
 {
     ssize_t n;
-    size_t offset, length;
+    off_t offset, length;
     PyObject *data;
     memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
     c->position -= c->last;
@@ -161,7 +163,7 @@ chunker_fill(Chunker *c)
         // size limit) kick out data from the cache that might be still useful
         // for the OS or other processes.
         if (length > 0) {
-            posix_fadvise(c->fh, (off_t) offset, (off_t) length, POSIX_FADV_DONTNEED);
+            posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED);
         }
         #endif
     }

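The point of switching `bytes_read`, `bytes_yielded`, `offset` and `length` from `size_t` to `off_t` is that on a 32-bit platform `size_t` is only 32 bits wide, while file offsets can exceed 4 GiB. A quick Python sketch (my own illustration, not part of the change) of what a 32-bit counter would do to such an offset:

```python
# Hypothetical illustration of the 32-bit truncation the off_t change avoids:
# a byte offset 6 GiB into a large file does not fit into a 32-bit size_t.
offset = 6 * 1024**3           # 6 GiB read so far
truncated = offset % 2**32     # value a 32-bit unsigned counter would hold
print(offset, truncated)       # 6442450944 2147483648 -> posix_fadvise would get a wrong range
```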
+ 1 - 0
borg/archive.py

@@ -455,6 +455,7 @@ class Archive:
             b'mtime': int_to_bigint(int(time.time()) * 1000000000)
         }
         self.add_item(item)
+        return 'i'  # stdin
 
     def process_file(self, path, st, cache):
         status = None

+ 80 - 52
borg/archiver.py

@@ -102,17 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
 
     def do_create(self, args):
         """Create new archive"""
+        dry_run = args.dry_run
         t0 = datetime.now()
-        repository = self.open_repository(args.archive, exclusive=True)
-        manifest, key = Manifest.load(repository)
-        compr_args = dict(buffer=COMPR_BUFFER)
-        compr_args.update(args.compression)
-        key.compressor = Compressor(**compr_args)
-        cache = Cache(repository, key, manifest, do_files=args.cache_files)
-        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
-                          create=True, checkpoint_interval=args.checkpoint_interval,
-                          numeric_owner=args.numeric_owner, progress=args.progress,
-                          chunker_params=args.chunker_params)
+        if not dry_run:
+            repository = self.open_repository(args.archive, exclusive=True)
+            manifest, key = Manifest.load(repository)
+            compr_args = dict(buffer=COMPR_BUFFER)
+            compr_args.update(args.compression)
+            key.compressor = Compressor(**compr_args)
+            cache = Cache(repository, key, manifest, do_files=args.cache_files)
+            archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
+                              create=True, checkpoint_interval=args.checkpoint_interval,
+                              numeric_owner=args.numeric_owner, progress=args.progress,
+                              chunker_params=args.chunker_params)
+        else:
+            archive = cache = None
         # Add cache dir to inode_skip list
         skip_inodes = set()
         try:
@@ -130,11 +134,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         for path in args.paths:
             if path == '-':  # stdin
                 path = 'stdin'
-                self.print_verbose(path)
-                try:
-                    archive.process_stdin(path, cache)
-                except IOError as e:
-                    self.print_error('%s: %s', path, e)
+                if not dry_run:
+                    try:
+                        status = archive.process_stdin(path, cache)
+                    except IOError as e:
+                        self.print_error('%s: %s', path, e)
+                else:
+                    status = '-'
+                self.print_verbose("%1s %s", status, path)
                 continue
             path = os.path.normpath(path)
             if args.dontcross:
@@ -145,25 +152,28 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                     continue
             else:
                 restrict_dev = None
-            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
-        archive.save(timestamp=args.timestamp)
-        if args.progress:
-            archive.stats.show_progress(final=True)
-        if args.stats:
-            t = datetime.now()
-            diff = t - t0
-            print('-' * 78)
-            print('Archive name: %s' % args.archive.archive)
-            print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
-            print('Start time: %s' % t0.strftime('%c'))
-            print('End time: %s' % t.strftime('%c'))
-            print('Duration: %s' % format_timedelta(diff))
-            print('Number of files: %d' % archive.stats.nfiles)
-            archive.stats.print_('This archive:', cache)
-            print('-' * 78)
+            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev,
+                          read_special=args.read_special, dry_run=dry_run)
+        if not dry_run:
+            archive.save(timestamp=args.timestamp)
+            if args.progress:
+                archive.stats.show_progress(final=True)
+            if args.stats:
+                t = datetime.now()
+                diff = t - t0
+                print('-' * 78)
+                print('Archive name: %s' % args.archive.archive)
+                print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+                print('Start time: %s' % t0.strftime('%c'))
+                print('End time: %s' % t.strftime('%c'))
+                print('Duration: %s' % format_timedelta(diff))
+                print('Number of files: %d' % archive.stats.nfiles)
+                archive.stats.print_('This archive:', cache)
+                print('-' * 78)
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
+    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev,
+                 read_special=False, dry_run=False):
         if exclude_path(path, excludes):
             return
         try:
@@ -180,15 +190,18 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # Ignore if nodump flag is set
         if has_lchflags and (st.st_flags & stat.UF_NODUMP):
             return
-        if stat.S_ISREG(st.st_mode):
-            try:
-                status = archive.process_file(path, st, cache)
-            except IOError as e:
-                self.print_error('%s: %s', path, e)
+        if (stat.S_ISREG(st.st_mode) or
+            read_special and not stat.S_ISDIR(st.st_mode)):
+            if not dry_run:
+                try:
+                    status = archive.process_file(path, st, cache)
+                except IOError as e:
+                    self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
             if exclude_caches and is_cachedir(path):
                 return
-            status = archive.process_dir(path, st)
+            if not dry_run:
+                status = archive.process_dir(path, st)
             try:
                 entries = os.listdir(path)
             except OSError as e:
@@ -197,13 +210,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 for filename in sorted(entries):
                     entry_path = os.path.normpath(os.path.join(path, filename))
                     self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  entry_path, restrict_dev)
+                                  entry_path, restrict_dev, read_special=read_special,
+                                  dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
-            status = archive.process_symlink(path, st)
+            if not dry_run:
+                status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
-            status = archive.process_fifo(path, st)
+            if not dry_run:
+                status = archive.process_fifo(path, st)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
-            status = archive.process_dev(path, st)
+            if not dry_run:
+                status = archive.process_dev(path, st)
         elif stat.S_ISSOCK(st.st_mode):
             # Ignore unix sockets
             return
@@ -219,7 +236,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # Note: A/M/U is relative to the "files" cache, not to the repo.
         # This would be an issue if the files cache is not used.
         if status is None:
-            status = '?'  # need to add a status code somewhere
+            if not dry_run:
+                status = '?'  # need to add a status code somewhere
+            else:
+                status = '-'  # dry run, item was not backed up
         # output ALL the stuff - it can be easily filtered using grep.
         # even stuff considered unchanged might be interesting.
         self.print_verbose("%1s %s", status, remove_surrogates(path))
@@ -296,10 +316,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             print("You requested to completely DELETE the repository *including* all archives it contains:")
             print("You requested to completely DELETE the repository *including* all archives it contains:")
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 print(format_archive(archive_info))
                 print(format_archive(archive_info))
-            while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+            if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                 print("""Type "YES" if you understand this and want to continue.\n""")
                 print("""Type "YES" if you understand this and want to continue.\n""")
-                if input('Do you want to continue? ') == 'YES':
-                    break
+                if input('Do you want to continue? ') != 'YES':
+                    self.exit_code = 1
+                    return self.exit_code
             repository.destroy()
             cache.destroy()
             print("Repository and corresponding cache were deleted.")
@@ -556,7 +577,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           description=self.do_init.__doc__, epilog=init_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_init)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                help='repository to create')
         subparser.add_argument('-e', '--encryption', dest='encryption',
@@ -604,7 +625,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           epilog=check_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_check)
-        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
+        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                help='repository or archive to check consistency of')
         subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
@@ -629,7 +650,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                           epilog=change_passphrase_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
         subparser.set_defaults(func=self.do_change_passphrase)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False))
 
         create_epilog = textwrap.dedent("""
@@ -687,6 +708,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                     'zlib,0 .. zlib,9 == zlib (with level 0..9), '
                                     'lzma == lzma (default level 6), '
                                     'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        subparser.add_argument('--read-special', dest='read_special',
+                               action='store_true', default=False,
+                               help='open and read special files as if they were regular files')
+        subparser.add_argument('-n', '--dry-run', dest='dry_run',
+                               action='store_true', default=False,
+                               help='do not create a backup archive')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
@@ -760,7 +787,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('-s', '--stats', dest='stats',
                                action='store_true', default=False,
                                help='print statistics for the deleted archive')
-        subparser.add_argument('target', metavar='TARGET',
+        subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
                                type=location_validator(),
                                help='archive or repository to delete')
 
@@ -775,7 +802,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('--short', dest='short',
                                action='store_true', default=False,
                                help='only print file/directory names, nothing else')
-        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
+        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
+                               type=location_validator(),
                                help='repository/archive to list contents of')
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for
@@ -858,7 +886,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                help='number of yearly archives to keep')
         subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
                                help='only consider archive names starting with this prefix')
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                help='repository to prune')
 

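One detail worth noting in the reworked `_process()` above: the new condition `stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode)` relies on Python's `and` binding more tightly than `or`. A small standalone sketch of that predicate (my own illustration, not code from the commit):

```python
import stat

def should_read_as_file(mode, read_special=False):
    # Mirrors the dispatch condition: regular files are always read; with
    # --read-special anything that is not a directory (devices, FIFOs, ...)
    # is read as if it were a regular file. 'and' binds tighter than 'or',
    # so no extra parentheses are needed around the second operand.
    return stat.S_ISREG(mode) or read_special and not stat.S_ISDIR(mode)

assert should_read_as_file(stat.S_IFREG | 0o644)
assert not should_read_as_file(stat.S_IFBLK | 0o600)
assert should_read_as_file(stat.S_IFBLK | 0o600, read_special=True)
assert not should_read_as_file(stat.S_IFDIR | 0o755, read_special=True)
```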
+ 13 - 6
borg/cache.py

@@ -3,6 +3,7 @@ from .remote import cache_if_remote
 import errno
 import msgpack
 import os
+import stat
 import sys
 from binascii import hexlify
 import shutil
@@ -327,9 +328,9 @@ class Cache:
     def add_chunk(self, id, data, stats):
         if not self.txn_active:
             self.begin_txn()
-        if self.seen_chunk(id):
-            return self.chunk_incref(id, stats)
         size = len(data)
+        if self.seen_chunk(id, size):
+            return self.chunk_incref(id, stats)
         data = self.key.encrypt(data)
         csize = len(data)
         self.repository.put(id, data, wait=False)
@@ -337,8 +338,14 @@ class Cache:
         stats.update(size, csize, True)
         return id, size, csize
 
-    def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0, 0))[0]
+    def seen_chunk(self, id, size=None):
+        refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
+        if size is not None and stored_size is not None and size != stored_size:
+            # we already have a chunk with that id, but different size.
+            # this is either a hash collision (unlikely) or corruption or a bug.
+            raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
+                            id, stored_size, size))
+        return refcount
 
     def chunk_incref(self, id, stats):
         if not self.txn_active:
@@ -361,7 +368,7 @@ class Cache:
             stats.update(-size, -csize, False)
 
     def file_known_and_unchanged(self, path_hash, st):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return None
         if self.files is None:
             self._read_files()
@@ -378,7 +385,7 @@ class Cache:
             return None
 
     def memorize_file(self, path_hash, st, ids):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
             return
         # Entry: Age, inode, size, mtime, chunk ids
         mtime_ns = st_mtime_ns(st)

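The reasoning behind the extended `seen_chunk()`: chunks are addressed by their (keyed) hash, so seeing the same id with a different size means corruption, a bug, or an astronomically unlikely hash collision, and it is better to fail loudly than to silently deduplicate against wrong data. A toy model of the check, independent of the real Cache class (my own sketch):

```python
# Toy model of the consistency check added to Cache.seen_chunk();
# the real cache stores (refcount, size, csize) tuples keyed by chunk id.
chunks = {}  # id -> (refcount, size, csize)

def seen_chunk(id, size=None):
    refcount, stored_size, _ = chunks.get(id, (0, None, None))
    if size is not None and stored_size is not None and size != stored_size:
        # same id, different size: hash collision (unlikely), corruption, or a bug
        raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" %
                        (id, stored_size, size))
    return refcount

chunks[b'deadbeef'] = (1, 1024, 700)
assert seen_chunk(b'deadbeef', 1024) == 1   # known chunk, consistent size
assert seen_chunk(b'00000000', 512) == 0    # unseen chunk -> refcount 0
```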
+ 1 - 1
borg/chunker.pyx

@@ -20,7 +20,7 @@ cdef extern from "_chunker.c":
 cdef class Chunker:
     cdef _Chunker *chunker
 
-    def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size):
+    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
         min_size = 1 << chunk_min_exp
         max_size = 1 << chunk_max_exp
         hash_mask = (1 << hash_mask_bits) - 1

+ 1 - 1
borg/crypto.pyx

@@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
 long_to_bytes = lambda x: _long.pack(x)
 
 
-def num_aes_blocks(length):
+def num_aes_blocks(int length):
     """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data.
     """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data.
        Note: this is only correct for modes without padding, like AES-CTR.
        Note: this is only correct for modes without padding, like AES-CTR.
     """
     """

+ 52 - 2
borg/helpers.py

@@ -1,12 +1,15 @@
 import argparse
 import binascii
 from collections import namedtuple
+from functools import wraps
 import grp
 import os
 import pwd
 import re
 import sys
 import time
+import unicodedata
+
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from operator import attrgetter
@@ -220,6 +223,23 @@ def exclude_path(path, patterns):
 # unify the two cases, we add a path separator to the end of
 # the path before matching.
 
+def normalized(func):
+    """ Decorator for the Pattern match methods, returning a wrapper that
+    normalizes OSX paths to match the normalized pattern on OSX, and 
+    returning the original method on other platforms"""
+    @wraps(func)
+    def normalize_wrapper(self, path):
+        return func(self, unicodedata.normalize("NFD", path))
+
+    if sys.platform in ('darwin',):
+        # HFS+ converts paths to a canonical form, so users shouldn't be
+        # required to enter an exact match
+        return normalize_wrapper
+    else:
+        # Windows and Unix filesystems allow different forms, so users
+        # always have to enter an exact match
+        return func
+
 class IncludePattern:
     """Literal files or directories listed on the command line
     for some operations (e.g. extract, but not create).
@@ -227,8 +247,12 @@ class IncludePattern:
     path match as well.  A trailing slash makes no difference.
     """
     def __init__(self, pattern):
+        if sys.platform in ('darwin',):
+            pattern = unicodedata.normalize("NFD", pattern)
+
         self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
 
+    @normalized
     def match(self, path):
         return (path+os.path.sep).startswith(self.pattern)
 
@@ -245,10 +269,15 @@ class ExcludePattern(IncludePattern):
             self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
         else:
             self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
+
+        if sys.platform in ('darwin',):
+            self.pattern = unicodedata.normalize("NFD", self.pattern)
+
         # fnmatch and re.match both cache compiled regular expressions.
         # Nevertheless, this is about 10 times faster.
         self.regex = re.compile(translate(self.pattern))
 
+    @normalized
     def match(self, path):
         return self.regex.match(path+os.path.sep) is not None
 
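For context on the `normalized` decorator and the NFD handling above: HFS+ stores file names in a decomposed Unicode form, so a pattern typed with a composed "á" would otherwise never match what the filesystem actually returns. A small standard-library illustration (my own sketch, not part of the commit):

```python
import unicodedata

composed = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'    # 'bá' as a single precomposed code point
decomposed = 'ba\N{COMBINING ACUTE ACCENT}'          # 'a' followed by a combining accent

assert composed != decomposed                        # a plain string comparison fails
assert unicodedata.normalize('NFD', composed) == decomposed
# This is why, on OS X only, both the pattern and the candidate path are
# normalized to NFD before the match is attempted.
```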
@@ -466,13 +495,34 @@ class Location:
                          r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
     scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                         r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
+    # get the repo from BORG_RE env and the optional archive from param.
+    # if the syntax requires giving REPOSITORY (see "borg mount"),
+    # use "::" to let it use the env var.
+    # if REPOSITORY argument is optional, it'll automatically use the env.
+    env_re = re.compile(r'(?:::(?P<archive>.+)?)?$')
 
-    def __init__(self, text):
+    def __init__(self, text=''):
         self.orig = text
-        if not self.parse(text):
+        if not self.parse(self.orig):
             raise ValueError
 
     def parse(self, text):
+        valid = self._parse(text)
+        if valid:
+            return True
+        m = self.env_re.match(text)
+        if not m:
+            return False
+        repo = os.environ.get('BORG_REPO')
+        if repo is None:
+            return False
+        valid = self._parse(repo)
+        if not valid:
+            return False
+        self.archive = m.group('archive')
+        return True
+
+    def _parse(self, text):
         m = self.ssh_re.match(text)
         if m:
             self.proto = m.group('proto')

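With the new `env_re` fallback, a location like `::archive` (or an empty location) is completed from the `BORG_REPO` environment variable. A hedged usage sketch, assuming the borg package is importable; the attribute names match the test reprs further down:

```python
import os
from borg.helpers import Location

os.environ['BORG_REPO'] = 'ssh://user@host:1234/some/path'

# Only the archive part is given; the repository comes from BORG_REPO.
loc = Location('::archive')
print(loc.proto, loc.host, loc.path, loc.archive)   # ssh host /some/path archive

# With no argument at all, the whole location is taken from the environment.
print(Location().archive)                           # None
```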
+ 3 - 3
borg/repository.py

@@ -50,14 +50,14 @@ class Repository:
         """Object with key {} not found in repository {}."""
         """Object with key {} not found in repository {}."""
 
 
     def __init__(self, path, create=False, exclusive=False):
     def __init__(self, path, create=False, exclusive=False):
-        self.path = path
+        self.path = os.path.abspath(path)
         self.io = None
         self.lock = None
         self.index = None
         self._active_txn = False
         if create:
-            self.create(path)
-        self.open(path, exclusive)
+            self.create(self.path)
+        self.open(self.path, exclusive)
 
     def __del__(self):
         self.close()

+ 8 - 0
borg/testsuite/archiver.py

@@ -485,6 +485,14 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         mode = os.stat(self.repository_path).st_mode
         self.assertEqual(stat.S_IMODE(mode), 0o700)
 
+    def test_create_dry_run(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', '--dry-run', self.repository_location + '::test', 'input')
+        # Make sure no archive has been created
+        repository = Repository(self.repository_path)
+        manifest, key = Manifest.load(repository)
+        self.assert_equal(len(manifest.archives), 0)
+
     def test_cmdline_compatibility(self):
         self.create_regular_file('file1', size=1024 * 80)
         self.cmd('init', self.repository_location)

+ 173 - 33
borg/testsuite/helpers.py

@@ -3,9 +3,10 @@ from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
 
 import pytest
+import sys
 import msgpack
 
-from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
+from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
     prune_within, prune_split, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
 from . import BaseTestCase
@@ -23,42 +24,115 @@ class BigIntTestCase(BaseTestCase):
         self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
 
 
-class LocationTestCase(BaseTestCase):
-
-    def test(self):
-        self.assert_equal(
-            repr(Location('ssh://user@host:1234/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('file:///some/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('user@host:/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('/some/absolute/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('some/relative/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
-        )
-        self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive'))
-
-    def test_canonical_path(self):
+class TestLocationWithoutEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('ssh://user@host:1234/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location('ssh://user@host:1234/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
+
+    def test_file(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('file:///some/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location('file:///some/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
+
+    def test_scp(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('user@host:/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location('user@host:/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location('path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('/some/absolute/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location('/some/absolute/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('some/relative/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location('some/relative/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
+
+    def test_underspecified(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('::archive')
+        with pytest.raises(ValueError):
+            Location('::')
+        with pytest.raises(ValueError):
+            Location()
+
+    def test_no_double_colon(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('ssh://localhost:22/path:archive')
+
+    def test_canonical_path(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
         locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive',
                      'host:~user/some/path::archive', 'ssh://host/some/path::archive',
                      'ssh://user@host:1234/some/path::archive']
         for location in locations:
-            self.assert_equal(Location(location).canonical_path(),
-                              Location(Location(location).canonical_path()).canonical_path())
+            assert Location(location).canonical_path() == \
+                   Location(Location(location).canonical_path()).canonical_path()
+
+
+class TestLocationWithEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
+
+    def test_file(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'file:///some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
+
+    def test_scp(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'user@host:/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', '/some/absolute/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'some/relative/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
 
 
 class FormatTimedeltaTestCase(BaseTestCase):
@@ -105,6 +179,72 @@ class PatternTestCase(BaseTestCase):
                           ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
 
 
+@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
+class PatternNonAsciiTestCase(BaseTestCase):
+    def testComposedUnicode(self):
+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+
+    def testDecomposedUnicode(self):
+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testInvalidUnicode(self):
+        pattern = str(b'ba\x80', 'latin1')
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("ba/foo")
+        assert i.match(str(b"ba\x80/foo", 'latin1'))
+        assert not e.match("ba/foo")
+        assert e.match(str(b"ba\x80/foo", 'latin1'))
+
+
+@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
+class OSXPatternNormalizationTestCase(BaseTestCase):
+    def testComposedUnicode(self):
+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testDecomposedUnicode(self):
+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testInvalidUnicode(self):
+        pattern = str(b'ba\x80', 'latin1')
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("ba/foo")
+        assert i.match(str(b"ba\x80/foo", 'latin1'))
+        assert not e.match("ba/foo")
+        assert e.match(str(b"ba\x80/foo", 'latin1'))
+
+
 def test_compression_specs():
     with pytest.raises(ValueError):
         CompressionSpec('')

+ 17 - 5
docs/usage.rst

@@ -41,9 +41,15 @@ Environment Variables
 
 |project_name| uses some environment variables for automation:
 
-Specifying a passphrase:
+General:
+    BORG_REPO
+        When set, use the value to give the default repository location. If a command needs an archive
+        parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you
+        can either leave it away or abbreviate as `::`, if a positional parameter is required.
     BORG_PASSPHRASE
         When set, use the value to answer the passphrase question for encrypted repositories.
+    TMPDIR
+        where temporary files are stored (might need a lot of temporary space for some operations)
 
 Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
     BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK
@@ -64,10 +70,6 @@ Building:
     BORG_OPENSSL_PREFIX
         Adds given OpenSSL header file directory to the default locations (setup.py).
 
-General:
-    TMPDIR
-        where temporary files are stored (might need a lot of temporary space for some operations)
-
 
 
 Please note:
@@ -210,6 +212,11 @@ Examples
     # Even slower, even higher compression (N = 0..9)
     $ borg create --compression lzma,N /mnt/backup::repo ~
 
+    # Backup some LV snapshots (you have to create the snapshots before this
+    # and remove them afterwards). We also backup the output of lvdisplay so
+    # we can see the LV sizes at restore time. See also "borg extract" examples.
+    $ lvdisplay > lvdisplay.txt
+    $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot
 
 .. include:: usage/extract.rst.inc
 
@@ -229,6 +236,11 @@ Examples
     # Extract the "src" directory but exclude object files
     $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
 
+    # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have
+    # to be already available and will be overwritten by this command!)
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home
+
 Note: currently, extract always writes into the current working directory ("."),
       so make sure you ``cd`` to the right place before calling ``borg extract``.