Browse Source

Merge branch 'master' of github.com:borgbackup/borg

Thomas Waldmann 9 years ago
parent
commit
601313836d
7 changed files with 259 additions and 40 deletions
  1. 48 18
      borg/archive.py
  2. 26 4
      borg/archiver.py
  3. 29 10
      borg/helpers.py
  4. 43 0
      borg/testsuite/archiver.py
  5. 46 0
      docs/usage.rst
  6. 9 0
      docs/usage/diff.rst.inc
  7. 58 8
      docs/usage/list.rst.inc

+ 48 - 18
borg/archive.py

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         cache.rollback()
         return stats
         return stats
 
 
-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
         if dry_run or stdout:
             if b'chunks' in item:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
                     sys.stdout.buffer.flush()
             return
             return
 
 
+        original_path = original_path or item[b'path']
         dest = self.cwd
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
                 os.makedirs(os.path.dirname(path))
+
             # Hard link?
             # Hard link?
             if b'source' in item:
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                 if os.path.exists(path):
                     os.unlink(path)
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
             if not os.path.exists(path):
                 os.makedirs(path)
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
                 status = 'U'  # regular file, unchanged
         else:
         else:
             status = 'A'  # regular file, added
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         # Only chunkify the file if needed
         if chunks is None:
         if chunks is None:
             fh = Archive._open_rb(path)
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 
 

+ 26 - 4
borg/archiver.py

@@ -359,8 +359,20 @@ class Archiver:
         sparse = args.sparse
         sparse = args.sparse
         strip_components = args.strip_components
         strip_components = args.strip_components
         dirs = []
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
                 if not item[b'path']:
@@ -378,7 +390,8 @@ class Archiver:
                         dirs.append(item)
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                         archive.extract_item(item, restore_attrs=False)
                     else:
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
 
@@ -1205,6 +1218,15 @@ class Archiver:
             Both archives need to be in the same repository, and a repository location may only
             Both archives need to be in the same repository, and a repository location may only
             be specified for ARCHIVE1.
             be specified for ARCHIVE1.
 
 
+            For archives created with Borg 1.1 or newer diff automatically detects whether
+            the archives are created with the same chunker params. If so, only chunk IDs
+            are compared, which is very fast.
+
+            For archives prior to Borg 1.1 chunk contents are compared by default.
+            If you did not create the archives with different chunker params,
+            pass --same-chunker-params.
+            Note that the chunker params changed from Borg 0.xx to 1.0.
+
             See the output of the "borg help patterns" command for more help on exclude patterns.
             See the output of the "borg help patterns" command for more help on exclude patterns.
             """)
             """)
         subparser = subparsers.add_parser('diff', parents=[common_parser],
         subparser = subparsers.add_parser('diff', parents=[common_parser],
@@ -1282,7 +1304,7 @@ class Archiver:
 
 
         See the "borg help patterns" command for more help on exclude patterns.
         See the "borg help patterns" command for more help on exclude patterns.
 
 
-        The following keys are available for --format:
+        The following keys are available for --format when listing files:
 
 
         """) + ItemFormatter.keys_help()
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
         subparser = subparsers.add_parser('list', parents=[common_parser],
@@ -1309,7 +1331,7 @@ class Archiver:
                                type=location_validator(),
                                type=location_validator(),
                                help='repository/archive to list contents of')
                                help='repository/archive to list contents of')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')
 
 
         mount_epilog = textwrap.dedent("""
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for
         This command mounts an archive as a FUSE filesystem. This can be useful for

+ 29 - 10
borg/helpers.py

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
         self.fallback = fallback
 
 
+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
         given patterns matches.
@@ -1125,16 +1128,27 @@ class ItemFormatter:
         'NL': os.linesep,
         'NL': os.linesep,
     }
     }
     KEY_DESCRIPTIONS = {
     KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
-        'csize': 'compressed size',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
         'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+
+        'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
+
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
     }
     }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )
 
 
     @classmethod
     @classmethod
     def available_keys(cls):
     def available_keys(cls):
@@ -1149,16 +1163,21 @@ class ItemFormatter:
         keys = []
         keys = []
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys
 
 
     @classmethod
     @classmethod
     def keys_help(cls):
     def keys_help(cls):
         help = []
         help = []
-        for key in cls.available_keys():
-            text = " - " + key
-            if key in cls.KEY_DESCRIPTIONS:
-                text += ": " + cls.KEY_DESCRIPTIONS[key]
-            help.append(text)
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = " - " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
         return "\n".join(help)
         return "\n".join(help)
 
 
     def __init__(self, archive, format):
     def __init__(self, archive, format):

+ 43 - 0
borg/testsuite/archiver.py

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             with self.assert_creates_file('input/dir/file'):
             with self.assert_creates_file('input/dir/file'):
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
 
 
+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
         self.create_regular_file('file1', size=1024 * 80)

+ 46 - 0
docs/usage.rst

@@ -374,6 +374,52 @@ Examples
     ...
     ...
 
 
 
 
+
+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+             archive1 -rw-r--r--
+             archive2 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+             archive1 -rw-r--r--
+             archive3 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+             +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc
 .. include:: usage/delete.rst.inc
 
 
 Examples
 Examples

+ 9 - 0
docs/usage/diff.rst.inc

@@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between ar
 Both archives need to be in the same repository, and a repository location may only
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.
 be specified for ARCHIVE1.
 
 
+For archives created with Borg 1.1 or newer diff automatically detects whether
+the archives are created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1 chunk contents are compared by default.
+If you did not create the archives with different chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.
 See the output of the "borg help patterns" command for more help on exclude patterns.

+ 58 - 8
docs/usage/list.rst.inc

@@ -6,15 +6,16 @@ borg list
 
 
     usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
     usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                      [--show-rc] [--no-files-cache] [--umask M]
                      [--show-rc] [--no-files-cache] [--umask M]
-                     [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                     [-P PREFIX]
-                     [REPOSITORY_OR_ARCHIVE]
+                     [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                     [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                     [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
     
     
     List archive or repository contents
     List archive or repository contents
     
     
     positional arguments:
     positional arguments:
       REPOSITORY_OR_ARCHIVE
       REPOSITORY_OR_ARCHIVE
                             repository/archive to list contents of
                             repository/archive to list contents of
+      PATH                  paths to list; patterns are supported
     
     
     optional arguments:
     optional arguments:
       -h, --help            show this help message and exit
       -h, --help            show this help message and exit
@@ -30,15 +31,64 @@ borg list
       --umask M             set umask to M (local and remote, default: 0077)
       --umask M             set umask to M (local and remote, default: 0077)
       --remote-path PATH    set remote path to executable (default: "borg")
       --remote-path PATH    set remote path to executable (default: "borg")
       --short               only print file/directory names, nothing else
       --short               only print file/directory names, nothing else
-      --list-format LISTFORMAT
-                            specify format for archive file listing (default:
-                            "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                            {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                            to list available keys
+      --format FORMAT, --list-format FORMAT
+                            specify format for file listing (default: "{mode}
+                            {user:6} {group:6} {size:8d} {isomtime}
+                            {path}{extra}{NL}")
       -P PREFIX, --prefix PREFIX
       -P PREFIX, --prefix PREFIX
                             only consider archive names starting with this prefix
                             only consider archive names starting with this prefix
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
     
     
 Description
 Description
 ~~~~~~~~~~~
 ~~~~~~~~~~~
 
 
 This command lists the contents of a repository or an archive.
 This command lists the contents of a repository or an archive.
+
+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like output, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF