Browse Source

Merge branch 'master' of github.com:borgbackup/borg

Thomas Waldmann 9 years ago
parent
commit
601313836d
7 changed files with 259 additions and 40 deletions
  1. 48 18
      borg/archive.py
  2. 26 4
      borg/archiver.py
  3. 29 10
      borg/helpers.py
  4. 43 0
      borg/testsuite/archiver.py
  5. 46 0
      docs/usage.rst
  6. 9 0
      docs/usage/diff.rst.inc
  7. 58 8
      docs/usage/list.rst.inc

+ 48 - 18
borg/archive.py

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats
 
-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
             return
 
+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
+
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         if chunks is None:
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 

+ 26 - 4
borg/archiver.py

@@ -359,8 +359,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
@@ -378,7 +390,8 @@ class Archiver:
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
@@ -1205,6 +1218,15 @@ class Archiver:
             Both archives need to be in the same repository, and a repository location may only
             be specified for ARCHIVE1.
 
+            For archives created with Borg 1.1 or newer diff automatically detects whether
+            the archives were created with the same chunker params. If so, only chunk IDs
+            are compared, which is very fast.
+
+            For archives prior to Borg 1.1 chunk contents are compared by default.
+            If you did not create the archives with different chunker params,
+            pass --same-chunker-params.
+            Note that the chunker params changed from Borg 0.xx to 1.0.
+
             See the output of the "borg help patterns" command for more help on exclude patterns.
             """)
         subparser = subparsers.add_parser('diff', parents=[common_parser],
@@ -1282,7 +1304,7 @@ class Archiver:
 
         See the "borg help patterns" command for more help on exclude patterns.
 
-        The following keys are available for --format:
+        The following keys are available for --format when listing files:
 
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
@@ -1309,7 +1331,7 @@ class Archiver:
                                type=location_validator(),
                                help='repository/archive to list contents of')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')
 
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for

+ 29 - 10
borg/helpers.py

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
 
+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
@@ -1125,16 +1128,27 @@ class ItemFormatter:
         'NL': os.linesep,
     }
     KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
-        'csize': 'compressed size',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+
+        'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
+
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
     }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )
 
     @classmethod
     def available_keys(cls):
@@ -1149,16 +1163,21 @@ class ItemFormatter:
         keys = []
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys
 
     @classmethod
     def keys_help(cls):
         help = []
-        for key in cls.available_keys():
-            text = " - " + key
-            if key in cls.KEY_DESCRIPTIONS:
-                text += ": " + cls.KEY_DESCRIPTIONS[key]
-            help.append(text)
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = " - " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
         return "\n".join(help)
 
     def __init__(self, archive, format):

+ 43 - 0
borg/testsuite/archiver.py

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             with self.assert_creates_file('input/dir/file'):
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
 
+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)

+ 46 - 0
docs/usage.rst

@@ -374,6 +374,52 @@ Examples
     ...
 
 
+
+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+             archive1 -rw-r--r--
+             archive2 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+             archive1 -rw-r--r--
+             archive3 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+             +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc
 
 Examples

+ 9 - 0
docs/usage/diff.rst.inc

@@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between ar
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.
 
+For archives created with Borg 1.1 or newer diff automatically detects whether
+the archives were created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1 chunk contents are compared by default.
+If you did not create the archives with different chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.

+ 58 - 8
docs/usage/list.rst.inc

@@ -6,15 +6,16 @@ borg list
 
     usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                      [--show-rc] [--no-files-cache] [--umask M]
-                     [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                     [-P PREFIX]
-                     [REPOSITORY_OR_ARCHIVE]
+                     [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                     [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                     [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
     
     List archive or repository contents
     
     positional arguments:
       REPOSITORY_OR_ARCHIVE
                             repository/archive to list contents of
+      PATH                  paths to list; patterns are supported
     
     optional arguments:
       -h, --help            show this help message and exit
@@ -30,15 +31,64 @@ borg list
       --umask M             set umask to M (local and remote, default: 0077)
       --remote-path PATH    set remote path to executable (default: "borg")
       --short               only print file/directory names, nothing else
-      --list-format LISTFORMAT
-                            specify format for archive file listing (default:
-                            "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                            {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                            to list available keys
+      --format FORMAT, --list-format FORMAT
+                            specify format for file listing (default: "{mode}
+                            {user:6} {group:6} {size:8d} {isomtime}
+                            {path}{extra}{NL}")
       -P PREFIX, --prefix PREFIX
                             only consider archive names starting with this prefix
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
     
 Description
 ~~~~~~~~~~~
 
 This command lists the contents of a repository or an archive.
+
+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like output, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF