Browse Source

Merge branch 'master' of github.com:borgbackup/borg

Thomas Waldmann 9 years ago
parent
commit
601313836d
7 changed files with 259 additions and 40 deletions
  1. 48 18
      borg/archive.py
  2. 26 4
      borg/archiver.py
  3. 29 10
      borg/helpers.py
  4. 43 0
      borg/testsuite/archiver.py
  5. 46 0
      docs/usage.rst
  6. 9 0
      docs/usage/diff.rst.inc
  7. 58 8
      docs/usage/list.rst.inc

+ 48 - 18
borg/archive.py

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         cache.rollback()
         return stats
         return stats
 
 
-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
         if dry_run or stdout:
             if b'chunks' in item:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
                     sys.stdout.buffer.flush()
             return
             return
 
 
+        original_path = original_path or item[b'path']
         dest = self.cwd
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
                 os.makedirs(os.path.dirname(path))
+
             # Hard link?
             # Hard link?
             if b'source' in item:
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                 if os.path.exists(path):
                     os.unlink(path)
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
             if not os.path.exists(path):
                 os.makedirs(path)
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
                 status = 'U'  # regular file, unchanged
         else:
         else:
             status = 'A'  # regular file, added
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         # Only chunkify the file if needed
         if chunks is None:
         if chunks is None:
             fh = Archive._open_rb(path)
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 
 

+ 26 - 4
borg/archiver.py

@@ -359,8 +359,20 @@ class Archiver:
         sparse = args.sparse
         sparse = args.sparse
         strip_components = args.strip_components
         strip_components = args.strip_components
         dirs = []
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
                 if not item[b'path']:
@@ -378,7 +390,8 @@ class Archiver:
                         dirs.append(item)
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                         archive.extract_item(item, restore_attrs=False)
                     else:
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
 
@@ -1205,6 +1218,15 @@ class Archiver:
             Both archives need to be in the same repository, and a repository location may only
             Both archives need to be in the same repository, and a repository location may only
             be specified for ARCHIVE1.
             be specified for ARCHIVE1.
 
 
+            For archives created with Borg 1.1 or newer diff automatically detects whether
+            the archives are created with the same chunker params. If so, only chunk IDs
+            are compared, which is very fast.
+
+            For archives prior to Borg 1.1 chunk contents are compared by default.
+            If you did not create the archives with different chunker params,
+            pass --same-chunker-params.
+            Note that the chunker params changed from Borg 0.xx to 1.0.
+
             See the output of the "borg help patterns" command for more help on exclude patterns.
             See the output of the "borg help patterns" command for more help on exclude patterns.
             """)
             """)
         subparser = subparsers.add_parser('diff', parents=[common_parser],
         subparser = subparsers.add_parser('diff', parents=[common_parser],
@@ -1282,7 +1304,7 @@ class Archiver:
 
 
         See the "borg help patterns" command for more help on exclude patterns.
         See the "borg help patterns" command for more help on exclude patterns.
 
 
-        The following keys are available for --format:
+        The following keys are available for --format when listing files:
 
 
         """) + ItemFormatter.keys_help()
         """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
         subparser = subparsers.add_parser('list', parents=[common_parser],
@@ -1309,7 +1331,7 @@ class Archiver:
                                type=location_validator(),
                                type=location_validator(),
                                help='repository/archive to list contents of')
                                help='repository/archive to list contents of')
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
         subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
-                               help='paths to extract; patterns are supported')
+                               help='paths to list; patterns are supported')
 
 
         mount_epilog = textwrap.dedent("""
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for
         This command mounts an archive as a FUSE filesystem. This can be useful for

+ 29 - 10
borg/helpers.py

@@ -293,6 +293,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
         self.fallback = fallback
 
 
+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
         given patterns matches.
@@ -1125,16 +1128,27 @@ class ItemFormatter:
         'NL': os.linesep,
         'NL': os.linesep,
     }
     }
     KEY_DESCRIPTIONS = {
     KEY_DESCRIPTIONS = {
-        'NEWLINE': 'OS dependent line separator',
-        'NL': 'alias of NEWLINE',
-        'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
-        'csize': 'compressed size',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'bpath': 'verbatim POSIX path, can contain any character except NUL',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
         'source': 'link target for links (identical to linktarget)',
         'source': 'link target for links (identical to linktarget)',
+        'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
+
+        'csize': 'compressed size',
         'num_chunks': 'number of chunks in this file',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
+
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
     }
     }
+    KEY_GROUPS = (
+        ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'),
+        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
+        tuple(sorted(hashlib.algorithms_guaranteed)),
+        ('archiveid', 'archivename', 'extra'),
+        ('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
+    )
 
 
     @classmethod
     @classmethod
     def available_keys(cls):
     def available_keys(cls):
@@ -1149,16 +1163,21 @@ class ItemFormatter:
         keys = []
         keys = []
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.call_keys.keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
         keys.extend(formatter.get_item_data(fake_item).keys())
-        return sorted(keys, key=lambda s: (s.isupper(), s))
+        return keys
 
 
     @classmethod
     @classmethod
     def keys_help(cls):
     def keys_help(cls):
         help = []
         help = []
-        for key in cls.available_keys():
-            text = " - " + key
-            if key in cls.KEY_DESCRIPTIONS:
-                text += ": " + cls.KEY_DESCRIPTIONS[key]
-            help.append(text)
+        keys = cls.available_keys()
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = " - " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
         return "\n".join(help)
         return "\n".join(help)
 
 
     def __init__(self, archive, format):
     def __init__(self, archive, format):

+ 43 - 0
borg/testsuite/archiver.py

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             with self.assert_creates_file('input/dir/file'):
             with self.assert_creates_file('input/dir/file'):
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
                 self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
 
 
+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
         self.create_regular_file('file1', size=1024 * 80)

+ 46 - 0
docs/usage.rst

@@ -374,6 +374,52 @@ Examples
     ...
     ...
 
 
 
 
+
+.. include:: usage/diff.rst.inc
+
+Examples
+~~~~~~~~
+::
+
+    $ borg init testrepo
+    $ mkdir testdir
+    $ cd testdir
+    $ echo asdf > file1
+    $ dd if=/dev/urandom bs=1M count=4 > file2
+    $ touch file3
+    $ borg create ../testrepo::archive1 .
+
+    $ chmod a+x file1
+    $ echo "something" >> file2
+    $ borg create ../testrepo::archive2 .
+
+    $ rm file3
+    $ touch file4
+    $ borg create ../testrepo::archive3 .
+
+    $ cd ..
+    $ borg diff testrepo::archive1 archive2
+    file1 different mode
+             archive1 -rw-r--r--
+             archive2 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+
+    $ borg diff testrepo::archive2 archive3
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+
+    $ borg diff testrepo::archive1 archive3
+    file1 different mode
+             archive1 -rw-r--r--
+             archive3 -rwxr-xr-x
+    file2 different contents
+             +28 B, -31 B, 4.19 MB, 4.19 MB
+    file3 different contents
+             +0 B, -0 B, 0 B, <deleted>
+    file4 different contents
+             +0 B, -0 B, <deleted>, 0 B
+
 .. include:: usage/delete.rst.inc
 .. include:: usage/delete.rst.inc
 
 
 Examples
 Examples

+ 9 - 0
docs/usage/diff.rst.inc

@@ -48,4 +48,13 @@ This command finds differences in files (contents, user, group, mode) between ar
 Both archives need to be in the same repository, and a repository location may only
 Both archives need to be in the same repository, and a repository location may only
 be specified for ARCHIVE1.
 be specified for ARCHIVE1.
 
 
+For archives created with Borg 1.1 or newer diff automatically detects whether
+the archives are created with the same chunker params. If so, only chunk IDs
+are compared, which is very fast.
+
+For archives prior to Borg 1.1 chunk contents are compared by default.
+If you did not create the archives with different chunker params,
+pass --same-chunker-params.
+Note that the chunker params changed from Borg 0.xx to 1.0.
+
 See the output of the "borg help patterns" command for more help on exclude patterns.
 See the output of the "borg help patterns" command for more help on exclude patterns.

+ 58 - 8
docs/usage/list.rst.inc

@@ -6,15 +6,16 @@ borg list
 
 
     usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
     usage: borg list [-h] [-v] [--debug] [--lock-wait N] [--show-version]
                      [--show-rc] [--no-files-cache] [--umask M]
                      [--show-rc] [--no-files-cache] [--umask M]
-                     [--remote-path PATH] [--short] [--list-format LISTFORMAT]
-                     [-P PREFIX]
-                     [REPOSITORY_OR_ARCHIVE]
+                     [--remote-path PATH] [--short] [--format FORMAT] [-P PREFIX]
+                     [-e PATTERN] [--exclude-from EXCLUDEFILE]
+                     [REPOSITORY_OR_ARCHIVE] [PATH [PATH ...]]
     
     
     List archive or repository contents
     List archive or repository contents
     
     
     positional arguments:
     positional arguments:
       REPOSITORY_OR_ARCHIVE
       REPOSITORY_OR_ARCHIVE
                             repository/archive to list contents of
                             repository/archive to list contents of
+      PATH                  paths to list; patterns are supported
     
     
     optional arguments:
     optional arguments:
       -h, --help            show this help message and exit
       -h, --help            show this help message and exit
@@ -30,15 +31,64 @@ borg list
       --umask M             set umask to M (local and remote, default: 0077)
       --umask M             set umask to M (local and remote, default: 0077)
       --remote-path PATH    set remote path to executable (default: "borg")
       --remote-path PATH    set remote path to executable (default: "borg")
       --short               only print file/directory names, nothing else
       --short               only print file/directory names, nothing else
-      --list-format LISTFORMAT
-                            specify format for archive file listing (default:
-                            "{mode} {user:6} {group:6} {size:8d} {isomtime}
-                            {path}{extra}{NEWLINE}") Special "{formatkeys}" exists
-                            to list available keys
+      --format FORMAT, --list-format FORMAT
+                            specify format for file listing (default: "{mode}
+                            {user:6} {group:6} {size:8d} {isomtime}
+                            {path}{extra}{NL}")
       -P PREFIX, --prefix PREFIX
       -P PREFIX, --prefix PREFIX
                             only consider archive names starting with this prefix
                             only consider archive names starting with this prefix
+      -e PATTERN, --exclude PATTERN
+                            exclude paths matching PATTERN
+      --exclude-from EXCLUDEFILE
+                            read exclude patterns from EXCLUDEFILE, one per line
     
     
 Description
 Description
 ~~~~~~~~~~~
 ~~~~~~~~~~~
 
 
 This command lists the contents of a repository or an archive.
 This command lists the contents of a repository or an archive.
+
+See the "borg help patterns" command for more help on exclude patterns.
+
+The following keys are available for --format when listing files:
+
+ - type
+ - mode
+ - uid
+ - gid
+ - user
+ - group
+ - path: path interpreted as text (might be missing non-text characters, see bpath)
+ - bpath: verbatim POSIX path, can contain any character except NUL
+ - source: link target for links (identical to linktarget)
+ - linktarget
+
+ - size
+ - csize: compressed size
+ - num_chunks: number of chunks in this file
+ - unique_chunks: number of unique chunks in this file
+
+ - mtime
+ - ctime
+ - atime
+ - isomtime
+ - isoctime
+ - isoatime
+
+ - md5
+ - sha1
+ - sha224
+ - sha256
+ - sha384
+ - sha512
+
+ - archiveid
+ - archivename
+ - extra: prepends {source} with " -> " for soft links and " link to " for hard links
+
+ - NEWLINE: OS dependent line separator
+ - NL: alias of NEWLINE
+ - NUL: NUL character for creating print0 / xargs -0 like output, see bpath
+ - SPACE
+ - TAB
+ - CR
+ - LF