Selaa lähdekoodia

Merge pull request #751 from enkore/feature-newlist

ItemFormatter for printing items, borg list redo.
TW 9 vuotta sitten
vanhempi
sitoutus
61db791f02
4 muutettua tiedostoa jossa 246 lisäystä ja 87 poistoa
  1. 44 81
      borg/archiver.py
  2. 156 2
      borg/helpers.py
  3. 37 3
      borg/testsuite/archiver.py
  4. 9 1
      borg/testsuite/helpers.py

+ 44 - 81
borg/archiver.py

@@ -16,12 +16,12 @@ import textwrap
 import traceback
 
 from . import __version__
-from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
-    parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \
+from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
+    parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
     get_cache_dir, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
-    EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher
+    dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
+    EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter
 from .logger import create_logger, setup_logging
 logger = create_logger()
 from .compress import Compressor, COMPR_BUFFER
@@ -585,79 +585,29 @@ class Archiver:
         repository = self.open_repository(args)
         manifest, key = Manifest.load(repository)
         if args.location.archive:
-            archive = Archive(repository, key, manifest, args.location.archive)
-            """use_user_format flag is used to speed up default listing.
-            When user issues format options, listing is a bit slower, but more keys are available and
-            precalculated.
-            """
-            use_user_format = args.listformat is not None
-            if use_user_format:
-                list_format = args.listformat
-            elif args.short:
-                list_format = "{path}{LF}"
-            else:
-                list_format = "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{LF}"
+            matcher, _ = self.build_matcher(args.excludes, args.paths)
 
-            for item in archive.iter_items():
-                mode = stat.filemode(item[b'mode'])
-                type = mode[0]
-                size = 0
-                if type == '-':
-                    try:
-                        size = sum(size for _, size, _ in item[b'chunks'])
-                    except KeyError:
-                        pass
-
-                mtime = safe_timestamp(item[b'mtime'])
-                if use_user_format:
-                    atime = safe_timestamp(item.get(b'atime') or item[b'mtime'])
-                    ctime = safe_timestamp(item.get(b'ctime') or item[b'mtime'])
-
-                if b'source' in item:
-                    source = item[b'source']
-                    if type == 'l':
-                        extra = ' -> %s' % item[b'source']
-                    else:
-                        mode = 'h' + mode[1:]
-                        extra = ' link to %s' % item[b'source']
-                else:
-                    extra = ''
-                    source = ''
-
-                item_data = {
-                        'mode': mode,
-                        'user': item[b'user'] or item[b'uid'],
-                        'group': item[b'group'] or item[b'gid'],
-                        'size': size,
-                        'isomtime': format_time(mtime),
-                        'path': remove_surrogates(item[b'path']),
-                        'extra': extra,
-                        'LF': '\n',
-                        }
-                if use_user_format:
-                    item_data_advanced = {
-                        'bmode': item[b'mode'],
-                        'type': type,
-                        'source': source,
-                        'linktarget': source,
-                        'uid': item[b'uid'],
-                        'gid': item[b'gid'],
-                        'mtime': mtime,
-                        'isoctime': format_time(ctime),
-                        'ctime': ctime,
-                        'isoatime': format_time(atime),
-                        'atime': atime,
-                        'archivename': archive.name,
-                        'SPACE': ' ',
-                        'TAB': '\t',
-                        'CR': '\r',
-                        'NEWLINE': os.linesep,
-                        }
-                    item_data.update(item_data_advanced)
-                item_data['formatkeys'] = list(item_data.keys())
-
-                print(format_line(list_format, item_data), end='')
+            with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
+                archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
 
+                if args.format:
+                    format = args.format
+                elif args.short:
+                    format = "{path}{NL}"
+                else:
+                    format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
+                formatter = ItemFormatter(archive, format)
+
+                if not hasattr(sys.stdout, 'buffer'):
+                    # This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
+                    # which doesn't have an underlying buffer (= lower file object).
+                    def write(bytestring):
+                        sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
+                else:
+                    write = sys.stdout.buffer.write
+                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                    write(formatter.format_item(item).encode('utf-8', errors='surrogateescape'))
+            repository.close()
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
@@ -944,12 +894,13 @@ class Archiver:
     def preprocess_args(self, args):
         deprecations = [
             # ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'),
+            ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
         ]
         for i, arg in enumerate(args[:]):
             for old_name, new_name, warning in deprecations:
                 if arg.startswith(old_name):
                     args[i] = arg.replace(old_name, new_name)
-                    print(warning)
+                    self.print_warning(warning)
         return args
 
     def build_parser(self, args=None, prog=None):
@@ -1322,7 +1273,12 @@ class Archiver:
 
         list_epilog = textwrap.dedent("""
         This command lists the contents of a repository or an archive.
-        """)
+
+        See the "borg help patterns" command for more help on exclude patterns.
+
+        The following keys are available for --format:
+
+        """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
                                           description=self.do_list.__doc__,
                                           epilog=list_epilog,
@@ -1332,15 +1288,22 @@ class Archiver:
         subparser.add_argument('--short', dest='short',
                                action='store_true', default=False,
                                help='only print file/directory names, nothing else')
-        subparser.add_argument('--list-format', dest='listformat', type=str,
-                               help="""specify format for archive file listing
-                                (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}")
-                                Special "{formatkeys}" exists to list available keys""")
+        subparser.add_argument('--format', '--list-format', dest='format', type=str,
+                               help="""specify format for file listing
+                                (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
         subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
                                help='only consider archive names starting with this prefix')
+        subparser.add_argument('-e', '--exclude', dest='excludes',
+                               type=parse_pattern, action='append',
+                               metavar="PATTERN", help='exclude paths matching PATTERN')
+        subparser.add_argument('--exclude-from', dest='exclude_files',
+                               type=argparse.FileType('r'), action='append',
+                               metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                help='repository/archive to list contents of')
+        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
+                               help='paths to extract; patterns are supported')
 
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for

+ 156 - 2
borg/helpers.py

@@ -1,8 +1,9 @@
 import argparse
 from binascii import hexlify
 from collections import namedtuple
-from functools import wraps
+from functools import wraps, partial
 import grp
+import hashlib
 import os
 import stat
 import textwrap
@@ -10,6 +11,7 @@ import pwd
 import re
 from shutil import get_terminal_size
 import sys
+from string import Formatter
 import platform
 import time
 import unicodedata
@@ -548,6 +550,20 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
     return tag_paths
 
 
+def partial_format(format, mapping):
+    """
+    Apply format.format_map(mapping) while preserving unknown keys
+
+    Does not support attribute access, indexing and ![rsa] conversions
+    """
+    for key, value in mapping.items():
+        key = re.escape(key)
+        format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
+                        lambda match: match.group(1).format_map(mapping),
+                        format)
+    return format
+
+
 def format_line(format, data):
     # TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
 
@@ -556,7 +572,7 @@ def format_line(format, data):
     except (KeyError, ValueError) as e:
         # this should catch format errors
         print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
-    except:
+    except Exception as e:
         # something unexpected, print error and raise exception
         print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
         raise
@@ -1090,3 +1106,141 @@ def log_multi(*msgs, level=logging.INFO):
         lines.extend(msg.splitlines())
     for line in lines:
         logger.log(level, line)
+
+
+class ItemFormatter:
+    FIXED_KEYS = {
+        # Formatting aids
+        'LF': '\n',
+        'SPACE': ' ',
+        'TAB': '\t',
+        'CR': '\r',
+        'NUL': '\0',
+        'NEWLINE': os.linesep,
+        'NL': os.linesep,
+    }
+    KEY_DESCRIPTIONS = {
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
+        'csize': 'compressed size',
+        'bpath': 'verbatim POSIX path, can contain any character except NUL',
+        'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
+        'source': 'link target for links (identical to linktarget)',
+        'num_chunks': 'number of chunks in this file',
+        'unique_chunks': 'number of unique chunks in this file',
+    }
+
+    @classmethod
+    def available_keys(cls):
+        class FakeArchive:
+            fpr = name = ""
+
+        fake_item = {
+            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
+            b'uid': 0, b'gid': 0,
+        }
+        formatter = cls(FakeArchive, "")
+        keys = []
+        keys.extend(formatter.call_keys.keys())
+        keys.extend(formatter.get_item_data(fake_item).keys())
+        return sorted(keys, key=lambda s: (s.isupper(), s))
+
+    @classmethod
+    def keys_help(cls):
+        help = []
+        for key in cls.available_keys():
+            text = " - " + key
+            if key in cls.KEY_DESCRIPTIONS:
+                text += ": " + cls.KEY_DESCRIPTIONS[key]
+            help.append(text)
+        return "\n".join(help)
+
+    def __init__(self, archive, format):
+        self.archive = archive
+        static_keys = {
+            'archivename': archive.name,
+            'archiveid': archive.fpr,
+        }
+        static_keys.update(self.FIXED_KEYS)
+        self.format = partial_format(format, static_keys)
+        self.format_keys = {f[1] for f in Formatter().parse(format)}
+        self.call_keys = {
+            'size': self.calculate_size,
+            'csize': self.calculate_csize,
+            'num_chunks': self.calculate_num_chunks,
+            'unique_chunks': self.calculate_unique_chunks,
+            'isomtime': partial(self.format_time, b'mtime'),
+            'isoctime': partial(self.format_time, b'ctime'),
+            'isoatime': partial(self.format_time, b'atime'),
+            'mtime': partial(self.time, b'mtime'),
+            'ctime': partial(self.time, b'ctime'),
+            'atime': partial(self.time, b'atime'),
+        }
+        for hash_function in hashlib.algorithms_guaranteed:
+            self.add_key(hash_function, partial(self.hash_item, hash_function))
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+        self.item_data = static_keys
+
+    def add_key(self, key, callable_with_item):
+        self.call_keys[key] = callable_with_item
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+
+    def get_item_data(self, item):
+        mode = stat.filemode(item[b'mode'])
+        item_type = mode[0]
+        item_data = self.item_data
+
+        source = item.get(b'source', '')
+        extra = ''
+        if source:
+            source = remove_surrogates(source)
+            if item_type == 'l':
+                extra = ' -> %s' % source
+            else:
+                mode = 'h' + mode[1:]
+                extra = ' link to %s' % source
+        item_data['type'] = item_type
+        item_data['mode'] = mode
+        item_data['user'] = item[b'user'] or item[b'uid']
+        item_data['group'] = item[b'group'] or item[b'gid']
+        item_data['uid'] = item[b'uid']
+        item_data['gid'] = item[b'gid']
+        item_data['path'] = remove_surrogates(item[b'path'])
+        item_data['bpath'] = item[b'path']
+        item_data['source'] = source
+        item_data['linktarget'] = source
+        item_data['extra'] = extra
+        for key in self.used_call_keys:
+            item_data[key] = self.call_keys[key](item)
+        return item_data
+
+    def format_item(self, item):
+        return self.format.format_map(self.get_item_data(item))
+
+    def calculate_num_chunks(self, item):
+        return len(item.get(b'chunks', []))
+
+    def calculate_unique_chunks(self, item):
+        chunk_index = self.archive.cache.chunks
+        return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1)
+
+    def calculate_size(self, item):
+        return sum(size for _, size, _ in item.get(b'chunks', []))
+
+    def calculate_csize(self, item):
+        return sum(csize for _, _, csize in item.get(b'chunks', []))
+
+    def hash_item(self, hash_function, item):
+        if b'chunks' not in item:
+            return ""
+        hash = hashlib.new(hash_function)
+        for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]):
+            hash.update(chunk)
+        return hash.hexdigest()
+
+    def format_time(self, key, item):
+        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+
+    def time(self, key, item):
+        return safe_timestamp(item.get(key) or item[b'mtime'])

+ 37 - 3
borg/testsuite/archiver.py

@@ -892,16 +892,50 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in('test-2', output)
         self.assert_not_in('something-else', output)
 
-    def test_list_list_format(self):
+    def test_list_format(self):
         self.cmd('init', self.repository_location)
         test_archive = self.repository_location + '::test'
         self.cmd('create', test_archive, src_dir)
+        self.cmd('list', '--list-format', '-', test_archive, exit_code=1)
+        self.archiver.exit_code = 0  # reset exit code for following tests
         output_1 = self.cmd('list', test_archive)
-        output_2 = self.cmd('list', '--list-format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
-        output_3 = self.cmd('list', '--list-format', '{mtime:%s} {path}{NL}', test_archive)
+        output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
+        output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive)
         self.assertEqual(output_1, output_2)
         self.assertNotEqual(output_1, output_3)
 
+    def test_list_hash(self):
+        self.create_regular_file('empty_file', size=0)
+        self.create_regular_file('amb', contents=b'a' * 1000000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', test_archive, 'input')
+        output = self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive)
+        assert "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 input/amb" in output
+        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 input/empty_file" in output
+
+    def test_list_chunk_counts(self):
+        self.create_regular_file('empty_file', size=0)
+        self.create_regular_file('two_chunks')
+        with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd:
+            fd.write(b'abba' * 2000000)
+            fd.write(b'baab' * 2000000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', test_archive, 'input')
+        output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive)
+        assert "0 0 input/empty_file" in output
+        assert "2 2 input/two_chunks" in output
+
+    def test_list_size(self):
+        self.create_regular_file('compressible_file', size=10000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', '-C', 'lz4', test_archive, 'input')
+        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
+        size, csize, path = output.split("\n")[1].split(" ")
+        assert int(csize) < int(size)
+
     def test_break_lock(self):
         self.cmd('init', self.repository_location)
         self.cmd('break-lock', self.repository_location)

+ 9 - 1
borg/testsuite/helpers.py

@@ -15,7 +15,7 @@ from ..helpers import Location, format_file_size, format_timedelta, make_path_sa
     yes, TRUISH, FALSISH, DEFAULTISH, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
-    PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
+    PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format
 from . import BaseTestCase, environment_variable, FakeInputs
 
 
@@ -877,3 +877,11 @@ def test_progress_endless_step(capfd):
     pi.show()
     out, err = capfd.readouterr()
     assert err == '.'
+
+
+def test_partial_format():
+    assert partial_format('{space:10}', {'space': ' '}) == ' ' * 10
+    assert partial_format('{foobar}', {'bar': 'wrong', 'foobar': 'correct'}) == 'correct'
+    assert partial_format('{unknown_key}', {}) == '{unknown_key}'
+    assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}'
+    assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}'