
Redo borg list

- This is compatible except for {formatkeys}, which has been replaced
  by "borg list --help"
- --list-format is deprecated, use --format instead
  (using deprecated arguments prints a warning and sets the exit code to 1)
- borg list now supports the usual [PATH [PATHS…]] syntax and excludes
- Additional keys: csize, num_chunks, unique_chunks, NUL
- Supports the guaranteed-available hashlib hashes (hashlib.algorithms_guaranteed)
  (to avoid varying functionality depending on the environment)
  (also, the other hashes are really obscure, like MD-4)
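
To make the key mechanism concrete, here is a minimal standalone sketch (the item values are invented sample data, not produced by this commit): a --format string is filled from a per-item mapping, the same way ItemFormatter.format_item() applies str.format_map().

    # Sketch: resolve a --format string against one item's data (sample values).
    item_data = {
        'mode': '-rw-r--r--', 'user': 'user', 'group': 'users',
        'size': 1048576, 'csize': 65536,          # csize: compressed size (new key)
        'num_chunks': 2, 'unique_chunks': 2,      # new chunk-count keys
        'path': 'src/file.py', 'extra': '',
        'NL': '\n', 'NUL': '\0',                  # NUL allows print0 / xargs -0 style output
    }
    # Default listing format (as set in do_list):
    fmt = "{mode} {user:6} {group:6} {size:8} {path}{extra}{NL}"
    print(fmt.format_map(item_data), end='')
    # The new keys work the same way, e.g. NUL-terminated chunk statistics:
    print("{num_chunks} {unique_chunks} {csize} {path}{NUL}".format_map(item_data), end='')
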
Marian Beermann 9 years ago
parent
commit
4151db270c
4 changed files with 246 additions and 87 deletions
  1. borg/archiver.py (+44, -81)
  2. borg/helpers.py (+156, -2)
  3. borg/testsuite/archiver.py (+37, -3)
  4. borg/testsuite/helpers.py (+9, -1)

borg/archiver.py  (+44, -81)

@@ -16,12 +16,12 @@ import textwrap
 import traceback
 
 from . import __version__
-from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
-    parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \
+from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
+    parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
     get_cache_dir, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
-    EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher
+    dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
+    EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ItemFormatter
 from .logger import create_logger, setup_logging
 logger = create_logger()
 from .compress import Compressor, COMPR_BUFFER
@@ -585,79 +585,29 @@ class Archiver:
         repository = self.open_repository(args)
         manifest, key = Manifest.load(repository)
         if args.location.archive:
-            archive = Archive(repository, key, manifest, args.location.archive)
-            """use_user_format flag is used to speed up default listing.
-            When user issues format options, listing is a bit slower, but more keys are available and
-            precalculated.
-            """
-            use_user_format = args.listformat is not None
-            if use_user_format:
-                list_format = args.listformat
-            elif args.short:
-                list_format = "{path}{LF}"
-            else:
-                list_format = "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{LF}"
+            matcher, _ = self.build_matcher(args.excludes, args.paths)
 
-            for item in archive.iter_items():
-                mode = stat.filemode(item[b'mode'])
-                type = mode[0]
-                size = 0
-                if type == '-':
-                    try:
-                        size = sum(size for _, size, _ in item[b'chunks'])
-                    except KeyError:
-                        pass
-
-                mtime = safe_timestamp(item[b'mtime'])
-                if use_user_format:
-                    atime = safe_timestamp(item.get(b'atime') or item[b'mtime'])
-                    ctime = safe_timestamp(item.get(b'ctime') or item[b'mtime'])
-
-                if b'source' in item:
-                    source = item[b'source']
-                    if type == 'l':
-                        extra = ' -> %s' % item[b'source']
-                    else:
-                        mode = 'h' + mode[1:]
-                        extra = ' link to %s' % item[b'source']
-                else:
-                    extra = ''
-                    source = ''
-
-                item_data = {
-                        'mode': mode,
-                        'user': item[b'user'] or item[b'uid'],
-                        'group': item[b'group'] or item[b'gid'],
-                        'size': size,
-                        'isomtime': format_time(mtime),
-                        'path': remove_surrogates(item[b'path']),
-                        'extra': extra,
-                        'LF': '\n',
-                        }
-                if use_user_format:
-                    item_data_advanced = {
-                        'bmode': item[b'mode'],
-                        'type': type,
-                        'source': source,
-                        'linktarget': source,
-                        'uid': item[b'uid'],
-                        'gid': item[b'gid'],
-                        'mtime': mtime,
-                        'isoctime': format_time(ctime),
-                        'ctime': ctime,
-                        'isoatime': format_time(atime),
-                        'atime': atime,
-                        'archivename': archive.name,
-                        'SPACE': ' ',
-                        'TAB': '\t',
-                        'CR': '\r',
-                        'NEWLINE': os.linesep,
-                        }
-                    item_data.update(item_data_advanced)
-                item_data['formatkeys'] = list(item_data.keys())
-
-                print(format_line(list_format, item_data), end='')
+            with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
+                archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
 
+                if args.format:
+                    format = args.format
+                elif args.short:
+                    format = "{path}{NL}"
+                else:
+                    format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
+                formatter = ItemFormatter(archive, format)
+
+                if not hasattr(sys.stdout, 'buffer'):
+                    # This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
+                    # which doesn't have an underlying buffer (= lower file object).
+                    def write(bytestring):
+                        sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
+                else:
+                    write = sys.stdout.buffer.write
+                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                    write(formatter.format_item(item).encode('utf-8', errors='surrogateescape'))
+            repository.close()
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
@@ -944,12 +894,13 @@ class Archiver:
     def preprocess_args(self, args):
         deprecations = [
             # ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'),
+            ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
         ]
         for i, arg in enumerate(args[:]):
             for old_name, new_name, warning in deprecations:
                 if arg.startswith(old_name):
                     args[i] = arg.replace(old_name, new_name)
-                    print(warning)
+                    self.print_warning(warning)
         return args
 
     def build_parser(self, args=None, prog=None):
@@ -1322,7 +1273,12 @@ class Archiver:
 
         list_epilog = textwrap.dedent("""
         This command lists the contents of a repository or an archive.
-        """)
+
+        See the "borg help patterns" command for more help on exclude patterns.
+
+        The following keys are available for --format:
+
+        """) + ItemFormatter.keys_help()
         subparser = subparsers.add_parser('list', parents=[common_parser],
                                           description=self.do_list.__doc__,
                                           epilog=list_epilog,
@@ -1332,15 +1288,22 @@ class Archiver:
         subparser.add_argument('--short', dest='short',
                                action='store_true', default=False,
                                help='only print file/directory names, nothing else')
-        subparser.add_argument('--list-format', dest='listformat', type=str,
-                               help="""specify format for archive file listing
-                                (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}")
-                                Special "{formatkeys}" exists to list available keys""")
+        subparser.add_argument('--format', '--list-format', dest='format', type=str,
+                               help="""specify format for file listing
+                                (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
         subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
                                help='only consider archive names starting with this prefix')
+        subparser.add_argument('-e', '--exclude', dest='excludes',
+                               type=parse_pattern, action='append',
+                               metavar="PATTERN", help='exclude paths matching PATTERN')
+        subparser.add_argument('--exclude-from', dest='exclude_files',
+                               type=argparse.FileType('r'), action='append',
+                               metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                help='repository/archive to list contents of')
+        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
+                               help='paths to extract; patterns are supported')
 
         mount_epilog = textwrap.dedent("""
         This command mounts an archive as a FUSE filesystem. This can be useful for

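A note on the byte-oriented output in do_list above: formatter output can contain surrogate-escaped characters from non-UTF-8 file names, so it is written as bytes rather than text. The following standalone sketch (independent of borg; the example path is made up) shows the same shim behaviour:

    import sys

    # Write bytes via sys.stdout.buffer when it exists; fall back to the text
    # stream when tests have replaced sys.stdout with e.g. StringIO (no .buffer).
    if not hasattr(sys.stdout, 'buffer'):
        def write(bytestring):
            sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
    else:
        write = sys.stdout.buffer.write

    line = "{path}\n".format(path="input/b\udcffd_name")  # surrogate from a non-UTF-8 file name
    write(line.encode('utf-8', errors='surrogateescape'))
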
borg/helpers.py  (+156, -2)

@@ -1,8 +1,9 @@
 import argparse
 from binascii import hexlify
 from collections import namedtuple
-from functools import wraps
+from functools import wraps, partial
 import grp
+import hashlib
 import os
 import stat
 import textwrap
@@ -10,6 +11,7 @@ import pwd
 import re
 from shutil import get_terminal_size
 import sys
+from string import Formatter
 import platform
 import time
 import unicodedata
@@ -548,6 +550,20 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
     return tag_paths
 
 
+def partial_format(format, mapping):
+    """
+    Apply format.format_map(mapping) while preserving unknown keys
+
+    Does not support attribute access, indexing and ![rsa] conversions
+    """
+    for key, value in mapping.items():
+        key = re.escape(key)
+        format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
+                        lambda match: match.group(1).format_map(mapping),
+                        format)
+    return format
+
+
 def format_line(format, data):
     # TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
 
@@ -556,7 +572,7 @@ def format_line(format, data):
     except (KeyError, ValueError) as e:
         # this should catch format errors
         print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
-    except:
+    except Exception as e:
         # something unexpected, print error and raise exception
         print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
         raise
@@ -1090,3 +1106,141 @@ def log_multi(*msgs, level=logging.INFO):
         lines.extend(msg.splitlines())
     for line in lines:
         logger.log(level, line)
+
+
+class ItemFormatter:
+    FIXED_KEYS = {
+        # Formatting aids
+        'LF': '\n',
+        'SPACE': ' ',
+        'TAB': '\t',
+        'CR': '\r',
+        'NUL': '\0',
+        'NEWLINE': os.linesep,
+        'NL': os.linesep,
+    }
+    KEY_DESCRIPTIONS = {
+        'NEWLINE': 'OS dependent line separator',
+        'NL': 'alias of NEWLINE',
+        'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath',
+        'csize': 'compressed size',
+        'bpath': 'verbatim POSIX path, can contain any character except NUL',
+        'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
+        'source': 'link target for links (identical to linktarget)',
+        'num_chunks': 'number of chunks in this file',
+        'unique_chunks': 'number of unique chunks in this file',
+    }
+
+    @classmethod
+    def available_keys(cls):
+        class FakeArchive:
+            fpr = name = ""
+
+        fake_item = {
+            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
+            b'uid': 0, b'gid': 0,
+        }
+        formatter = cls(FakeArchive, "")
+        keys = []
+        keys.extend(formatter.call_keys.keys())
+        keys.extend(formatter.get_item_data(fake_item).keys())
+        return sorted(keys, key=lambda s: (s.isupper(), s))
+
+    @classmethod
+    def keys_help(cls):
+        help = []
+        for key in cls.available_keys():
+            text = " - " + key
+            if key in cls.KEY_DESCRIPTIONS:
+                text += ": " + cls.KEY_DESCRIPTIONS[key]
+            help.append(text)
+        return "\n".join(help)
+
+    def __init__(self, archive, format):
+        self.archive = archive
+        static_keys = {
+            'archivename': archive.name,
+            'archiveid': archive.fpr,
+        }
+        static_keys.update(self.FIXED_KEYS)
+        self.format = partial_format(format, static_keys)
+        self.format_keys = {f[1] for f in Formatter().parse(format)}
+        self.call_keys = {
+            'size': self.calculate_size,
+            'csize': self.calculate_csize,
+            'num_chunks': self.calculate_num_chunks,
+            'unique_chunks': self.calculate_unique_chunks,
+            'isomtime': partial(self.format_time, b'mtime'),
+            'isoctime': partial(self.format_time, b'ctime'),
+            'isoatime': partial(self.format_time, b'atime'),
+            'mtime': partial(self.time, b'mtime'),
+            'ctime': partial(self.time, b'ctime'),
+            'atime': partial(self.time, b'atime'),
+        }
+        for hash_function in hashlib.algorithms_guaranteed:
+            self.add_key(hash_function, partial(self.hash_item, hash_function))
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+        self.item_data = static_keys
+
+    def add_key(self, key, callable_with_item):
+        self.call_keys[key] = callable_with_item
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+
+    def get_item_data(self, item):
+        mode = stat.filemode(item[b'mode'])
+        item_type = mode[0]
+        item_data = self.item_data
+
+        source = item.get(b'source', '')
+        extra = ''
+        if source:
+            source = remove_surrogates(source)
+            if item_type == 'l':
+                extra = ' -> %s' % source
+            else:
+                mode = 'h' + mode[1:]
+                extra = ' link to %s' % source
+        item_data['type'] = item_type
+        item_data['mode'] = mode
+        item_data['user'] = item[b'user'] or item[b'uid']
+        item_data['group'] = item[b'group'] or item[b'gid']
+        item_data['uid'] = item[b'uid']
+        item_data['gid'] = item[b'gid']
+        item_data['path'] = remove_surrogates(item[b'path'])
+        item_data['bpath'] = item[b'path']
+        item_data['source'] = source
+        item_data['linktarget'] = source
+        item_data['extra'] = extra
+        for key in self.used_call_keys:
+            item_data[key] = self.call_keys[key](item)
+        return item_data
+
+    def format_item(self, item):
+        return self.format.format_map(self.get_item_data(item))
+
+    def calculate_num_chunks(self, item):
+        return len(item.get(b'chunks', []))
+
+    def calculate_unique_chunks(self, item):
+        chunk_index = self.archive.cache.chunks
+        return sum(1 for chunk_id, _, _ in item.get(b'chunks', []) if chunk_index[chunk_id][0] == 1)
+
+    def calculate_size(self, item):
+        return sum(size for _, size, _ in item.get(b'chunks', []))
+
+    def calculate_csize(self, item):
+        return sum(csize for _, _, csize in item.get(b'chunks', []))
+
+    def hash_item(self, hash_function, item):
+        if b'chunks' not in item:
+            return ""
+        hash = hashlib.new(hash_function)
+        for chunk in self.archive.pipeline.fetch_many([c[0] for c in item[b'chunks']]):
+            hash.update(chunk)
+        return hash.hexdigest()
+
+    def format_time(self, key, item):
+        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+
+    def time(self, key, item):
+        return safe_timestamp(item.get(key) or item[b'mtime'])

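partial_format() is what lets ItemFormatter bind the static keys (NL, TAB, archivename, ...) once at construction time while leaving the per-item keys for format_map() later. A self-contained sketch of that behaviour (the function body is copied from the diff above):

    import re

    def partial_format(format, mapping):
        """Apply format.format_map(mapping) but leave unknown keys untouched."""
        for key, value in mapping.items():
            key = re.escape(key)
            format = re.sub(r'(?<!\{)((\{%s\})|(\{%s:[^\}]*\}))' % (key, key),
                            lambda match: match.group(1).format_map(mapping),
                            format)
        return format

    # Static keys are substituted immediately; {path} and {size:8} survive
    # verbatim and are filled per item later by format_map().
    fmt = partial_format("{path}{TAB}{size:8}{NL}", {"NL": "\n", "TAB": "\t"})
    assert fmt == "{path}\t{size:8}\n"
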
borg/testsuite/archiver.py  (+37, -3)

@@ -892,16 +892,50 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in('test-2', output)
         self.assert_not_in('something-else', output)
 
-    def test_list_list_format(self):
+    def test_list_format(self):
         self.cmd('init', self.repository_location)
         test_archive = self.repository_location + '::test'
         self.cmd('create', test_archive, src_dir)
+        self.cmd('list', '--list-format', '-', test_archive, exit_code=1)
+        self.archiver.exit_code = 0  # reset exit code for following tests
         output_1 = self.cmd('list', test_archive)
-        output_2 = self.cmd('list', '--list-format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
-        output_3 = self.cmd('list', '--list-format', '{mtime:%s} {path}{NL}', test_archive)
+        output_2 = self.cmd('list', '--format', '{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NEWLINE}', test_archive)
+        output_3 = self.cmd('list', '--format', '{mtime:%s} {path}{NL}', test_archive)
         self.assertEqual(output_1, output_2)
         self.assertNotEqual(output_1, output_3)
 
+    def test_list_hash(self):
+        self.create_regular_file('empty_file', size=0)
+        self.create_regular_file('amb', contents=b'a' * 1000000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', test_archive, 'input')
+        output = self.cmd('list', '--format', '{sha256} {path}{NL}', test_archive)
+        assert "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0 input/amb" in output
+        assert "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 input/empty_file" in output
+
+    def test_list_chunk_counts(self):
+        self.create_regular_file('empty_file', size=0)
+        self.create_regular_file('two_chunks')
+        with open(os.path.join(self.input_path, 'two_chunks'), 'wb') as fd:
+            fd.write(b'abba' * 2000000)
+            fd.write(b'baab' * 2000000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', test_archive, 'input')
+        output = self.cmd('list', '--format', '{num_chunks} {unique_chunks} {path}{NL}', test_archive)
+        assert "0 0 input/empty_file" in output
+        assert "2 2 input/two_chunks" in output
+
+    def test_list_size(self):
+        self.create_regular_file('compressible_file', size=10000)
+        self.cmd('init', self.repository_location)
+        test_archive = self.repository_location + '::test'
+        self.cmd('create', '-C', 'lz4', test_archive, 'input')
+        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
+        size, csize, path = output.split("\n")[1].split(" ")
+        assert int(csize) < int(size)
+
     def test_break_lock(self):
         self.cmd('init', self.repository_location)
         self.cmd('break-lock', self.repository_location)

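The expected digests in test_list_hash are plain hashlib digests of the fixture contents, since hashing a file's chunks in order is equivalent to hashing the whole file. They can be reproduced directly (a standalone check, not part of the commit):

    import hashlib

    # 'amb' fixture: one million 'a' bytes (the classic SHA-256 test vector).
    assert hashlib.sha256(b'a' * 1000000).hexdigest() == \
        "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"

    # 'empty_file' fixture: SHA-256 of empty input.
    assert hashlib.sha256(b'').hexdigest() == \
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
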
borg/testsuite/helpers.py  (+9, -1)

@@ -15,7 +15,7 @@ from ..helpers import Location, format_file_size, format_timedelta, make_path_sa
     yes, TRUISH, FALSISH, DEFAULTISH, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
     ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
-    PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
+    PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format
 from . import BaseTestCase, environment_variable, FakeInputs
 
 
@@ -877,3 +877,11 @@ def test_progress_endless_step(capfd):
     pi.show()
     out, err = capfd.readouterr()
     assert err == '.'
+
+
+def test_partial_format():
+    assert partial_format('{space:10}', {'space': ' '}) == ' ' * 10
+    assert partial_format('{foobar}', {'bar': 'wrong', 'foobar': 'correct'}) == 'correct'
+    assert partial_format('{unknown_key}', {}) == '{unknown_key}'
+    assert partial_format('{key}{{escaped_key}}', {}) == '{key}{{escaped_key}}'
+    assert partial_format('{{escaped_key}}', {'escaped_key': 1234}) == '{{escaped_key}}'