浏览代码

create: implement --paths-from-stdin and --paths-from-command (#5538)

create: implement --paths-from-stdin and --paths-from-command, see #5492

These switches read the paths to archive from stdin or from the stdout of a
command. The delimiter can be specified by --paths-delimiter=DELIM. Paths read
will be added honoring every option except exclusion options and
--one-file-system. Directories aren't recursed into.
Lapinot 4 年之前
父节点
当前提交
e1af909d2b
共有 5 个文件被更改,包括 135 次插入3 次删除
  1. 45 3
      src/borg/archiver.py
  2. 17 0
      src/borg/helpers/misc.py
  3. 5 0
      src/borg/helpers/parseformat.py
  4. 41 0
      src/borg/testsuite/archiver.py
  5. 27 0
      src/borg/testsuite/helpers.py

+ 45 - 3
src/borg/archiver.py

@@ -27,6 +27,7 @@ try:
     from binascii import unhexlify
     from binascii import unhexlify
     from contextlib import contextmanager
     from contextlib import contextmanager
     from datetime import datetime, timedelta
     from datetime import datetime, timedelta
+    from io import TextIOWrapper
 
 
     from .logger import create_logger, setup_logging
     from .logger import create_logger, setup_logging
 
 
@@ -51,7 +52,7 @@ try:
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
-    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
+    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import timestamp
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
     from .helpers import get_cache_dir, os_stat
@@ -73,6 +74,7 @@ try:
     from .helpers import flags_root, flags_dir, flags_special_follow, flags_special
     from .helpers import flags_root, flags_dir, flags_special_follow, flags_special
     from .helpers import msgpack
     from .helpers import msgpack
     from .helpers import sig_int
     from .helpers import sig_int
+    from .helpers import iter_separated
     from .nanorst import rst_to_terminal
     from .nanorst import rst_to_terminal
     from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
     from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
     from .patterns import PatternMatcher
     from .patterns import PatternMatcher
@@ -533,6 +535,37 @@ class Archiver:
                 else:
                 else:
                     status = '-'
                     status = '-'
                 self.print_file_status(status, path)
                 self.print_file_status(status, path)
+            elif args.paths_from_command or args.paths_from_stdin:
+                paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else '\n'
+                if args.paths_from_command:
+                    try:
+                        proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
+                    except (FileNotFoundError, PermissionError) as e:
+                        self.print_error('Failed to execute command: %s', e)
+                        return self.exit_code
+                    pipe_bin = proc.stdout
+                else:  # args.paths_from_stdin == True
+                    pipe_bin = sys.stdin.buffer
+                pipe = TextIOWrapper(pipe_bin, errors='surrogateescape')
+                for path in iter_separated(pipe, paths_sep):
+                    try:
+                        with backup_io('stat'):
+                            st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
+                        status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso,
+                                                   cache=cache, read_special=args.read_special, dry_run=dry_run)
+                    except (BackupOSError, BackupError) as e:
+                        self.print_warning('%s: %s', path, e)
+                        status = 'E'
+                    if status == 'C':
+                        self.print_warning('%s: file changed while we backed it up', path)
+                    if status is None:
+                        status = '?'
+                    self.print_file_status(status, path)
+                if args.paths_from_command:
+                    rc = proc.wait()
+                    if rc != 0:
+                        self.print_error('Command %r exited with status %d', args.paths[0], rc)
+                        return self.exit_code
             else:
             else:
                 for path in args.paths:
                 for path in args.paths:
                     if path == '-':  # stdin
                     if path == '-':  # stdin
@@ -3277,6 +3310,13 @@ class Archiver:
         subparser.add_argument('--content-from-command', action='store_true',
         subparser.add_argument('--content-from-command', action='store_true',
                                help='interpret PATH as command and store its stdout. See also section Reading from'
                                help='interpret PATH as command and store its stdout. See also section Reading from'
                                     ' stdin below.')
                                     ' stdin below.')
+        subparser.add_argument('--paths-from-stdin', action='store_true',
+                               help='read DELIM-separated list of paths to backup from stdin. Will not '
+                                    'recurse into directories.')
+        subparser.add_argument('--paths-from-command', action='store_true',
+                               help='interpret PATH as command and treat its output as ``--paths-from-stdin``')
+        subparser.add_argument('--paths-delimiter', metavar='DELIM',
+                               help='set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: \\n) ')
 
 
         exclude_group = define_exclusion_group(subparser, tag_files=True)
         exclude_group = define_exclusion_group(subparser, tag_files=True)
         exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
         exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
@@ -4522,10 +4562,12 @@ class Archiver:
         args = parser.parse_args(args or ['-h'])
         args = parser.parse_args(args or ['-h'])
         parser.common_options.resolve(args)
         parser.common_options.resolve(args)
         func = get_func(args)
         func = get_func(args)
+        if func == self.do_create and args.paths and args.paths_from_stdin:
+            parser.error('Must not pass PATH with ``--paths-from-stdin``.')
         if func == self.do_create and not args.paths:
         if func == self.do_create and not args.paths:
-            if args.content_from_command:
+            if args.content_from_command or args.paths_from_command:
                 parser.error('No command given.')
                 parser.error('No command given.')
-            else:
+            elif not args.paths_from_stdin:
                 # need at least 1 path but args.paths may also be populated from patterns
                 # need at least 1 path but args.paths may also be populated from patterns
                 parser.error('Need at least one PATH argument.')
                 parser.error('Need at least one PATH argument.')
         if not getattr(args, 'lock', True):  # Option --bypass-lock sets args.lock = False
         if not getattr(args, 'lock', True):  # Option --bypass-lock sets args.lock = False

+ 17 - 0
src/borg/helpers/misc.py

@@ -213,3 +213,20 @@ class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
                 except OSError:
                 except OSError:
                     pass
                     pass
         return len(s)
         return len(s)
+
+
def iter_separated(fd, sep=None, read_size=4096):
    """Iterate over the items of open file ``fd`` that are delimited by ``sep``.

    The separators are not part of the yielded items; apart from that the
    items are not trimmed.

    :param fd: object with a ``read(size)`` method returning ``str`` or ``bytes``;
               an empty result is treated as end of stream.
    :param sep: separator, of the same type as the data read from ``fd``. ``None``
                (or an empty separator) selects a newline of the matching type.
    :param read_size: size argument handed to every ``fd.read()`` call.
    :return: generator over the separated items. An empty last item (stream
             ended with ``sep``, or there was no data at all) is not yielded.
    """
    buf = fd.read(read_size)
    is_str = isinstance(buf, str)
    part = '' if is_str else b''
    sep = sep or ('\n' if is_str else b'\n')
    while len(buf) > 0:
        # Split the carried-over tail together with the new data: a
        # multi-character separator that straddles two reads would be missed
        # if each read were split on its own.
        *full, part = (part + buf).split(sep)
        yield from full
        buf = fd.read(read_size)
    # won't yield an empty part if stream ended with `sep`
    # or if there was no data before EOF
    if len(part) > 0:
        yield part

+ 5 - 0
src/borg/helpers/parseformat.py

@@ -48,6 +48,11 @@ def remove_surrogates(s, errors='replace'):
     return s.encode('utf-8', errors).decode('utf-8')
     return s.encode('utf-8', errors).decode('utf-8')
 
 
 
 
def eval_escapes(s):
    r"""Return ``s`` with literal escape sequences evaluated (e.g. the two characters ``\n`` become a newline)."""
    # Non-ASCII characters are written as backslash escapes first, so that the
    # unicode-escape decoding below turns them back into the same characters.
    ascii_escaped = s.encode('ascii', 'backslashreplace')
    return ascii_escaped.decode('unicode-escape')
+
+
 def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
 def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
     for key in keys:
     for key in keys:
         if isinstance(d.get(key), bytes):
         if isinstance(d.get(key), bytes):

+ 41 - 0
src/borg/testsuite/archiver.py

@@ -1065,6 +1065,47 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--content-from-command', self.repository_location + '::test', exit_code=2)
         output = self.cmd('create', '--content-from-command', self.repository_location + '::test', exit_code=2)
         assert output.endswith('No command given.\n')
         assert output.endswith('No command given.\n')
 
 
+    def test_create_paths_from_stdin(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("dir1/file2", size=1024 * 80)
+        self.create_regular_file("dir1/file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = b'input/file1\0input/dir1\0input/file4'
+        self.cmd('create', '--paths-from-stdin', '--paths-delimiter', '\\0',
+                 self.repository_location + '::test', input=input_data)
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/dir1', 'input/file4']
+
+    def test_create_paths_from_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("file2", size=1024 * 80)
+        self.create_regular_file("file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = 'input/file1\ninput/file2\ninput/file3'
+        self.cmd('create', '--paths-from-command',
+                 self.repository_location + '::test', '--', 'echo', input_data)
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/file2', 'input/file3']
+
+    def test_create_paths_from_command_with_failed_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test',
+                          '--', 'sh', '-c', 'exit 73;', exit_code=2)
+        assert output.endswith("Command 'sh' exited with status 73\n")
+        archive_list = json.loads(self.cmd('list', '--json', self.repository_location))
+        assert archive_list['archives'] == []
+
+    def test_create_paths_from_command_missing_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test', exit_code=2)
+        assert output.endswith('No command given.\n')
+
     def test_create_without_root(self):
     def test_create_without_root(self):
         """test create without a root"""
         """test create without a root"""
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)

+ 27 - 0
src/borg/testsuite/helpers.py

@@ -4,6 +4,7 @@ import shutil
 import sys
 import sys
 from argparse import ArgumentTypeError
 from argparse import ArgumentTypeError
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
+from io import StringIO, BytesIO
 from time import sleep
 from time import sleep
 
 
 import pytest
 import pytest
@@ -27,6 +28,8 @@ from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
 from ..helpers import popen_with_error_handling
 from ..helpers import popen_with_error_handling
 from ..helpers import dash_open
 from ..helpers import dash_open
+from ..helpers import iter_separated
+from ..helpers import eval_escapes
 
 
 from . import BaseTestCase, FakeInputs
 from . import BaseTestCase, FakeInputs
 
 
@@ -1022,3 +1025,27 @@ def test_dash_open():
     assert dash_open('-', 'w') is sys.stdout
     assert dash_open('-', 'w') is sys.stdout
     assert dash_open('-', 'rb') is sys.stdin.buffer
     assert dash_open('-', 'rb') is sys.stdin.buffer
     assert dash_open('-', 'wb') is sys.stdout.buffer
     assert dash_open('-', 'wb') is sys.stdout.buffer
+
+
def test_iter_separated():
    """Check iter_separated() on str and bytes streams with default and explicit separators."""
    # newline (the default separator) and utf-8
    expected = ['foo', 'bar/baz', 'αáčő']
    assert list(iter_separated(StringIO('\n'.join(expected)))) == expected
    # null and bogus ending
    expected = ['foo/bar', 'baz', 'spam']
    stream = StringIO('\0'.join(expected) + '\0')
    assert list(iter_separated(stream, sep='\0')) == ['foo/bar', 'baz', 'spam']
    # multichar
    stream = StringIO('SEP'.join(expected))
    assert list(iter_separated(stream, sep='SEP')) == expected
    # bytes
    expected = [b'foo', b'blop\t', b'gr\xe4ezi']
    assert list(iter_separated(BytesIO(b'\n'.join(expected)))) == expected
+
+
def test_eval_escapes():
    """Check that eval_escapes() evaluates escape sequences and keeps non-ASCII text intact."""
    cases = [
        ('\\n\\0\\x23', '\n\0#'),
        ('äç\\n', 'äç\n'),
    ]
    for literal, evaluated in cases:
        assert eval_escapes(literal) == evaluated