Bladeren bron

create: implement --paths-from-stdin and --paths-from-command (#5538)

create: implement --paths-from-stdin and --paths-from-command, see #5492

These switches read the paths to archive from stdin or from the output of a
command. The delimiter can be specified with --paths-delimiter=DELIM. Paths
read this way are added honoring every option except the exclusion options
and --one-file-system. Directories aren't recursed into.
Lapinot 4 jaren geleden
bovenliggende
commit
e1af909d2b
5 gewijzigde bestanden met toevoegingen van 135 en 3 verwijderingen
  1. 45 3
      src/borg/archiver.py
  2. 17 0
      src/borg/helpers/misc.py
  3. 5 0
      src/borg/helpers/parseformat.py
  4. 41 0
      src/borg/testsuite/archiver.py
  5. 27 0
      src/borg/testsuite/helpers.py

+ 45 - 3
src/borg/archiver.py

@@ -27,6 +27,7 @@ try:
     from binascii import unhexlify
     from contextlib import contextmanager
     from datetime import datetime, timedelta
+    from io import TextIOWrapper
 
     from .logger import create_logger, setup_logging
 
@@ -51,7 +52,7 @@ try:
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
-    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
+    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
@@ -73,6 +74,7 @@ try:
     from .helpers import flags_root, flags_dir, flags_special_follow, flags_special
     from .helpers import msgpack
     from .helpers import sig_int
+    from .helpers import iter_separated
     from .nanorst import rst_to_terminal
     from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
     from .patterns import PatternMatcher
@@ -533,6 +535,37 @@ class Archiver:
                 else:
                     status = '-'
                 self.print_file_status(status, path)
+            elif args.paths_from_command or args.paths_from_stdin:
+                paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else '\n'
+                if args.paths_from_command:
+                    try:
+                        proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
+                    except (FileNotFoundError, PermissionError) as e:
+                        self.print_error('Failed to execute command: %s', e)
+                        return self.exit_code
+                    pipe_bin = proc.stdout
+                else:  # args.paths_from_stdin == True
+                    pipe_bin = sys.stdin.buffer
+                pipe = TextIOWrapper(pipe_bin, errors='surrogateescape')
+                for path in iter_separated(pipe, paths_sep):
+                    try:
+                        with backup_io('stat'):
+                            st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
+                        status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso,
+                                                   cache=cache, read_special=args.read_special, dry_run=dry_run)
+                    except (BackupOSError, BackupError) as e:
+                        self.print_warning('%s: %s', path, e)
+                        status = 'E'
+                    if status == 'C':
+                        self.print_warning('%s: file changed while we backed it up', path)
+                    if status is None:
+                        status = '?'
+                    self.print_file_status(status, path)
+                if args.paths_from_command:
+                    rc = proc.wait()
+                    if rc != 0:
+                        self.print_error('Command %r exited with status %d', args.paths[0], rc)
+                        return self.exit_code
             else:
                 for path in args.paths:
                     if path == '-':  # stdin
@@ -3277,6 +3310,13 @@ class Archiver:
         subparser.add_argument('--content-from-command', action='store_true',
                                help='interpret PATH as command and store its stdout. See also section Reading from'
                                     ' stdin below.')
+        subparser.add_argument('--paths-from-stdin', action='store_true',
+                               help='read DELIM-separated list of paths to backup from stdin. Will not '
+                                    'recurse into directories.')
+        subparser.add_argument('--paths-from-command', action='store_true',
+                               help='interpret PATH as command and treat its output as ``--paths-from-stdin``')
+        subparser.add_argument('--paths-delimiter', metavar='DELIM',
+                               help='set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: \\n) ')
 
         exclude_group = define_exclusion_group(subparser, tag_files=True)
         exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
@@ -4522,10 +4562,12 @@ class Archiver:
         args = parser.parse_args(args or ['-h'])
         parser.common_options.resolve(args)
         func = get_func(args)
+        if func == self.do_create and args.paths and args.paths_from_stdin:
+            parser.error('Must not pass PATH with ``--paths-from-stdin``.')
         if func == self.do_create and not args.paths:
-            if args.content_from_command:
+            if args.content_from_command or args.paths_from_command:
                 parser.error('No command given.')
-            else:
+            elif not args.paths_from_stdin:
                 # need at least 1 path but args.paths may also be populated from patterns
                 parser.error('Need at least one PATH argument.')
         if not getattr(args, 'lock', True):  # Option --bypass-lock sets args.lock = False

+ 17 - 0
src/borg/helpers/misc.py

@@ -213,3 +213,20 @@ class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
                 except OSError:
                     pass
         return len(s)
+
+
+def iter_separated(fd, sep=None, read_size=4096):
+    """Iter over chunks of open file ``fd`` delimited by ``sep`` (default: newline). Doesn't trim.
+    ``sep`` may be multi-char; it is found even when split across two reads of ``read_size``."""
+    buf = fd.read(read_size)
+    is_str = isinstance(buf, str)
+    part = '' if is_str else b''
+    sep = sep or ('\n' if is_str else b'\n')
+    while len(buf) > 0:
+        # split the carried-over tail together with the new data, so that a
+        # separator straddling two reads is not missed
+        *full, part = (part + buf).split(sep)
+        yield from full
+        buf = fd.read(read_size)
+    if len(part) > 0:  # no empty item after a trailing `sep` or for empty input
+        yield part

+ 5 - 0
src/borg/helpers/parseformat.py

@@ -48,6 +48,11 @@ def remove_surrogates(s, errors='replace'):
     return s.encode('utf-8', errors).decode('utf-8')
 
 
+def eval_escapes(s):
+    """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`); non-ASCII characters are kept as they are."""
+    return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
+
+
 def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
     for key in keys:
         if isinstance(d.get(key), bytes):

+ 41 - 0
src/borg/testsuite/archiver.py

@@ -1065,6 +1065,47 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--content-from-command', self.repository_location + '::test', exit_code=2)
         assert output.endswith('No command given.\n')
 
+    def test_create_paths_from_stdin(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("dir1/file2", size=1024 * 80)
+        self.create_regular_file("dir1/file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = b'input/file1\0input/dir1\0input/file4'  # NUL-separated paths, no trailing delimiter
+        self.cmd('create', '--paths-from-stdin', '--paths-delimiter', '\\0',
+                 self.repository_location + '::test', input=input_data)
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/dir1', 'input/file4']  # dir1 is archived, its contents are not
+
+    def test_create_paths_from_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file("file1", size=1024 * 80)
+        self.create_regular_file("file2", size=1024 * 80)
+        self.create_regular_file("file3", size=1024 * 80)
+        self.create_regular_file("file4", size=1024 * 80)
+
+        input_data = 'input/file1\ninput/file2\ninput/file3'  # no --paths-delimiter given: newline-separated
+        self.cmd('create', '--paths-from-command',
+                 self.repository_location + '::test', '--', 'echo', input_data)  # echo prints the path list
+        archive_list = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        paths = [json.loads(line)['path'] for line in archive_list.split('\n') if line]
+        assert paths == ['input/file1', 'input/file2', 'input/file3']  # file4 was not listed, so not archived
+
+    def test_create_paths_from_command_with_failed_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test',
+                          '--', 'sh', '-c', 'exit 73;', exit_code=2)  # the path-listing command exits non-zero
+        assert output.endswith("Command 'sh' exited with status 73\n")
+        archive_list = json.loads(self.cmd('list', '--json', self.repository_location))
+        assert archive_list['archives'] == []  # no archive must exist after the failed command
+
+    def test_create_paths_from_command_missing_command(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--paths-from-command', self.repository_location + '::test', exit_code=2)  # no command
+        assert output.endswith('No command given.\n')
+
     def test_create_without_root(self):
         """test create without a root"""
         self.cmd('init', '--encryption=repokey', self.repository_location)

+ 27 - 0
src/borg/testsuite/helpers.py

@@ -4,6 +4,7 @@ import shutil
 import sys
 from argparse import ArgumentTypeError
 from datetime import datetime, timezone, timedelta
+from io import StringIO, BytesIO
 from time import sleep
 
 import pytest
@@ -27,6 +28,8 @@ from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
 from ..helpers import popen_with_error_handling
 from ..helpers import dash_open
+from ..helpers import iter_separated
+from ..helpers import eval_escapes
 
 from . import BaseTestCase, FakeInputs
 
@@ -1022,3 +1025,27 @@ def test_dash_open():
     assert dash_open('-', 'w') is sys.stdout
     assert dash_open('-', 'rb') is sys.stdin.buffer
     assert dash_open('-', 'wb') is sys.stdout.buffer
+
+
+def test_iter_separated():
+    # default separator (newline) on str input with non-ASCII text
+    sep, items = '\n', ['foo', 'bar/baz', 'αáčő']
+    fd = StringIO(sep.join(items))
+    assert list(iter_separated(fd)) == items
+    # NUL separator; a trailing separator must not yield an empty last item
+    sep, items = '\0', ['foo/bar', 'baz', 'spam']
+    fd = StringIO(sep.join(items) + '\0')
+    assert list(iter_separated(fd, sep=sep)) == ['foo/bar', 'baz', 'spam']
+    # multi-character separator
+    sep, items = 'SEP', ['foo/bar', 'baz', 'spam']
+    fd = StringIO(sep.join(items))
+    assert list(iter_separated(fd, sep=sep)) == items
+    # bytes input, relying on the default separator
+    sep, items = b'\n', [b'foo', b'blop\t', b'gr\xe4ezi']
+    fd = BytesIO(sep.join(items))
+    assert list(iter_separated(fd)) == items
+
+
+def test_eval_escapes():
+    assert eval_escapes('\\n\\0\\x23') == '\n\0#'  # literal escape sequences are evaluated
+    assert eval_escapes('äç\\n') == 'äç\n'  # non-ASCII text passes through unchanged