Browse Source

create: add the slashdot hack, fixes #4685

Thomas Waldmann 1 year ago
parent
commit
e7bd18d7f3

+ 3 - 0
docs/usage/create.rst

@@ -10,6 +10,9 @@ Examples
     # same, but list all files as we process them
     $ borg create --list my-documents ~/Documents
 
+    # Backup /mnt/disk/docs, but strip the path prefix using the slashdot hack
+    $ borg create docs /mnt/disk/./docs
+
     # Backup ~/Documents and ~/src but exclude pyc files
     $ borg create my-files                \
         ~/Documents                       \

+ 64 - 14
src/borg/archive.py

@@ -1361,7 +1361,16 @@ class FilesystemObjectProcessors:
         self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
 
     @contextmanager
-    def create_helper(self, path, st, status=None, hardlinkable=True):
+    def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
+        if strip_prefix is not None:
+            assert not path.endswith(os.sep)
+            if strip_prefix.startswith(path + os.sep):
+                # still on a directory level that shall be stripped - do not create an item for this!
+                yield None, "x", False, None
+                return
+            # adjust path, remove stripped directory levels
+            path = path.removeprefix(strip_prefix)
+
         sanitized_path = remove_dotdot_prefixes(path)
         item = Item(path=sanitized_path)
         hardlinked = hardlinkable and st.st_nlink > 1
@@ -1384,13 +1393,26 @@ class FilesystemObjectProcessors:
             chunks = item.chunks if "chunks" in item else None
             self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)
 
-    def process_dir_with_fd(self, *, path, fd, st):
-        with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
-            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
+    def process_dir_with_fd(self, *, path, fd, st, strip_prefix):
+        with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):
+            if item is not None:
+                item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
             return status
 
-    def process_dir(self, *, path, parent_fd, name, st):
-        with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
+    def process_dir(self, *, path, parent_fd, name, st, strip_prefix):
+        with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):
+            if item is None:
+                return status
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open") as fd:
                 # fd is None for directories on windows, in that case a race condition check is not possible.
                 if fd is not None:
@@ -1399,25 +1421,46 @@ class FilesystemObjectProcessors:
                 item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
                 return status
 
-    def process_fifo(self, *, path, parent_fd, name, st):
-        with self.create_helper(path, st, "f") as (item, status, hardlinked, hl_chunks):  # fifo
+    def process_fifo(self, *, path, parent_fd, name, st, strip_prefix):
+        with self.create_helper(path, st, "f", strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):  # fifo
+            if item is None:
+                return status
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
                 with backup_io("fstat"):
                     st = stat_update_check(st, os.fstat(fd))
                 item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
                 return status
 
-    def process_dev(self, *, path, parent_fd, name, st, dev_type):
-        with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hl_chunks):  # char/block device
+    def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix):
+        with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):  # char/block device
             # looks like we can not work fd-based here without causing issues when trying to open/close the device
+            if item is None:
+                return status
             with backup_io("stat"):
                 st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False))
             item.rdev = st.st_rdev
             item.update(self.metadata_collector.stat_attrs(st, path))
             return status
 
-    def process_symlink(self, *, path, parent_fd, name, st):
-        with self.create_helper(path, st, "s", hardlinkable=True) as (item, status, hardlinked, hl_chunks):
+    def process_symlink(self, *, path, parent_fd, name, st, strip_prefix):
+        with self.create_helper(path, st, "s", hardlinkable=True, strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):
+            if item is None:
+                return status
             fname = name if name is not None and parent_fd is not None else path
             with backup_io("readlink"):
                 target = os.readlink(fname, dir_fd=parent_fd)
@@ -1466,8 +1509,15 @@ class FilesystemObjectProcessors:
             self.add_item(item, stats=self.stats)
             return status
 
-    def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False):
-        with self.create_helper(path, st, None) as (item, status, hardlinked, hl_chunks):  # no status yet
+    def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False, strip_prefix):
+        with self.create_helper(path, st, None, strip_prefix=strip_prefix) as (
+            item,
+            status,
+            hardlinked,
+            hl_chunks,
+        ):  # no status yet
+            if item is None:
+                return status
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
                 with backup_io("fstat"):
                     st = stat_update_check(st, os.fstat(fd))

+ 64 - 13
src/borg/archiver/create_cmd.py

@@ -20,7 +20,7 @@ from ..helpers import comment_validator, ChunkerParams, PathSpec
 from ..helpers import archivename_validator, FilesCacheMode
 from ..helpers import eval_escapes
 from ..helpers import timestamp, archive_ts_now
-from ..helpers import get_cache_dir, os_stat
+from ..helpers import get_cache_dir, os_stat, get_strip_prefix
 from ..helpers import dir_is_tagged
 from ..helpers import log_multi
 from ..helpers import basic_json_data, json_print
@@ -107,6 +107,7 @@ class CreateMixIn:
                     pipe_bin = sys.stdin.buffer
                 pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
                 for path in iter_separated(pipe, paths_sep):
+                    strip_prefix = get_strip_prefix(path)
                     path = os.path.normpath(path)
                     try:
                         with backup_io("stat"):
@@ -120,6 +121,7 @@ class CreateMixIn:
                             cache=cache,
                             read_special=args.read_special,
                             dry_run=dry_run,
+                            strip_prefix=strip_prefix,
                         )
                     except BackupError as e:
                         self.print_warning_instance(BackupWarning(path, e))
@@ -157,6 +159,8 @@ class CreateMixIn:
                         if not dry_run and status is not None:
                             fso.stats.files_stats[status] += 1
                         continue
+
+                    strip_prefix = get_strip_prefix(path)
                     path = os.path.normpath(path)
                     try:
                         with backup_io("stat"):
@@ -176,6 +180,7 @@ class CreateMixIn:
                             restrict_dev=restrict_dev,
                             read_special=args.read_special,
                             dry_run=dry_run,
+                            strip_prefix=strip_prefix,
                         )
                         # if we get back here, we've finished recursing into <path>,
                         # we do not ever want to get back in there (even if path is given twice as recursion root)
@@ -274,7 +279,7 @@ class CreateMixIn:
         else:
             create_inner(None, None, None)
 
-    def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
+    def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
         """
         Call the right method on the given FilesystemObjectProcessor.
         """
@@ -287,13 +292,21 @@ class CreateMixIn:
             try:
                 if stat.S_ISREG(st.st_mode):
                     return fso.process_file(
-                        path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, last_try=last_try
+                        path=path,
+                        parent_fd=parent_fd,
+                        name=name,
+                        st=st,
+                        cache=cache,
+                        last_try=last_try,
+                        strip_prefix=strip_prefix,
                     )
                 elif stat.S_ISDIR(st.st_mode):
-                    return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
+                    return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
                 elif stat.S_ISLNK(st.st_mode):
                     if not read_special:
-                        return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
+                        return fso.process_symlink(
+                            path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
+                        )
                     else:
                         try:
                             st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
@@ -310,12 +323,17 @@ class CreateMixIn:
                                 cache=cache,
                                 flags=flags_special_follow,
                                 last_try=last_try,
+                                strip_prefix=strip_prefix,
                             )
                         else:
-                            return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
+                            return fso.process_symlink(
+                                path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
+                            )
                 elif stat.S_ISFIFO(st.st_mode):
                     if not read_special:
-                        return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
+                        return fso.process_fifo(
+                            path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
+                        )
                     else:
                         return fso.process_file(
                             path=path,
@@ -325,10 +343,13 @@ class CreateMixIn:
                             cache=cache,
                             flags=flags_special,
                             last_try=last_try,
+                            strip_prefix=strip_prefix,
                         )
                 elif stat.S_ISCHR(st.st_mode):
                     if not read_special:
-                        return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c")
+                        return fso.process_dev(
+                            path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix
+                        )
                     else:
                         return fso.process_file(
                             path=path,
@@ -338,10 +359,13 @@ class CreateMixIn:
                             cache=cache,
                             flags=flags_special,
                             last_try=last_try,
+                            strip_prefix=strip_prefix,
                         )
                 elif stat.S_ISBLK(st.st_mode):
                     if not read_special:
-                        return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b")
+                        return fso.process_dev(
+                            path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix
+                        )
                     else:
                         return fso.process_file(
                             path=path,
@@ -351,6 +375,7 @@ class CreateMixIn:
                             cache=cache,
                             flags=flags_special,
                             last_try=last_try,
+                            strip_prefix=strip_prefix,
                         )
                 elif stat.S_ISSOCK(st.st_mode):
                     # Ignore unix sockets
@@ -401,6 +426,7 @@ class CreateMixIn:
         restrict_dev,
         read_special,
         dry_run,
+        strip_prefix,
     ):
         """
         Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
@@ -457,6 +483,7 @@ class CreateMixIn:
                     cache=cache,
                     read_special=read_special,
                     dry_run=dry_run,
+                    strip_prefix=strip_prefix,
                 )
             else:
                 with OsOpen(
@@ -474,7 +501,9 @@ class CreateMixIn:
                             if not recurse_excluded_dir:
                                 if keep_exclude_tags:
                                     if not dry_run:
-                                        fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
+                                        fso.process_dir_with_fd(
+                                            path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
+                                        )
                                     for tag_name in tag_names:
                                         tag_path = os.path.join(path, tag_name)
                                         self._rec_walk(
@@ -491,12 +520,13 @@ class CreateMixIn:
                                             restrict_dev=restrict_dev,
                                             read_special=read_special,
                                             dry_run=dry_run,
+                                            strip_prefix=strip_prefix,
                                         )
                                 self.print_file_status("-", path)  # excluded
                             return
                     if not recurse_excluded_dir:
                         if not dry_run:
-                            status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
+                            status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
                         else:
                             status = "+"  # included (dir)
                     if recurse:
@@ -518,6 +548,7 @@ class CreateMixIn:
                                 restrict_dev=restrict_dev,
                                 read_special=read_special,
                                 dry_run=dry_run,
+                                strip_prefix=strip_prefix,
                             )
 
         except BackupError as e:
@@ -541,6 +572,11 @@ class CreateMixIn:
         that means if relative paths are desired, the command has to be run from the correct
         directory.
 
+        The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
+        ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but
+        to strip the prefix to the left of the ``/./`` from the archived items (in this
+        case, ``this/gets/archived`` will be the path stored in the archived item).
+
         When giving '-' as path, borg will read data from standard input and create a
         file 'stdin' in the created archive from that data. In some cases it's more
         appropriate to use --content-from-command, however. See section *Reading from
@@ -680,8 +716,8 @@ class CreateMixIn:
         - 'i' = backup data was read from standard input (stdin)
         - '?' = missing status code (if you see this, please file a bug report!)
 
-        Reading from stdin
-        ++++++++++++++++++
+        Reading backup data from stdin
+        ++++++++++++++++++++++++++++++
 
         There are two methods to read from stdin. Either specify ``-`` as path and
         pipe directly to borg::
@@ -712,6 +748,21 @@ class CreateMixIn:
 
         By default, the content read from stdin is stored in a file called 'stdin'.
         Use ``--stdin-name`` to change the name.
+
+        Feeding all file paths from an external source
+        ++++++++++++++++++++++++++++++++++++++++++++++
+
+        Usually, you give a starting path (recursion root) to borg and then borg
+        automatically recurses, finds and backs up all fs objects contained in
+        there (optionally considering include/exclude rules).
+
+        If you need more control and you want to give every single fs object path
+        to borg (maybe implementing your own recursion or your own rules), you can use
+        ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will
+        fail to create an archive should the command fail).
+
+        Note that the slashdot hack to strip path prefixes also works for paths given
+        this way, so be careful not to trigger it unintentionally.
         """
         )
 

+ 1 - 1
src/borg/helpers/__init__.py

@@ -20,7 +20,7 @@ from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundErro
 from .fs import ensure_dir, join_base_dir, get_socket_filename
 from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
 from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
-from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
+from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount
 from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
 from .fs import HardLinkManager
 from .misc import sysinfo, log_multi, consume

+ 15 - 0
src/borg/helpers/fs.py

@@ -233,6 +233,21 @@ def make_path_safe(path):
     return path
 
 
+def get_strip_prefix(path):
+    # similar to how rsync does it, we allow users to give paths like:
+    # /this/gets/stripped/./this/is/kept
+    # the whole path is what is used to read from the fs,
+    # the strip_prefix will be /this/gets/stripped/ and
+    # this/is/kept is the path being archived.
+    pos = path.find("/./")  # detect slashdot hack
+    if pos > 0:
+        # found a prefix to strip! make sure it ends with one "/"!
+        return os.path.normpath(path[:pos]) + os.sep
+    else:
+        # no or empty prefix, nothing to strip!
+        return None
+
+
 _dotdot_re = re.compile(r"^(\.\./)+")
 
 

+ 26 - 0
src/borg/testsuite/archiver/create_cmd.py

@@ -908,6 +908,32 @@ def test_create_read_special_broken_symlink(archivers, request):
     assert "input/link -> somewhere does not exist" in output
 
 
+def test_create_dotslash_hack(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    os.makedirs(os.path.join(archiver.input_path, "first", "secondA", "thirdA"))
+    os.makedirs(os.path.join(archiver.input_path, "first", "secondB", "thirdB"))
+    cmd(archiver, "rcreate", RK_ENCRYPTION)
+    cmd(archiver, "create", "test", "input/first/./")  # hack!
+    output = cmd(archiver, "list", "test")
+    # dir levels left of slashdot (= input, first) not in archive:
+    assert "input" not in output
+    assert "input/first" not in output
+    assert "input/first/secondA" not in output
+    assert "input/first/secondA/thirdA" not in output
+    assert "input/first/secondB" not in output
+    assert "input/first/secondB/thirdB" not in output
+    assert "first" not in output
+    assert "first/secondA" not in output
+    assert "first/secondA/thirdA" not in output
+    assert "first/secondB" not in output
+    assert "first/secondB/thirdB" not in output
+    # dir levels right of slashdot are in archive:
+    assert "secondA" in output
+    assert "secondA/thirdA" in output
+    assert "secondB" in output
+    assert "secondB/thirdB" in output
+
+
 def test_log_json(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)