소스 검색

Merge pull request #1242 from ThomasWaldmann/fix-1214

fixes for --read-special mode
TW 9 년 전
부모
커밋
458edf351b
3개의 변경된 파일, 61개의 추가 그리고 34개의 삭제
  1. 22 3
      borg/archive.py
  2. 21 15
      borg/archiver.py
  3. 18 16
      docs/usage.rst

+ 22 - 3
borg/archive.py

@@ -46,6 +46,11 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
 flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
 
 
+def is_special(mode):
+    # file types that get special treatment in --read-special mode
+    return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
+
+
 class BackupOSError(Exception):
     """
     Wrapper for OSError raised while accessing backup files.
@@ -589,9 +594,16 @@ Number of files: {0.stats.nfiles}'''.format(
                 return status
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
+        is_special_file = is_special(st.st_mode)
+        if not is_special_file:
+            path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
+            ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
+        else:
+            # in --read-special mode, we may be called for special files.
+            # there should be no information in the cache about special files processed in
+            # read-special mode, but we better play safe as this was wrong in the past:
+            path_hash = ids = None
         first_run = not cache.files
-        ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
         if first_run:
             logger.debug('Processing files ...')
         chunks = None
@@ -616,10 +628,17 @@ Number of files: {0.stats.nfiles}'''.format(
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
-            cache.memorize_file(path_hash, st, [c[0] for c in chunks])
+            if not is_special_file:
+                # we must not memorize special files, because the contents of e.g. a
+                # block or char device will change without its mtime/size/inode changing.
+                cache.memorize_file(path_hash, st, [c[0] for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item[b'chunks'] = chunks
         item.update(self.stat_attrs(st, path))
+        if is_special_file:
+            # we processed a special file like a regular file. reflect that in mode,
+            # so it can be extracted / accessed in FUSE mount like a regular file:
+            item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode'])
         self.stats.nfiles += 1
         self.add_item(item)
         return status

+ 21 - 15
borg/archiver.py

@@ -29,7 +29,7 @@ from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator, RepoKey, PassphraseKey
-from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS
+from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS, is_special
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 
 has_lchflags = hasattr(os, 'lchflags')
@@ -256,15 +256,7 @@ class Archiver:
             return
 
         try:
-            # usually, do not follow symlinks (if we have a symlink, we want to
-            # backup it as such).
-            # but if we are in --read-special mode, we later process <path> as
-            # a regular file (we open and read the symlink target file's content).
-            # thus, in read_special mode, we also want to stat the symlink target
-            # file, for consistency. if we did not, we also have issues extracting
-            # this file, as it would be in the archive as a symlink, not as the
-            # target's file type (which could be e.g. a block device).
-            st = os.stat(path, follow_symlinks=read_special)
+            st = os.lstat(path)
         except OSError as e:
             self.print_warning('%s: %s', path, e)
             return
@@ -277,7 +269,7 @@ class Archiver:
         # Ignore if nodump flag is set
         if has_lchflags and (st.st_flags & stat.UF_NODUMP):
             return
-        if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
+        if stat.S_ISREG(st.st_mode):
             if not dry_run:
                 try:
                     status = archive.process_file(path, st, cache, self.ignore_inode)
@@ -309,13 +301,26 @@ class Archiver:
                                   read_special=read_special, dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
             if not dry_run:
-                status = archive.process_symlink(path, st)
+                if not read_special:
+                    status = archive.process_symlink(path, st)
+                else:
+                    st_target = os.stat(path)
+                    if is_special(st_target.st_mode):
+                        status = archive.process_file(path, st_target, cache)
+                    else:
+                        status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
             if not dry_run:
-                status = archive.process_fifo(path, st)
+                if not read_special:
+                    status = archive.process_fifo(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
             if not dry_run:
-                status = archive.process_dev(path, st)
+                if not read_special:
+                    status = archive.process_dev(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISSOCK(st.st_mode):
             # Ignore unix sockets
             return
@@ -1134,7 +1139,8 @@ class Archiver:
                                     'lzma,0 .. lzma,9 == lzma (with level 0..9).')
         subparser.add_argument('--read-special', dest='read_special',
                                action='store_true', default=False,
-                               help='open and read special files as if they were regular files')
+                               help='open and read block and char device files as well as FIFOs as if they were '
+                                    'regular files. Also follows symlinks pointing to these kinds of files.')
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                action='store_true', default=False,
                                help='do not create a backup archive')

+ 18 - 16
docs/usage.rst

@@ -651,32 +651,34 @@ For more details, see :ref:`chunker_details`.
 --read-special
 ~~~~~~~~~~~~~~
 
-The option ``--read-special`` is not intended for normal, filesystem-level (full or
-partly-recursive) backups. You only give this option if you want to do something
-rather ... special -- and if you have hand-picked some files that you want to treat
-that way.
+The ``--read-special`` option is special - you do not want to use it for normal
+full-filesystem backups, but rather after carefully picking some targets for it.
 
-``borg create --read-special`` will open all files without doing any special
-treatment according to the file type (the only exception here are directories:
-they will be recursed into). Just imagine what happens if you do ``cat
-filename`` --- the content you will see there is what borg will backup for that
-filename.
+The option ``--read-special`` triggers special treatment for block and char
+device files as well as FIFOs. Instead of storing them as such a device (or
+FIFO), they will get opened, their content will be read and in the backup
+archive they will show up like a regular file.
 
-So, for example, symlinks will be followed, block device content will be read,
-named pipes / UNIX domain sockets will be read.
+Symlinks will also get special treatment if (and only if) they point to such
+a special file: instead of storing them as a symlink, the target special file
+will get processed as described above.
 
-You need to be careful with what you give as filename when using ``--read-special``,
-e.g. if you give ``/dev/zero``, your backup will never terminate.
+One intended use case of this is backing up the contents of one or multiple
+block devices, such as LVM snapshots, inactive LVs or disk partitions.
 
-The given files' metadata is saved as it would be saved without
-``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
-additionally, also the content read from it will be saved for it.
+You need to be careful about what you include when using ``--read-special``,
+e.g. if you include ``/dev/zero``, your backup will never terminate.
 
 Restoring such files' content is currently only supported one at a time via
 the ``--stdout`` option (and you have to redirect stdout to wherever it shall go,
 maybe directly into an existing device file of your choice or indirectly via
 ``dd``).
 
+To some extent, mounting a backup archive with the backups of special files
+via ``borg mount`` and then loop-mounting the image files from inside the mount
+point will work. If you plan to access a lot of data in there, it likely will
+scale and perform better if you do not work via the FUSE mount.
+
 Example
 +++++++