Browse Source

Merge pull request #1242 from ThomasWaldmann/fix-1214

fixes for --read-special mode
TW 9 years ago
parent
commit
458edf351b
3 changed files with 61 additions and 34 deletions
  1. 22 3
      borg/archive.py
  2. 21 15
      borg/archiver.py
  3. 18 16
      docs/usage.rst

+ 22 - 3
borg/archive.py

@@ -46,6 +46,11 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
 flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
 flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
 
 
 
 
+def is_special(mode):
+    # file types that get special treatment in --read-special mode
+    return stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)
+
+
 class BackupOSError(Exception):
 class BackupOSError(Exception):
     """
     """
     Wrapper for OSError raised while accessing backup files.
     Wrapper for OSError raised while accessing backup files.
@@ -589,9 +594,16 @@ Number of files: {0.stats.nfiles}'''.format(
                 return status
                 return status
             else:
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
+        is_special_file = is_special(st.st_mode)
+        if not is_special_file:
+            path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
+            ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
+        else:
+            # in --read-special mode, we may be called for special files.
+            # there should be no information in the cache about special files processed in
+            # read-special mode, but we had better play it safe, as this was wrong in the past:
+            path_hash = ids = None
         first_run = not cache.files
         first_run = not cache.files
-        ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
         if first_run:
         if first_run:
             logger.debug('Processing files ...')
             logger.debug('Processing files ...')
         chunks = None
         chunks = None
@@ -616,10 +628,17 @@ Number of files: {0.stats.nfiles}'''.format(
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
                     chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
                     if self.show_progress:
                     if self.show_progress:
                         self.stats.show_progress(item=item, dt=0.2)
                         self.stats.show_progress(item=item, dt=0.2)
-            cache.memorize_file(path_hash, st, [c[0] for c in chunks])
+            if not is_special_file:
+                # we must not memorize special files, because the contents of e.g. a
+                # block or char device will change without its mtime/size/inode changing.
+                cache.memorize_file(path_hash, st, [c[0] for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item[b'chunks'] = chunks
         item[b'chunks'] = chunks
         item.update(self.stat_attrs(st, path))
         item.update(self.stat_attrs(st, path))
+        if is_special_file:
+            # we processed a special file like a regular file. reflect that in mode,
+            # so it can be extracted / accessed in FUSE mount like a regular file:
+            item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode'])
         self.stats.nfiles += 1
         self.stats.nfiles += 1
         self.add_item(item)
         self.add_item(item)
         return status
         return status

+ 21 - 15
borg/archiver.py

@@ -29,7 +29,7 @@ from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader
 from .repository import Repository
 from .repository import Repository
 from .cache import Cache
 from .cache import Cache
 from .key import key_creator, RepoKey, PassphraseKey
 from .key import key_creator, RepoKey, PassphraseKey
-from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS
+from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS, is_special
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 
 
 has_lchflags = hasattr(os, 'lchflags')
 has_lchflags = hasattr(os, 'lchflags')
@@ -256,15 +256,7 @@ class Archiver:
             return
             return
 
 
         try:
         try:
-            # usually, do not follow symlinks (if we have a symlink, we want to
-            # backup it as such).
-            # but if we are in --read-special mode, we later process <path> as
-            # a regular file (we open and read the symlink target file's content).
-            # thus, in read_special mode, we also want to stat the symlink target
-            # file, for consistency. if we did not, we also have issues extracting
-            # this file, as it would be in the archive as a symlink, not as the
-            # target's file type (which could be e.g. a block device).
-            st = os.stat(path, follow_symlinks=read_special)
+            st = os.lstat(path)
         except OSError as e:
         except OSError as e:
             self.print_warning('%s: %s', path, e)
             self.print_warning('%s: %s', path, e)
             return
             return
@@ -277,7 +269,7 @@ class Archiver:
         # Ignore if nodump flag is set
         # Ignore if nodump flag is set
         if has_lchflags and (st.st_flags & stat.UF_NODUMP):
         if has_lchflags and (st.st_flags & stat.UF_NODUMP):
             return
             return
-        if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
+        if stat.S_ISREG(st.st_mode):
             if not dry_run:
             if not dry_run:
                 try:
                 try:
                     status = archive.process_file(path, st, cache, self.ignore_inode)
                     status = archive.process_file(path, st, cache, self.ignore_inode)
@@ -309,13 +301,26 @@ class Archiver:
                                   read_special=read_special, dry_run=dry_run)
                                   read_special=read_special, dry_run=dry_run)
         elif stat.S_ISLNK(st.st_mode):
         elif stat.S_ISLNK(st.st_mode):
             if not dry_run:
             if not dry_run:
-                status = archive.process_symlink(path, st)
+                if not read_special:
+                    status = archive.process_symlink(path, st)
+                else:
+                    st_target = os.stat(path)
+                    if is_special(st_target.st_mode):
+                        status = archive.process_file(path, st_target, cache)
+                    else:
+                        status = archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
         elif stat.S_ISFIFO(st.st_mode):
             if not dry_run:
             if not dry_run:
-                status = archive.process_fifo(path, st)
+                if not read_special:
+                    status = archive.process_fifo(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
         elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
             if not dry_run:
             if not dry_run:
-                status = archive.process_dev(path, st)
+                if not read_special:
+                    status = archive.process_dev(path, st)
+                else:
+                    status = archive.process_file(path, st, cache)
         elif stat.S_ISSOCK(st.st_mode):
         elif stat.S_ISSOCK(st.st_mode):
             # Ignore unix sockets
             # Ignore unix sockets
             return
             return
@@ -1134,7 +1139,8 @@ class Archiver:
                                     'lzma,0 .. lzma,9 == lzma (with level 0..9).')
                                     'lzma,0 .. lzma,9 == lzma (with level 0..9).')
         subparser.add_argument('--read-special', dest='read_special',
         subparser.add_argument('--read-special', dest='read_special',
                                action='store_true', default=False,
                                action='store_true', default=False,
-                               help='open and read special files as if they were regular files')
+                               help='open and read block and char device files as well as FIFOs as if they were '
+                                    'regular files. Also follows symlinks pointing to these kinds of files.')
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                action='store_true', default=False,
                                action='store_true', default=False,
                                help='do not create a backup archive')
                                help='do not create a backup archive')

+ 18 - 16
docs/usage.rst

@@ -651,32 +651,34 @@ For more details, see :ref:`chunker_details`.
 --read-special
 --read-special
 ~~~~~~~~~~~~~~
 ~~~~~~~~~~~~~~
 
 
-The option ``--read-special`` is not intended for normal, filesystem-level (full or
-partly-recursive) backups. You only give this option if you want to do something
-rather ... special -- and if you have hand-picked some files that you want to treat
-that way.
+The ``--read-special`` option is special -- you do not want to use it for normal
+full-filesystem backups, but only after carefully picking some targets for it.
 
 
-``borg create --read-special`` will open all files without doing any special
-treatment according to the file type (the only exception here are directories:
-they will be recursed into). Just imagine what happens if you do ``cat
-filename`` --- the content you will see there is what borg will backup for that
-filename.
+The option ``--read-special`` triggers special treatment for block and char
+device files as well as FIFOs. Instead of storing them as such a device (or
+FIFO), they will get opened, their content will be read and in the backup
+archive they will show up like a regular file.
 
 
-So, for example, symlinks will be followed, block device content will be read,
-named pipes / UNIX domain sockets will be read.
+Symlinks will also get special treatment if (and only if) they point to such
+a special file: instead of storing them as a symlink, the target special file
+will get processed as described above.
 
 
-You need to be careful with what you give as filename when using ``--read-special``,
-e.g. if you give ``/dev/zero``, your backup will never terminate.
+One intended use case of this is backing up the contents of one or multiple
+block devices, like e.g. LVM snapshots or inactive LVs or disk partitions.
 
 
-The given files' metadata is saved as it would be saved without
-``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
-additionally, also the content read from it will be saved for it.
+You need to be careful about what you include when using ``--read-special``,
+e.g. if you include ``/dev/zero``, your backup will never terminate.
 
 
 Restoring such files' content is currently only supported one at a time via
 Restoring such files' content is currently only supported one at a time via
 ``--stdout`` option (and you have to redirect stdout to wherever it shall go,
 ``--stdout`` option (and you have to redirect stdout to wherever it shall go,
 maybe directly into an existing device file of your choice or indirectly via
 maybe directly into an existing device file of your choice or indirectly via
 ``dd``).
 ``dd``).
 
 
+To some extent, mounting a backup archive with the backups of special files
+via ``borg mount`` and then loop-mounting the image files from inside the mount
+point will work. If you plan to access a lot of data in there, it will likely
+scale and perform better if you do not work via the FUSE mount.
+
 Example
 Example
 +++++++
 +++++++