Parcourir la source

Merge branch '1.0-maint'

# Conflicts:
#	docs/changes.rst
#	docs/usage/mount.rst.inc
#	src/borg/archive.py
#	src/borg/archiver.py
#	src/borg/fuse.py
#	src/borg/testsuite/archiver.py
Thomas Waldmann il y a 9 ans
Parent
commit
3baa8a3728

+ 13 - 25
docs/api.rst

@@ -6,63 +6,55 @@ API Documentation
     :members:
     :undoc-members:
 
-.. automodule:: borg.upgrader
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.archive
     :members:
     :undoc-members:
 
-.. automodule:: borg.fuse
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.platform
+.. automodule:: borg.repository
     :members:
     :undoc-members:
 
-.. automodule:: borg.locking
+.. automodule:: borg.remote
     :members:
     :undoc-members:
 
-.. automodule:: borg.shellpattern
+.. automodule:: borg.cache
     :members:
     :undoc-members:
 
-.. automodule:: borg.repository
+.. automodule:: borg.key
     :members:
     :undoc-members:
 
-.. automodule:: borg.lrucache
+.. automodule:: borg.logger
     :members:
     :undoc-members:
 
-.. automodule:: borg.remote
+.. automodule:: borg.helpers
     :members:
     :undoc-members:
 
-.. automodule:: borg.xattr
+.. automodule:: borg.locking
     :members:
     :undoc-members:
 
-.. automodule:: borg.helpers
+.. automodule:: borg.shellpattern
     :members:
     :undoc-members:
 
-.. automodule:: borg.cache
+.. automodule:: borg.lrucache
     :members:
     :undoc-members:
 
-.. automodule:: borg.key
+.. automodule:: borg.fuse
     :members:
     :undoc-members:
 
-.. automodule:: borg.logger
+.. automodule:: borg.xattr
     :members:
     :undoc-members:
 
-.. automodule:: borg.platform_darwin
+.. automodule:: borg.platform
     :members:
     :undoc-members:
 
@@ -75,7 +67,7 @@ API Documentation
     :undoc-members:
 
 .. automodule:: borg.compress
-    :members:
+    :members: get_compressor, Compressor, CompressorBase
     :undoc-members:
 
 .. automodule:: borg.chunker
@@ -85,7 +77,3 @@ API Documentation
 .. automodule:: borg.crypto
     :members:
     :undoc-members:
-
-.. automodule:: borg.platform_freebsd
-    :members:
-    :undoc-members:

+ 79 - 34
docs/changes.rst

@@ -123,6 +123,51 @@ Other changes:
   - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
 
 
+Version 1.0.6rc1 (2016-07-10)
+-----------------------------
+
+New features:
+
+- borg check --repair: heal damaged files if missing chunks re-appear (e.g. if
+  the previously missing chunk was added again in a later backup archive),
+  #148. (*) Also improved logging.
+
+Bug fixes:
+
+- sync_dir: silence fsync() failing with EINVAL, #1287.
+  Some network filesystems (like smbfs) don't support this, and we use it in
+  repository code.
+- borg mount (FUSE):
+
+  - fix directories being shadowed when contained paths were also specified,
+    #1295
+  - raise I/O Error (EIO) on damaged files (unless -o allow_damaged_files is
+    used), #1302. (*)
+- borg extract: warn if a damaged file is extracted, #1299. (*)
+- Added some missing return code checks (ChunkIndex._add, hashindex_resize).
+- borg check: fix/optimize initial hash table size, avoids resize of the table.
+
+Other changes:
+
+- tests:
+
+  - add more FUSE tests, #1284
+  - deduplicate fuse (u)mount code
+  - fix borg binary test issues, #862
+- docs:
+
+  - changelog: added release dates to older borg releases
+  - fix some sphinx (docs generator) warnings, #881
+
+Notes:
+
+(*) Some features depend on information (chunks_healthy list) added to item
+metadata when a file with missing chunks was "repaired" using all-zero
+replacement chunks. The chunks_healthy list has only been generated since
+borg 1.0.4, so borg can't recognize such "repaired" (but content-damaged)
+files if the repair was done with an older borg version.
+
+
 Version 1.0.5 (2016-07-07)
 --------------------------
 
@@ -247,8 +292,8 @@ Other changes:
 - add .eggs to .gitignore
 
 
-Version 1.0.3
--------------
+Version 1.0.3 (2016-05-20)
+--------------------------
 
 Bug fixes:
 
@@ -277,8 +322,8 @@ Other changes:
   - borg create help: document format tags, #894
 
 
-Version 1.0.2
--------------
+Version 1.0.2 (2016-04-16)
+--------------------------
 
 Bug fixes:
 
@@ -313,8 +358,8 @@ Other changes:
   - fix confusing usage of "repo" as archive name (use "arch")
 
 
-Version 1.0.1
--------------
+Version 1.0.1 (2016-04-08)
+--------------------------
 
 New features:
 
@@ -365,8 +410,8 @@ Other changes:
   - Document logo font. Recreate logo png. Remove GIMP logo file.
 
 
-Version 1.0.0
--------------
+Version 1.0.0 (2016-03-05)
+--------------------------
 
 The major release number change (0.x -> 1.x) indicates bigger incompatible
 changes, please read the compatibility notes, adapt / test your scripts and
@@ -449,8 +494,8 @@ Other changes:
   - FAQ: how to limit bandwidth
 
 
-Version 1.0.0rc2
-----------------
+Version 1.0.0rc2 (2016-02-28)
+-----------------------------
 
 New features:
 
@@ -491,8 +536,8 @@ Other changes:
   - "connection closed by remote": add FAQ entry and point to issue #636
 
 
-Version 1.0.0rc1
-----------------
+Version 1.0.0rc1 (2016-02-07)
+-----------------------------
 
 New features:
 
@@ -541,8 +586,8 @@ Other changes:
   - misc. updates and fixes
 
 
-Version 0.30.0
---------------
+Version 0.30.0 (2016-01-23)
+---------------------------
 
 Compatibility notes:
 
@@ -619,8 +664,8 @@ Other changes:
   - add gcc gcc-c++ to redhat/fedora/korora install docs, fixes #583
 
 
-Version 0.29.0
---------------
+Version 0.29.0 (2015-12-13)
+---------------------------
 
 Compatibility notes:
 
@@ -695,8 +740,8 @@ Other changes:
   - fix wrong installation instructions for archlinux
 
 
-Version 0.28.2
---------------
+Version 0.28.2 (2015-11-15)
+---------------------------
 
 New features:
 
@@ -719,8 +764,8 @@ Other changes:
   - minor install docs improvements
 
 
-Version 0.28.1
---------------
+Version 0.28.1 (2015-11-08)
+---------------------------
 
 Bug fixes:
 
@@ -734,8 +779,8 @@ Other changes:
 - fix build on readthedocs
 
 
-Version 0.28.0
---------------
+Version 0.28.0 (2015-11-08)
+---------------------------
 
 Compatibility notes:
 
@@ -832,8 +877,8 @@ Other changes:
   - minor development docs update
 
 
-Version 0.27.0
---------------
+Version 0.27.0 (2015-10-07)
+---------------------------
 
 New features:
 
@@ -867,8 +912,8 @@ Other changes:
   - hint to single-file pyinstaller binaries from README
 
 
-Version 0.26.1
---------------
+Version 0.26.1 (2015-09-28)
+---------------------------
 
 This is a minor update, just docs and new pyinstaller binaries.
 
@@ -880,8 +925,8 @@ This is a minor update, just docs and new pyinstaller binaries.
 Note: if you did a python-based installation, there is no need to upgrade.
 
 
-Version 0.26.0
---------------
+Version 0.26.0 (2015-09-19)
+---------------------------
 
 New features:
 
@@ -941,8 +986,8 @@ Other changes:
   - Darwin (OS X Yosemite)
 
 
-Version 0.25.0
---------------
+Version 0.25.0 (2015-08-29)
+---------------------------
 
 Compatibility notes:
 
@@ -1008,8 +1053,8 @@ Other changes:
   - split install docs into system-specific preparations and generic instructions
 
 
-Version 0.24.0
---------------
+Version 0.24.0 (2015-08-09)
+---------------------------
 
 Incompatible changes (compared to 0.23):
 
@@ -1112,8 +1157,8 @@ Other changes:
 - some easy micro optimizations
 
 
-Version 0.23.0
---------------
+Version 0.23.0 (2015-06-11)
+---------------------------
 
 Incompatible changes (compared to attic, fork related):
 

+ 2 - 0
docs/conf.py

@@ -55,6 +55,8 @@ version = sw_version.split('-')[0]
 # The full version, including alpha/beta/rc tags.
 release = version
 
+suppress_warnings = ['image.nonlocal_uri']
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #language = None

+ 6 - 12
docs/resources.rst

@@ -17,21 +17,15 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
 
 - `TW's slides for borgbackup talks / lightning talks <https://slides.com/thomaswaldmann>`_ (just grab the latest ones)
 
-- "Attic / Borg Backup" talk from GPN 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
+- `Attic / Borg Backup talk from GPN 2015 (media.ccc.de) <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
+- `Attic / Borg Backup talk from GPN 2015 (youtube) <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
 
-- "Attic" talk from Easterhegg 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
+- `Attic talk from Easterhegg 2015 (media.ccc.de) <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
+- `Attic talk from Easterhegg 2015 (youtube) <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
 
-- "Attic Backup: Mount your encrypted backups over ssh", 2014 (video, english):
-  `youtube <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
+- `Attic Backup: Mount your encrypted backups over ssh (youtube) <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
 
-- "Evolution of Borg", Oct 2015 (gource visualization of attic and borg development):
-  `youtube <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
+- `Evolution of Borg (youtube) <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
 
 Software
 --------

+ 6 - 3
docs/usage/check.rst.inc

@@ -59,9 +59,12 @@ Second, the consistency and correctness of the archive metadata is verified:
 - Check if archive metadata chunk is present. if not, remove archive from
   manifest.
 - For all files (items) in the archive, for all chunks referenced by these
-  files, check if chunk is present (if not and we are in repair mode, replace
-  it with a same-size chunk of zeros). This requires reading of archive and
-  file metadata, but not data.
+  files, check if chunk is present.
+  If a chunk is not present and we are in repair mode, replace it with a same-size
+  replacement chunk of zeros.
+  If a previously lost chunk reappears (e.g. via a later backup) and we are in
+  repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+  This requires reading of archive and file metadata, but not data.
 - If we are in repair mode and we checked all the archives: delete orphaned
   chunks from the repo.
 - if you use a remote repo server via ssh:, the archive check is executed on

+ 39 - 39
docs/usage/help.rst.inc

@@ -1,48 +1,9 @@
 .. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
 
-.. _borg_placeholders:
-
-borg help placeholders
-~~~~~~~~~~~~~~~~~~~~~~
-::
-
-
-Repository (or Archive) URLs and --prefix values support these placeholders:
-
-{hostname}
-
-    The (short) hostname of the machine.
-
-{fqdn}
-
-    The full name of the machine.
-
-{now}
-
-    The current local date and time.
-
-{utcnow}
-
-    The current UTC date and time.
-
-{user}
-
-    The user name (or UID, if no name is available) of the user running borg.
-
-{pid}
-
-    The current process ID.
-
-Examples::
-
-    borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
-    borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
-    borg prune --prefix '{hostname}-' ...
 .. _borg_patterns:
 
 borg help patterns
 ~~~~~~~~~~~~~~~~~~
-::
 
 
 Exclusion patterns support four separate styles, fnmatch, shell, regular
@@ -131,3 +92,42 @@ Examples::
     sh:/home/*/.thumbnails
     EOF
     $ borg create --exclude-from exclude.txt backup /
+
+.. _borg_placeholders:
+
+borg help placeholders
+~~~~~~~~~~~~~~~~~~~~~~
+
+
+Repository (or Archive) URLs and --prefix values support these placeholders:
+
+{hostname}
+
+    The (short) hostname of the machine.
+
+{fqdn}
+
+    The full name of the machine.
+
+{now}
+
+    The current local date and time.
+
+{utcnow}
+
+    The current UTC date and time.
+
+{user}
+
+    The user name (or UID, if no name is available) of the user running borg.
+
+{pid}
+
+    The current process ID.
+
+Examples::
+
+    borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
+    borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
+    borg prune --prefix '{hostname}-' ...
+

+ 7 - 0
docs/usage/mount.rst.inc

@@ -38,6 +38,13 @@ used in fstab entries:
 To allow a regular user to use fstab entries, add the ``user`` option:
 ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
 
+For mount options, see the fuse(8) manual page. Additional mount options
+supported by borg:
+
+- allow_damaged_files: by default damaged files (where missing chunks were
+  replaced with runs of zeros by borg check --repair) are not readable and
+  return EIO (I/O error). Set this option to read such files.
+
 The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
 to tweak the performance. It sets the number of cached data chunks; additional
 memory usage can be up to ~8 MiB times this number. The default is the number

+ 1 - 1
setup.py

@@ -187,7 +187,7 @@ class build_usage(Command):
                         params = {"topic": topic,
                                   "underline": '~' * len('borg help ' + topic)}
                         doc.write(".. _borg_{topic}:\n\n".format(**params))
-                        doc.write("borg help {topic}\n{underline}\n::\n\n".format(**params))
+                        doc.write("borg help {topic}\n{underline}\n\n".format(**params))
                         doc.write(Archiver.helptext[topic])
                 else:
                     params = {"command": command,

+ 7 - 1
src/borg/_hashindex.c

@@ -100,6 +100,8 @@ static int hashindex_delete(HashIndex *index, const void *key);
 static void *hashindex_next_key(HashIndex *index, const void *key);
 
 /* Private API */
+static void hashindex_free(HashIndex *index);
+
 static int
 hashindex_index(HashIndex *index, const void *key)
 {
@@ -148,7 +150,11 @@ hashindex_resize(HashIndex *index, int capacity)
         return 0;
     }
     while((key = hashindex_next_key(index, key))) {
-        hashindex_set(new, key, key + key_size);
+        if(!hashindex_set(new, key, key + key_size)) {
+            /* This can only happen if there's a bug in the code calculating capacity */
+            hashindex_free(new);
+            return 0;
+        }
     }
     free(index->buckets);
     index->buckets = new->buckets;

+ 45 - 16
src/borg/archive.py

@@ -419,6 +419,7 @@ Number of files: {0.stats.nfiles}'''.format(
         :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
         :param original_path: 'path' key as stored in archive
         """
+        has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
                 for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
@@ -426,6 +427,9 @@ Number of files: {0.stats.nfiles}'''.format(
                         sys.stdout.buffer.write(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item[b'path']))
             return
 
         original_path = original_path or item.path
@@ -481,6 +485,9 @@ Number of files: {0.stats.nfiles}'''.format(
                     fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item.path))
             if hardlink_masters:
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
                 hardlink_masters[item.get('source') or original_path] = (None, path)
@@ -924,7 +931,7 @@ class ArchiveChecker:
         """
         # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.2)
+        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
         self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
@@ -1033,31 +1040,53 @@ class ArchiveChecker:
                     self.repository.put(id_, cdata)
 
         def verify_file_chunks(item):
-            """Verifies that all file chunks are present
+            """Verifies that all file chunks are present.
 
-            Missing file chunks will be replaced with new chunks of the same
-            length containing all zeros.
+            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
+            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
             """
             offset = 0
             chunk_list = []
             chunks_replaced = False
-            for chunk_id, size, csize in item.chunks:
+            has_chunks_healthy = 'chunks_healthy' in item
+            chunks_current = item.chunks
+            chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
+            assert len(chunks_current) == len(chunks_healthy)
+            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
+                chunk_id, size, csize = chunk_healthy
                 if chunk_id not in self.chunks:
-                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size))
-                    self.error_found = chunks_replaced = True
-                    data = bytes(size)
-                    chunk_id = self.key.id_hash(data)
-                    cdata = self.key.encrypt(Chunk(data))
-                    csize = len(cdata)
-                    add_reference(chunk_id, size, csize, cdata)
+                    # a chunk of the healthy list is missing
+                    if chunk_current == chunk_healthy:
+                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(item.path, offset, offset + size))
+                        self.error_found = chunks_replaced = True
+                        data = bytes(size)
+                        chunk_id = self.key.id_hash(data)
+                        cdata = self.key.encrypt(Chunk(data))
+                        csize = len(cdata)
+                        add_reference(chunk_id, size, csize, cdata)
+                    else:
+                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a '
+                                    'all-zero replacement chunk already.'.format(item.path, offset, offset + size))
+                        chunk_id, size, csize = chunk_current
+                        add_reference(chunk_id, size, csize)
                 else:
-                    add_reference(chunk_id, size, csize)
-                chunk_list.append((chunk_id, size, csize))
+                    if chunk_current == chunk_healthy:
+                        # normal case, all fine.
+                        add_reference(chunk_id, size, csize)
+                    else:
+                        logger.info('{}: Healed previously missing file chunk! '
+                                    '(Byte {}-{}).'.format(item.path, offset, offset + size))
+                        add_reference(chunk_id, size, csize)
+                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
+                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                 offset += size
-            if chunks_replaced and 'chunks_healthy' not in item:
+            if chunks_replaced and not has_chunks_healthy:
                 # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
                 item.chunks_healthy = item.chunks
+            if has_chunks_healthy and chunk_list == chunks_healthy:
+                logger.info('{}: Completely healed previously damaged file!'.format(item.path))
+                del item.chunks_healthy
             item.chunks = chunk_list
 
         def robust_iterator(archive):

+ 15 - 7
src/borg/archiver.py

@@ -1134,8 +1134,7 @@ class Archiver:
             re:^/home/[^/]\.tmp/
             sh:/home/*/.thumbnails
             EOF
-            $ borg create --exclude-from exclude.txt backup /
-        ''')
+            $ borg create --exclude-from exclude.txt backup /\n\n''')
     helptext['placeholders'] = textwrap.dedent('''
         Repository (or Archive) URLs and --prefix values support these placeholders:
 
@@ -1167,8 +1166,7 @@ class Archiver:
 
             borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
             borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
-            borg prune --prefix '{hostname}-' ...
-        ''')
+            borg prune --prefix '{hostname}-' ...\n\n''')
 
     def do_help(self, parser, commands, args):
         if not args.topic:
@@ -1331,9 +1329,12 @@ class Archiver:
         - Check if archive metadata chunk is present. if not, remove archive from
           manifest.
         - For all files (items) in the archive, for all chunks referenced by these
-          files, check if chunk is present (if not and we are in repair mode, replace
-          it with a same-size chunk of zeros). This requires reading of archive and
-          file metadata, but not data.
+          files, check if chunk is present.
+          If a chunk is not present and we are in repair mode, replace it with a same-size
+          replacement chunk of zeros.
+          If a previously lost chunk reappears (e.g. via a later backup) and we are in
+          repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+          This requires reading of archive and file metadata, but not data.
         - If we are in repair mode and we checked all the archives: delete orphaned
           chunks from the repo.
         - if you use a remote repo server via ssh:, the archive check is executed on
@@ -1739,6 +1740,13 @@ class Archiver:
         To allow a regular user to use fstab entries, add the ``user`` option:
         ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
 
+        For mount options, see the fuse(8) manual page. Additional mount options
+        supported by borg:
+
+        - allow_damaged_files: by default damaged files (where missing chunks were
+          replaced with runs of zeros by borg check --repair) are not readable and
+          return EIO (I/O error). Set this option to read such files.
+
         The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
         to tweak the performance. It sets the number of cached data chunks; additional
         memory usage can be up to ~8 MiB times this number. The default is the number

+ 49 - 21
src/borg/fuse.py

@@ -14,7 +14,7 @@ from .logger import create_logger
 logger = create_logger()
 
 from .archive import Archive
-from .helpers import daemonize
+from .helpers import daemonize, safe_encode
 from .item import Item
 from .lrucache import LRUCache
 
@@ -50,6 +50,9 @@ class ItemCache:
 class FuseOperations(llfuse.Operations):
     """Export archive as a fuse filesystem
     """
+
+    allow_damaged_files = False
+
     def __init__(self, key, repository, manifest, archive, cached_repo):
         super().__init__()
         self._inode_count = 0
@@ -79,6 +82,32 @@ class FuseOperations(llfuse.Operations):
                 self.contents[1][os.fsencode(archive_name)] = archive_inode
                 self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
 
+    def mount(self, mountpoint, mount_options, foreground=False):
+        """Mount filesystem on *mountpoint* with *mount_options*."""
+        options = ['fsname=borgfs', 'ro']
+        if mount_options:
+            options.extend(mount_options.split(','))
+        try:
+            options.remove('allow_damaged_files')
+            self.allow_damaged_files = True
+        except ValueError:
+            pass
+        llfuse.init(self, mountpoint, options)
+        if not foreground:
+            daemonize()
+
+        # If the file system crashes, we do not want to umount because in that
+        # case the mountpoint suddenly appears to become empty. This can have
+        # nasty consequences, imagine the user has e.g. an active rsync mirror
+        # job - seeing the mountpoint empty, rsync would delete everything in the
+        # mirror.
+        umount = False
+        try:
+            signal = fuse_main()
+            umount = (signal is None)  # no crash and no signal -> umount request
+        finally:
+            llfuse.close(umount)
+
     def process_archive(self, archive, prefix=[]):
         """Build fuse inode hierarchy from archive metadata
         """
@@ -88,6 +117,16 @@ class FuseOperations(llfuse.Operations):
             unpacker.feed(data)
             for item in unpacker:
                 item = Item(internal_dict=item)
+                try:
+                    # This can happen if an archive was created with a command line like
+                    # $ borg create ... dir1/file dir1
+                    # In this case the code below will have created a default_dir inode for dir1 already.
+                    inode = self._find_inode(safe_encode(item.path), prefix)
+                except KeyError:
+                    pass
+                else:
+                    self.items[inode] = item
+                    continue
                 segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
                 del item.path
                 num_segments = len(segments)
@@ -214,6 +253,15 @@ class FuseOperations(llfuse.Operations):
         return self.getattr(inode)
 
     def open(self, inode, flags, ctx=None):
+        if not self.allow_damaged_files:
+            item = self.get_item(inode)
+            if 'chunks_healthy' in item:
+                # Processed archive items don't carry the path anymore; for converting the inode
+                # to the path we'd either have to store the inverse of the current structure,
+                # or search the entire archive. So we just don't print it. It's easy to correlate anyway.
+                logger.warning('File has damaged (all-zero) chunks. Try running borg check --repair. '
+                               'Mount with allow_damaged_files to read damaged files.')
+                raise llfuse.FUSEError(errno.EIO)
         return inode
 
     def opendir(self, inode, ctx=None):
@@ -254,23 +302,3 @@ class FuseOperations(llfuse.Operations):
     def readlink(self, inode, ctx=None):
         item = self.get_item(inode)
         return os.fsencode(item.source)
-
-    def mount(self, mountpoint, extra_options, foreground=False):
-        options = ['fsname=borgfs', 'ro']
-        if extra_options:
-            options.extend(extra_options.split(','))
-        llfuse.init(self, mountpoint, options)
-        if not foreground:
-            daemonize()
-
-        # If the file system crashes, we do not want to umount because in that
-        # case the mountpoint suddenly appears to become empty. This can have
-        # nasty consequences, imagine the user has e.g. an active rsync mirror
-        # job - seeing the mountpoint empty, rsync would delete everything in the
-        # mirror.
-        umount = False
-        try:
-            signal = fuse_main()
-            umount = (signal is None)  # no crash and no signal -> umount request
-        finally:
-            llfuse.close(umount)

+ 2 - 3
src/borg/hashindex.pyx

@@ -18,8 +18,6 @@ cdef extern from "_hashindex.c":
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
-    void hashindex_merge(HashIndex *index, HashIndex *other)
-    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -323,7 +321,8 @@ cdef class ChunkIndex(IndexBase):
             values[1] = data[1]
             values[2] = data[2]
         else:
-            hashindex_set(self.index, key, data)
+            if not hashindex_set(self.index, key, data):
+                raise Exception('hashindex_set failed')
 
     def merge(self, ChunkIndex other):
         cdef void *key = NULL

+ 2 - 3
src/borg/helpers.py

@@ -927,8 +927,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
         retry_msg=None, invalid_msg=None, env_msg=None,
         falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
         default=False, retry=True, env_var_override=None, ofile=None, input=input):
-    """
-    Output <msg> (usually a question) and let user input an answer.
+    """Output <msg> (usually a question) and let user input an answer.
     Qualifies the answer according to falsish, truish and defaultish as True, False or <default>.
     If it didn't qualify and retry_msg is None (no retries wanted),
     return the default [which defaults to False]. Otherwise let user retry
@@ -1112,7 +1111,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
     """
     log multiple lines of text, each line by a separate logging call for cosmetic reasons
 
-    each positional argument may be a single or multiple lines (separated by \n) of text.
+    each positional argument may be a single or multiple lines (separated by newlines) of text.
     """
     lines = []
     for msg in msgs:

+ 10 - 6
src/borg/locking.py

@@ -101,9 +101,11 @@ class NotMyLock(LockErrorT):
 class ExclusiveLock:
     """An exclusive Lock based on mkdir fs operation being atomic.
 
-    If possible, try to use the contextmanager here like:
-    with ExclusiveLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with ExclusiveLock(...) as lock:
+            ...
+
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     """
@@ -222,9 +224,11 @@ class UpgradableLock:
     noone is allowed reading) and read access to a resource needs a shared
     lock (multiple readers are allowed).
 
-    If possible, try to use the contextmanager here like:
-    with UpgradableLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with UpgradableLock(...) as lock:
+            ...
+
     This makes sure the lock is released again if the block is left, no
     matter how (e.g. if an exception occurred).
     """

+ 1 - 1
src/borg/shellpattern.py

@@ -5,7 +5,7 @@ import re
 def translate(pat):
     """Translate a shell-style pattern to a regular expression.
 
-    The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
+    The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
     matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
     any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character
     "?").

+ 18 - 0
src/borg/testsuite/__init__.py

@@ -116,6 +116,24 @@ class BaseTestCase(unittest.TestCase):
         for sub_diff in diff.subdirs.values():
             self._assert_dirs_equal_cmp(sub_diff)
 
+    @contextmanager
+    def fuse_mount(self, location, mountpoint, mount_options=None):
+        """Mount `location` on `mountpoint` for the with-block; unmount and clean up even if the block raises."""
+        os.mkdir(mountpoint)
+        args = ['mount', location, mountpoint]
+        if mount_options:
+            args += '-o', mount_options
+        try:
+            self.cmd(*args, fork=True)
+            self.wait_for_mount(mountpoint)
+            yield
+        finally:
+            cmd = ('fusermount -u %s' if sys.platform.startswith('linux') else 'umount %s') % mountpoint
+            os.system(cmd)
+            os.rmdir(mountpoint)
+            # Give the daemon some time to exit
+            time.sleep(.2)
+
     def wait_for_mount(self, path, timeout=5):
         """Wait until a filesystem is mounted on `path`
         """

+ 132 - 50
src/borg/testsuite/archiver.py

@@ -223,7 +223,8 @@ class ArchiverTestCaseBase(BaseTestCase):
 
     def tearDown(self):
         os.chdir(self._old_wd)
-        shutil.rmtree(self.tmpdir)
+        # note: ignore_errors=True as workaround for issue #862
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
 
     def cmd(self, *args, **kw):
         exit_code = kw.pop('exit_code', 0)
@@ -239,6 +240,13 @@ class ArchiverTestCaseBase(BaseTestCase):
     def create_src_archive(self, name):
         self.cmd('create', self.repository_location + '::' + name, src_dir)
 
+    def open_archive(self, name):
+        repository = Repository(self.repository_path)
+        with repository:
+            manifest, key = Manifest.load(repository)
+            archive = Archive(repository, key, manifest, name)
+        return archive, repository
+
     def create_regular_file(self, name, size=0, contents=None):
         filename = os.path.join(self.input_path, name)
         if not os.path.exists(os.path.dirname(filename)):
@@ -1283,52 +1291,96 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_repository(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse(self):
         self.cmd('init', self.repository_location)
         self.create_test_files()
         self.cmd('create', self.repository_location + '::archive', 'input')
         self.cmd('create', self.repository_location + '::archive2', 'input')
-        try:
-            self.cmd('mount', self.repository_location, mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            os.remove(os.path.join('input', 'flagfile'))
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        # mount the whole repository, archive contents shall show up in archivename subdirs of mountpoint:
+        with self.fuse_mount(self.repository_location, mountpoint):
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        # mount only 1 archive, its contents shall show up directly in mountpoint:
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
+            # regular file
+            in_fn = 'input/file1'
+            out_fn = os.path.join(mountpoint, 'input', 'file1')
+            # stat
+            sti1 = os.stat(in_fn)
+            sto1 = os.stat(out_fn)
+            assert sti1.st_mode == sto1.st_mode
+            assert sti1.st_uid == sto1.st_uid
+            assert sti1.st_gid == sto1.st_gid
+            assert sti1.st_size == sto1.st_size
+            assert sti1.st_atime == sto1.st_atime
+            assert sti1.st_ctime == sto1.st_ctime
+            assert sti1.st_mtime == sto1.st_mtime
+            # note: there is another hardlink to this, see below
+            assert sti1.st_nlink == sto1.st_nlink == 2
+            # read
+            with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f:
+                assert in_f.read() == out_f.read()
+            # list/read xattrs
+            if xattr.is_enabled(self.input_path):
+                assert xattr.listxattr(out_fn) == ['user.foo', ]
+                assert xattr.getxattr(out_fn, 'user.foo') == b'bar'
             else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert xattr.listxattr(out_fn) == []
+                try:
+                    xattr.getxattr(out_fn, 'user.foo')
+                except OSError as e:
+                    assert e.errno == llfuse.ENOATTR
+                else:
+                    assert False, "expected OSError(ENOATTR), but no error was raised"
+            # hardlink (to 'input/file1')
+            in_fn = 'input/hardlink'
+            out_fn = os.path.join(mountpoint, 'input', 'hardlink')
+            sti2 = os.stat(in_fn)
+            sto2 = os.stat(out_fn)
+            assert sti2.st_nlink == sto2.st_nlink == 2
+            assert sto1.st_ino == sto2.st_ino
+            # symlink
+            in_fn = 'input/link1'
+            out_fn = os.path.join(mountpoint, 'input', 'link1')
+            sti = os.stat(in_fn, follow_symlinks=False)
+            sto = os.stat(out_fn, follow_symlinks=False)
+            assert stat.S_ISLNK(sti.st_mode)
+            assert stat.S_ISLNK(sto.st_mode)
+            assert os.readlink(in_fn) == os.readlink(out_fn)
+            # FIFO
+            out_fn = os.path.join(mountpoint, 'input', 'fifo1')
+            sto = os.stat(out_fn)
+            assert stat.S_ISFIFO(sto.st_mode)
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_archive(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse_allow_damaged_files(self):
         self.cmd('init', self.repository_location)
-        self.create_test_files()
-        self.cmd('create', self.repository_location + '::archive', 'input')
-        try:
-            self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        self.create_src_archive('archive')
+        # Get rid of a chunk and repair it
+        archive, repository = self.open_archive('archive')
+        with repository:
+            for item in archive.iter_items():
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
+                    path = item.path  # store full path for later
+                    break
             else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert False  # missed the file
+            repository.commit()
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            with pytest.raises(OSError) as excinfo:
+                open(os.path.join(mountpoint, path))
+            assert excinfo.value.errno == errno.EIO
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint, 'allow_damaged_files'):
+            open(os.path.join(mountpoint, path)).close()
 
     def verify_aes_counter_uniqueness(self, method):
         seen = set()  # Chunks already seen
@@ -1633,6 +1685,14 @@ class ArchiverTestCaseBinary(ArchiverTestCase):
     def test_recreate_changed_source(self):
         pass
 
+    @unittest.skip('test_basic_functionality seems incompatible with fakeroot and/or the binary.')
+    def test_basic_functionality(self):
+        pass
+
+    @unittest.skip('test_overwrite seems incompatible with fakeroot and/or the binary.')
+    def test_overwrite(self):
+        pass
+
 
 class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
@@ -1643,13 +1703,6 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
             self.create_src_archive('archive1')
             self.create_src_archive('archive2')
 
-    def open_archive(self, name):
-        repository = Repository(self.repository_path)
-        with repository:
-            manifest, key = Manifest.load(repository)
-            archive = Archive(repository, key, manifest, name)
-        return archive, repository
-
     def test_check_usage(self):
         output = self.cmd('check', '-v', '--progress', self.repository_location, exit_code=0)
         self.assert_in('Starting repository check', output)
@@ -1672,12 +1725,45 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         with repository:
             for item in archive.iter_items():
                 if item.path.endswith('testsuite/archiver.py'):
-                    repository.delete(item.chunks[-1].id)
+                    valid_chunks = item.chunks
+                    killed_chunk = valid_chunks[-1]
+                    repository.delete(killed_chunk.id)
                     break
+            else:
+                self.assert_true(False)  # should not happen
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
-        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('New missing file chunk detected', output)
         self.cmd('check', self.repository_location, exit_code=0)
+        # check that the file in the old archives has now a different chunk list without the killed chunk
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_not_equal(valid_chunks, item.chunks)
+                        self.assert_not_in(killed_chunk, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
+        # do a fresh backup (that will include the killed chunk)
+        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
+            self.create_src_archive('archive3')
+        # check should be able to heal the file now:
+        output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('Healed previously missing file chunk', output)
+        self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
+        # check that the file in the old archives has the correct chunks again
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_equal(valid_chunks, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
 
     def test_missing_archive_item_chunk(self):
         archive, repository = self.open_archive('archive1')
@@ -1762,11 +1848,7 @@ class RemoteArchiverTestCase(ArchiverTestCase):
     # this was introduced because some tests expect stderr contents to show up
     # in "output" also. Also, the non-forking exec_cmd catches both, too.
     @unittest.skip('deadlock issues')
-    def test_fuse_mount_repository(self):
-        pass
-
-    @unittest.skip('deadlock issues')
-    def test_fuse_mount_archive(self):
+    def test_fuse(self):
         pass
 
     @unittest.skip('only works locally')