Przeglądaj źródła

Check for sufficient free space before committing

Marian Beermann 9 lat temu
rodzic
commit
e9a73b808f

+ 17 - 3
docs/quickstart.rst

@@ -17,13 +17,20 @@ a good amount of free space on the filesystem that has your backup repository
 (and also on ~/.cache). A few GB should suffice for most hard-drive sized
 (and also on ~/.cache). A few GB should suffice for most hard-drive sized
 repositories. See also :ref:`cache-memory-usage`.
 repositories. See also :ref:`cache-memory-usage`.
 
 
+Borg doesn't use space reserved for root on repository disks (even when run as root).
+On file systems which do not support this mechanism (e.g. XFS), we recommend
+reserving some space in Borg itself, just to be safe, by adjusting the
+``additional_free_space`` setting in the ``[repository]`` section of a repository's
+``config`` file. A good starting point is ``2G``.
+
 If |project_name| runs out of disk space, it tries to free as much space as it
 If |project_name| runs out of disk space, it tries to free as much space as it
 can while aborting the current operation safely, which allows to free more space
 can while aborting the current operation safely, which allows to free more space
-by deleting/pruning archives. This mechanism is not bullet-proof though.
+by deleting/pruning archives. This mechanism is not bullet-proof in some
+circumstances [1]_.
+
 If you *really* run out of disk space, it can be hard or impossible to free space,
 If you *really* run out of disk space, it can be hard or impossible to free space,
 because |project_name| needs free space to operate - even to delete backup
 because |project_name| needs free space to operate - even to delete backup
-archives. There is a ``--save-space`` option for some commands, but even with
-that |project_name| will need free space to operate.
+archives.
 
 
 You can use some monitoring process or just include the free space information
 You can use some monitoring process or just include the free space information
 in your backup log files (you check them regularly anyway, right?).
 in your backup log files (you check them regularly anyway, right?).
@@ -36,6 +43,13 @@ Also helpful:
 - consider using quotas
 - consider using quotas
 - use `prune` regularly
 - use `prune` regularly
 
 
+.. [1] This failsafe can fail in these circumstances:
+
+    - The underlying file system doesn't support statvfs(2), or returns incorrect
+      data, or the repository doesn't reside on a single file system
+    - Other tasks fill the disk simultaneously
+    - Hard quotas (which may not be reflected in statvfs(2))
+
 
 
 A step by step example
 A step by step example
 ----------------------
 ----------------------

+ 7 - 1
src/borg/_hashindex.c

@@ -441,7 +441,13 @@ hashindex_next_key(HashIndex *index, const void *key)
 }
 }
 
 
 static int
 static int
-hashindex_get_size(HashIndex *index)
+hashindex_len(HashIndex *index)
 {
 {
     return index->num_entries;
     return index->num_entries;
 }
 }
+
+static int
+hashindex_size(HashIndex *index)
+{
+    return sizeof(HashHeader) + index->num_buckets * index->bucket_size;
+}

+ 8 - 3
src/borg/hashindex.pyx

@@ -8,7 +8,7 @@ from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
 from libc.errno cimport errno
 from libc.errno cimport errno
 from cpython.exc cimport PyErr_SetFromErrnoWithFilename
 from cpython.exc cimport PyErr_SetFromErrnoWithFilename
 
 
-API_VERSION = 2
+API_VERSION = 3
 
 
 
 
 cdef extern from "_hashindex.c":
 cdef extern from "_hashindex.c":
@@ -18,7 +18,8 @@ cdef extern from "_hashindex.c":
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
     void hashindex_free(HashIndex *index)
-    int hashindex_get_size(HashIndex *index)
+    int hashindex_len(HashIndex *index)
+    int hashindex_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
     void *hashindex_get(HashIndex *index, void *key)
     void *hashindex_next_key(HashIndex *index, void *key)
     void *hashindex_next_key(HashIndex *index, void *key)
@@ -119,7 +120,11 @@ cdef class IndexBase:
             raise
             raise
 
 
     def __len__(self):
     def __len__(self):
-        return hashindex_get_size(self.index)
+        return hashindex_len(self.index)
+
+    def size(self):
+        """Return size (bytes) of hash table."""
+        return hashindex_size(self.index)
 
 
 
 
 cdef class NSIndex(IndexBase):
 cdef class NSIndex(IndexBase):

+ 21 - 1
src/borg/helpers.py

@@ -85,7 +85,7 @@ class PlaceholderError(Error):
 
 
 def check_extension_modules():
 def check_extension_modules():
     from . import platform
     from . import platform
-    if hashindex.API_VERSION != 2:
+    if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
         raise ExtensionModuleError
@@ -618,6 +618,26 @@ def format_file_size(v, precision=2, sign=False):
     return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
     return sizeof_fmt_decimal(v, suffix='B', sep=' ', precision=precision, sign=sign)
 
 
 
 
+def parse_file_size(s):
+    """Return int from file size (1234, 55G, 1.7T)."""
+    if not s:
+        return int(s)  # will raise
+    suffix = s[-1]
+    power = 1000
+    try:
+        factor = {
+            'K': power,
+            'M': power**2,
+            'G': power**3,
+            'T': power**4,
+            'P': power**5,
+        }[suffix]
+        s = s[:-1]
+    except KeyError:
+        factor = 1
+    return int(float(s) * factor)
+
+
 def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False):
 def sizeof_fmt(num, suffix='B', units=None, power=None, sep='', precision=2, sign=False):
     prefix = '+' if sign and num > 0 else ''
     prefix = '+' if sign and num > 0 else ''
 
 

+ 52 - 4
src/borg/repository.py

@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
 
 
 from .constants import *  # NOQA
 from .constants import *  # NOQA
 from .hashindex import NSIndex
 from .hashindex import NSIndex
-from .helpers import Error, ErrorWithTraceback, IntegrityError
+from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size
 from .helpers import Location
 from .helpers import Location
 from .helpers import ProgressIndicatorPercent
 from .helpers import ProgressIndicatorPercent
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
@@ -101,6 +101,9 @@ class Repository:
                 id = bin_to_hex(id)
                 id = bin_to_hex(id)
             super().__init__(id, repo)
             super().__init__(id, repo)
 
 
+    class InsufficientFreeSpaceError(Error):
+        """Insufficient free space to complete transaction (required: {}, available: {})."""
+
     def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
     def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
         self.path = os.path.abspath(path)
         self.path = os.path.abspath(path)
         self._location = Location('file://%s' % self.path)
         self._location = Location('file://%s' % self.path)
@@ -136,8 +139,10 @@ class Repository:
             # EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
             # EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
             if self._active_txn and no_space_left_on_device:
             if self._active_txn and no_space_left_on_device:
                 logger.warning('No space left on device, cleaning up partial transaction to free space.')
                 logger.warning('No space left on device, cleaning up partial transaction to free space.')
-                self.io.cleanup(self.io.get_segments_transaction_id())
-            self.rollback()
+                cleanup = True
+            else:
+                cleanup = False
+            self.rollback(cleanup)
         self.close()
         self.close()
 
 
     @property
     @property
@@ -160,6 +165,7 @@ class Repository:
         config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
         config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
         config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
         config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
         config.set('repository', 'append_only', str(int(self.append_only)))
         config.set('repository', 'append_only', str(int(self.append_only)))
+        config.set('repository', 'additional_free_space', '0')
         config.set('repository', 'id', bin_to_hex(os.urandom(32)))
         config.set('repository', 'id', bin_to_hex(os.urandom(32)))
         self.save_config(path, config)
         self.save_config(path, config)
 
 
@@ -231,6 +237,7 @@ class Repository:
             raise self.InvalidRepository(path)
             raise self.InvalidRepository(path)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
+        self.additional_free_space = parse_file_size(self.config.get('repository', 'additional_free_space', fallback=0))
         # append_only can be set in the constructor
         # append_only can be set in the constructor
         # it shouldn't be overridden (True -> False) here
         # it shouldn't be overridden (True -> False) here
         self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
         self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
@@ -249,6 +256,7 @@ class Repository:
         """Commit transaction
         """Commit transaction
         """
         """
         # save_space is not used anymore, but stays for RPC/API compatibility.
         # save_space is not used anymore, but stays for RPC/API compatibility.
+        self.check_free_space()
         self.io.write_commit()
         self.io.write_commit()
         if not self.append_only:
         if not self.append_only:
             self.compact_segments()
             self.compact_segments()
@@ -349,6 +357,44 @@ class Repository:
             os.unlink(os.path.join(self.path, name))
             os.unlink(os.path.join(self.path, name))
         self.index = None
         self.index = None
 
 
+    def check_free_space(self):
+        """Pre-commit check for sufficient free space to actually perform the commit."""
+        # As a baseline we take four times the current (on-disk) index size.
+        # At this point the index may only be updated by compaction, which won't resize it.
+        # We still apply a factor of four so that a later, separate invocation can free space
+        # (journaling all deletes for all chunks is one index size) or still make minor additions
+        # (which may grow the index up to twice its current size).
+        # Note that in a subsequent operation the committed index is still on-disk, therefore we
+        # arrive at index_size * (1 + 2 + 1).
+        # In that order: journaled deletes (1), hashtable growth (2), persisted index (1).
+        required_free_space = self.index.size() * 4
+
+        # Conservatively estimate hints file size:
+        # 10 bytes for each segment-refcount pair, 10 bytes for each segment-space pair
+        # Assume maximum of 5 bytes per integer. Segment numbers will usually be packed more densely (1-3 bytes),
+        # as will refcounts and free space integers. For 5 MiB segments this estimate is good to ~20 PB repo size.
+        # Add 4K to generously account for constant format overhead.
+        hints_size = len(self.segments) * 10 + len(self.compact) * 10 + 4096
+        required_free_space += hints_size
+
+        required_free_space += self.additional_free_space
+        if not self.append_only:
+            # Keep one full worst-case segment free in non-append-only mode
+            required_free_space += self.max_segment_size + MAX_OBJECT_SIZE
+        try:
+            st_vfs = os.statvfs(self.path)
+        except OSError as os_error:
+            logger.warning('Failed to check free space before committing: ' + str(os_error))
+            return
+        # f_bavail: even as root - don't touch the Federal Block Reserve!
+        free_space = st_vfs.f_bavail * st_vfs.f_bsize
+        logger.debug('check_free_space: required bytes {}, free bytes {}'.format(required_free_space, free_space))
+        if free_space < required_free_space:
+            self.rollback(cleanup=True)
+            formatted_required = format_file_size(required_free_space)
+            formatted_free = format_file_size(free_space)
+            raise self.InsufficientFreeSpaceError(formatted_required, formatted_free)
+
     def compact_segments(self):
     def compact_segments(self):
         """Compact sparse segments by copying data into new segments
         """Compact sparse segments by copying data into new segments
         """
         """
@@ -548,9 +594,11 @@ class Repository:
             logger.info('Completed repository check, no problems found.')
             logger.info('Completed repository check, no problems found.')
         return not error_found or repair
         return not error_found or repair
 
 
-    def rollback(self):
+    def rollback(self, cleanup=False):
         """
         """
         """
         """
+        if cleanup:
+            self.io.cleanup(self.io.get_segments_transaction_id())
         self.index = None
         self.index = None
         self._active_txn = False
         self._active_txn = False
 
 

+ 15 - 0
src/borg/testsuite/hashindex.py

@@ -122,6 +122,21 @@ class HashIndexTestCase(BaseTestCase):
         assert unique_chunks == 3
         assert unique_chunks == 3
 
 
 
 
+class HashIndexSizeTestCase(BaseTestCase):
+    def test_size_on_disk(self):
+        idx = ChunkIndex()
+        assert idx.size() == 18 + 1031 * (32 + 3 * 4)
+
+    def test_size_on_disk_accurate(self):
+        idx = ChunkIndex()
+        for i in range(1234):
+            idx[H(i)] = i, i**2, i**3
+        with tempfile.NamedTemporaryFile() as file:
+            idx.write(file.name)
+            size = os.path.getsize(file.name)
+        assert idx.size() == size
+
+
 class HashIndexRefcountingTestCase(BaseTestCase):
 class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_limit(self):
     def test_chunkindex_limit(self):
         idx = ChunkIndex()
         idx = ChunkIndex()

+ 21 - 1
src/borg/testsuite/helpers.py

@@ -10,7 +10,7 @@ import msgpack
 import msgpack.fallback
 import msgpack.fallback
 
 
 from ..helpers import Location
 from ..helpers import Location
-from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError
+from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError
 from ..helpers import make_path_safe, clean_lines
 from ..helpers import make_path_safe, clean_lines
 from ..helpers import prune_within, prune_split
 from ..helpers import prune_within, prune_split
 from ..helpers import get_cache_dir, get_keys_dir
 from ..helpers import get_cache_dir, get_keys_dir
@@ -682,6 +682,26 @@ def test_file_size_sign():
         assert format_file_size(size, sign=True) == fmt
         assert format_file_size(size, sign=True) == fmt
 
 
 
 
+@pytest.mark.parametrize('string,value', (
+    ('1', 1),
+    ('20', 20),
+    ('5K', 5000),
+    ('1.75M', 1750000),
+    ('1e+9', 1e9),
+    ('-1T', -1e12),
+))
+def test_parse_file_size(string, value):
+    assert parse_file_size(string) == int(value)
+
+
+@pytest.mark.parametrize('string', (
+    '', '5 Äpfel', '4E', '2229 bit', '1B',
+))
+def test_parse_file_size_invalid(string):
+    with pytest.raises(ValueError):
+        parse_file_size(string)
+
+
 def test_is_slow_msgpack():
 def test_is_slow_msgpack():
     saved_packer = msgpack.Packer
     saved_packer = msgpack.Packer
     try:
     try:

+ 24 - 9
src/borg/testsuite/repository.py

@@ -6,6 +6,8 @@ import sys
 import tempfile
 import tempfile
 from unittest.mock import patch
 from unittest.mock import patch
 
 
+import pytest
+
 from ..hashindex import NSIndex
 from ..hashindex import NSIndex
 from ..helpers import Location
 from ..helpers import Location
 from ..helpers import IntegrityError
 from ..helpers import IntegrityError
@@ -35,6 +37,15 @@ class RepositoryTestCaseBase(BaseTestCase):
             self.repository.close()
             self.repository.close()
         self.repository = self.open()
         self.repository = self.open()
 
 
+    def add_keys(self):
+        self.repository.put(b'00000000000000000000000000000000', b'foo')
+        self.repository.put(b'00000000000000000000000000000001', b'bar')
+        self.repository.put(b'00000000000000000000000000000003', b'bar')
+        self.repository.commit()
+        self.repository.put(b'00000000000000000000000000000001', b'bar2')
+        self.repository.put(b'00000000000000000000000000000002', b'boo')
+        self.repository.delete(b'00000000000000000000000000000003')
+
 
 
 class RepositoryTestCase(RepositoryTestCaseBase):
 class RepositoryTestCase(RepositoryTestCaseBase):
 
 
@@ -168,15 +179,6 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
 
 
 class RepositoryCommitTestCase(RepositoryTestCaseBase):
 class RepositoryCommitTestCase(RepositoryTestCaseBase):
 
 
-    def add_keys(self):
-        self.repository.put(b'00000000000000000000000000000000', b'foo')
-        self.repository.put(b'00000000000000000000000000000001', b'bar')
-        self.repository.put(b'00000000000000000000000000000003', b'bar')
-        self.repository.commit()
-        self.repository.put(b'00000000000000000000000000000001', b'bar2')
-        self.repository.put(b'00000000000000000000000000000002', b'boo')
-        self.repository.delete(b'00000000000000000000000000000003')
-
     def test_replay_of_missing_index(self):
     def test_replay_of_missing_index(self):
         self.add_keys()
         self.add_keys()
         for name in os.listdir(self.repository.path):
         for name in os.listdir(self.repository.path):
@@ -274,6 +276,19 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
         assert segments_in_repository() == 6
         assert segments_in_repository() == 6
 
 
 
 
+class RepositoryFreeSpaceTestCase(RepositoryTestCaseBase):
+    def test_additional_free_space(self):
+        self.add_keys()
+        self.repository.config.set('repository', 'additional_free_space', '1000T')
+        self.repository.save_key(b'shortcut to save_config')
+        self.reopen()
+
+        with self.repository:
+            self.repository.put(b'00000000000000000000000000000000', b'foobar')
+            with pytest.raises(Repository.InsufficientFreeSpaceError):
+                self.repository.commit()
+
+
 class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
 class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
     def setUp(self):
     def setUp(self):
         super().setUp()
         super().setUp()