2
0
Эх сурвалжийг харах

Merge pull request #428 from ThomasWaldmann/purge-using-least-space

compact_segments: save_space -> free unused segments quickly
TW 9 жил өмнө
parent
commit
3f1e354b0a

+ 4 - 4
borg/archive.py

@@ -661,7 +661,7 @@ class ArchiveChecker:
         self.error_found = False
         self.error_found = False
         self.possibly_superseded = set()
         self.possibly_superseded = set()
 
 
-    def check(self, repository, repair=False, archive=None, last=None):
+    def check(self, repository, repair=False, archive=None, last=None, save_space=False):
         logger.info('Starting archive consistency check...')
         logger.info('Starting archive consistency check...')
         self.check_all = archive is None and last is None
         self.check_all = archive is None and last is None
         self.repair = repair
         self.repair = repair
@@ -676,7 +676,7 @@ class ArchiveChecker:
             self.manifest, _ = Manifest.load(repository, key=self.key)
             self.manifest, _ = Manifest.load(repository, key=self.key)
         self.rebuild_refcounts(archive=archive, last=last)
         self.rebuild_refcounts(archive=archive, last=last)
         self.orphan_chunks_check()
         self.orphan_chunks_check()
-        self.finish()
+        self.finish(save_space=save_space)
         if self.error_found:
         if self.error_found:
             logger.error('Archive consistency check complete, problems found.')
             logger.error('Archive consistency check complete, problems found.')
         else:
         else:
@@ -885,7 +885,7 @@ class ArchiveChecker:
         else:
         else:
             logger.warning('Orphaned objects check skipped (needs all archives checked).')
             logger.warning('Orphaned objects check skipped (needs all archives checked).')
 
 
-    def finish(self):
+    def finish(self, save_space=False):
         if self.repair:
         if self.repair:
             self.manifest.write()
             self.manifest.write()
-            self.repository.commit()
+            self.repository.commit(save_space=save_space)

+ 14 - 4
borg/archiver.py

@@ -105,10 +105,11 @@ class Archiver:
                        env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING', truish=('YES', )):
                        env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING', truish=('YES', )):
                 return EXIT_ERROR
                 return EXIT_ERROR
         if not args.archives_only:
         if not args.archives_only:
-            if not repository.check(repair=args.repair):
+            if not repository.check(repair=args.repair, save_space=args.save_space):
                 return EXIT_WARNING
                 return EXIT_WARNING
         if not args.repo_only and not ArchiveChecker().check(
         if not args.repo_only and not ArchiveChecker().check(
-                repository, repair=args.repair, archive=args.repository.archive, last=args.last):
+                repository, repair=args.repair, archive=args.repository.archive,
+                last=args.last, save_space=args.save_space):
             return EXIT_WARNING
             return EXIT_WARNING
         return EXIT_SUCCESS
         return EXIT_SUCCESS
 
 
@@ -332,7 +333,7 @@ class Archiver:
             stats = Statistics()
             stats = Statistics()
             archive.delete(stats)
             archive.delete(stats)
             manifest.write()
             manifest.write()
-            repository.commit()
+            repository.commit(save_space=args.save_space)
             cache.commit()
             cache.commit()
             if args.stats:
             if args.stats:
                 logger.info(stats.summary.format(label='Deleted data:', stats=stats))
                 logger.info(stats.summary.format(label='Deleted data:', stats=stats))
@@ -487,7 +488,7 @@ class Archiver:
                 Archive(repository, key, manifest, archive.name, cache).delete(stats)
                 Archive(repository, key, manifest, archive.name, cache).delete(stats)
         if to_delete and not args.dry_run:
         if to_delete and not args.dry_run:
             manifest.write()
             manifest.write()
-            repository.commit()
+            repository.commit(save_space=args.save_space)
             cache.commit()
             cache.commit()
         if args.stats:
         if args.stats:
             logger.info(stats.summary.format(label='Deleted data:', stats=stats))
             logger.info(stats.summary.format(label='Deleted data:', stats=stats))
@@ -762,6 +763,9 @@ class Archiver:
         subparser.add_argument('--repair', dest='repair', action='store_true',
         subparser.add_argument('--repair', dest='repair', action='store_true',
                                default=False,
                                default=False,
                                help='attempt to repair any inconsistencies found')
                                help='attempt to repair any inconsistencies found')
+        subparser.add_argument('--save-space', dest='save_space', action='store_true',
+                               default=False,
+                               help='work slower, but using less space')
         subparser.add_argument('--last', dest='last',
         subparser.add_argument('--last', dest='last',
                                type=int, default=None, metavar='N',
                                type=int, default=None, metavar='N',
                                help='only check last N archives (Default: all)')
                                help='only check last N archives (Default: all)')
@@ -926,6 +930,9 @@ class Archiver:
         subparser.add_argument('-c', '--cache-only', dest='cache_only',
         subparser.add_argument('-c', '--cache-only', dest='cache_only',
                                action='store_true', default=False,
                                action='store_true', default=False,
                                help='delete only the local cache for the given repository')
                                help='delete only the local cache for the given repository')
+        subparser.add_argument('--save-space', dest='save_space', action='store_true',
+                               default=False,
+                               help='work slower, but using less space')
         subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
         subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
                                type=location_validator(),
                                type=location_validator(),
                                help='archive or repository to delete')
                                help='archive or repository to delete')
@@ -1043,6 +1050,9 @@ class Archiver:
                                help='number of yearly archives to keep')
                                help='number of yearly archives to keep')
         subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
         subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
                                help='only consider archive names starting with this prefix')
                                help='only consider archive names starting with this prefix')
+        subparser.add_argument('--save-space', dest='save_space', action='store_true',
+                               default=False,
+                               help='work slower, but using less space')
         subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
         subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                type=location_validator(archive=False),
                                help='repository to prune')
                                help='repository to prune')

+ 4 - 4
borg/remote.py

@@ -273,11 +273,11 @@ class RemoteRepository:
                     w_fds = []
                     w_fds = []
         self.ignore_responses |= set(waiting_for)
         self.ignore_responses |= set(waiting_for)
 
 
-    def check(self, repair=False):
-        return self.call('check', repair)
+    def check(self, repair=False, save_space=False):
+        return self.call('check', repair, save_space)
 
 
-    def commit(self, *args):
-        return self.call('commit')
+    def commit(self, save_space=False):
+        return self.call('commit', save_space)
 
 
     def rollback(self, *args):
     def rollback(self, *args):
         return self.call('rollback')
         return self.call('rollback')

+ 41 - 17
borg/repository.py

@@ -158,11 +158,11 @@ class Repository:
             self.lock.release()
             self.lock.release()
             self.lock = None
             self.lock = None
 
 
-    def commit(self):
+    def commit(self, save_space=False):
         """Commit transaction
         """Commit transaction
         """
         """
         self.io.write_commit()
         self.io.write_commit()
-        self.compact_segments()
+        self.compact_segments(save_space=save_space)
         self.write_index()
         self.write_index()
         self.rollback()
         self.rollback()
 
 
@@ -220,31 +220,50 @@ class Repository:
             os.unlink(os.path.join(self.path, name))
             os.unlink(os.path.join(self.path, name))
         self.index = None
         self.index = None
 
 
-    def compact_segments(self):
+    def compact_segments(self, save_space=False):
         """Compact sparse segments by copying data into new segments
         """Compact sparse segments by copying data into new segments
         """
         """
         if not self.compact:
         if not self.compact:
             return
             return
         index_transaction_id = self.get_index_transaction_id()
         index_transaction_id = self.get_index_transaction_id()
         segments = self.segments
         segments = self.segments
+        unused = []  # list of segments that are no longer used
+
+        def complete_xfer():
+            # complete the transfer (usually exactly when some target segment
+            # is full, or at the very end when everything is processed)
+            nonlocal unused
+            # commit the new, compact, used segments
+            self.io.write_commit()
+            # get rid of the old, sparse, unused segments. free space.
+            for segment in unused:
+                assert self.segments.pop(segment) == 0
+                self.io.delete_segment(segment)
+            unused = []
+
         for segment in sorted(self.compact):
         for segment in sorted(self.compact):
             if self.io.segment_exists(segment):
             if self.io.segment_exists(segment):
                 for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
                 for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
                     if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
                     if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
-                        new_segment, offset = self.io.write_put(key, data)
+                        try:
+                            new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
+                        except LoggedIO.SegmentFull:
+                            complete_xfer()
+                            new_segment, offset = self.io.write_put(key, data)
                         self.index[key] = new_segment, offset
                         self.index[key] = new_segment, offset
                         segments.setdefault(new_segment, 0)
                         segments.setdefault(new_segment, 0)
                         segments[new_segment] += 1
                         segments[new_segment] += 1
                         segments[segment] -= 1
                         segments[segment] -= 1
                     elif tag == TAG_DELETE:
                     elif tag == TAG_DELETE:
                         if index_transaction_id is None or segment > index_transaction_id:
                         if index_transaction_id is None or segment > index_transaction_id:
-                            self.io.write_delete(key)
+                            try:
+                                self.io.write_delete(key, raise_full=save_space)
+                            except LoggedIO.SegmentFull:
+                                complete_xfer()
+                                self.io.write_delete(key)
                 assert segments[segment] == 0
                 assert segments[segment] == 0
-
-        self.io.write_commit()
-        for segment in sorted(self.compact):
-            assert self.segments.pop(segment) == 0
-            self.io.delete_segment(segment)
+                unused.append(segment)
+        complete_xfer()
         self.compact = set()
         self.compact = set()
 
 
     def replay_segments(self, index_transaction_id, segments_transaction_id):
     def replay_segments(self, index_transaction_id, segments_transaction_id):
@@ -297,7 +316,7 @@ class Repository:
         if self.segments[segment] == 0:
         if self.segments[segment] == 0:
             self.compact.add(segment)
             self.compact.add(segment)
 
 
-    def check(self, repair=False):
+    def check(self, repair=False, save_space=False):
         """Check repository consistency
         """Check repository consistency
 
 
         This method verifies all segment checksums and makes sure
         This method verifies all segment checksums and makes sure
@@ -358,7 +377,7 @@ class Repository:
                     if current_index.get(key, (-1, -1)) != value:
                     if current_index.get(key, (-1, -1)) != value:
                         report_error('Index mismatch for key {}. {} != {}'.format(key, value, current_index.get(key, (-1, -1))))
                         report_error('Index mismatch for key {}. {} != {}'.format(key, value, current_index.get(key, (-1, -1))))
         if repair:
         if repair:
-            self.compact_segments()
+            self.compact_segments(save_space=save_space)
             self.write_index()
             self.write_index()
         self.rollback()
         self.rollback()
         if error_found:
         if error_found:
@@ -441,6 +460,9 @@ class Repository:
 
 
 class LoggedIO:
 class LoggedIO:
 
 
+    class SegmentFull(Exception):
+        """raised when a segment is full, before opening next"""
+
     header_fmt = struct.Struct('<IIB')
     header_fmt = struct.Struct('<IIB')
     assert header_fmt.size == 9
     assert header_fmt.size == 9
     put_header_fmt = struct.Struct('<IIB32s')
     put_header_fmt = struct.Struct('<IIB32s')
@@ -517,8 +539,10 @@ class LoggedIO:
     def segment_filename(self, segment):
     def segment_filename(self, segment):
         return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment))
         return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment))
 
 
-    def get_write_fd(self, no_new=False):
+    def get_write_fd(self, no_new=False, raise_full=False):
         if not no_new and self.offset and self.offset > self.limit:
         if not no_new and self.offset and self.offset > self.limit:
+            if raise_full:
+                raise self.SegmentFull
             self.close_segment()
             self.close_segment()
         if not self._write_fd:
         if not self._write_fd:
             if self.segment % self.segments_per_dir == 0:
             if self.segment % self.segments_per_dir == 0:
@@ -630,9 +654,9 @@ class LoggedIO:
             key, data = data[:32], data[32:]
             key, data = data[:32], data[32:]
         return size, tag, key, data
         return size, tag, key, data
 
 
-    def write_put(self, id, data):
+    def write_put(self, id, data, raise_full=False):
+        fd = self.get_write_fd(raise_full=raise_full)
         size = len(data) + self.put_header_fmt.size
         size = len(data) + self.put_header_fmt.size
-        fd = self.get_write_fd()
         offset = self.offset
         offset = self.offset
         header = self.header_no_crc_fmt.pack(size, TAG_PUT)
         header = self.header_no_crc_fmt.pack(size, TAG_PUT)
         crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
         crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
@@ -640,8 +664,8 @@ class LoggedIO:
         self.offset += size
         self.offset += size
         return self.segment, offset
         return self.segment, offset
 
 
-    def write_delete(self, id):
-        fd = self.get_write_fd()
+    def write_delete(self, id, raise_full=False):
+        fd = self.get_write_fd(raise_full=raise_full)
         header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
         header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
         crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
         crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
         fd.write(b''.join((crc, header, id)))
         fd.write(b''.join((crc, header, id)))

+ 15 - 0
borg/testsuite/archiver.py

@@ -771,6 +771,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_not_in('test1', output)
         self.assert_not_in('test1', output)
         self.assert_in('test2', output)
         self.assert_in('test2', output)
 
 
+    def test_prune_repository_save_space(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test1', src_dir)
+        self.cmd('create', self.repository_location + '::test2', src_dir)
+        output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2')
+        self.assert_in('Keeping archive: test2', output)
+        self.assert_in('Would prune:     test1', output)
+        output = self.cmd('list', self.repository_location)
+        self.assert_in('test1', output)
+        self.assert_in('test2', output)
+        self.cmd('prune', '--save-space', self.repository_location, '--keep-daily=2')
+        output = self.cmd('list', self.repository_location)
+        self.assert_not_in('test1', output)
+        self.assert_in('test2', output)
+
     def test_prune_repository_prefix(self):
     def test_prune_repository_prefix(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)
         self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)
         self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)

+ 1 - 1
borg/testsuite/repository.py

@@ -311,7 +311,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         # Simulate a crash before compact
         # Simulate a crash before compact
         with patch.object(Repository, 'compact_segments') as compact:
         with patch.object(Repository, 'compact_segments') as compact:
             self.repository.commit()
             self.repository.commit()
-            compact.assert_called_once_with()
+            compact.assert_called_once_with(save_space=False)
         self.reopen()
         self.reopen()
         self.check(repair=True)
         self.check(repair=True)
         self.assert_equal(self.repository.get(bytes(32)), b'data2')
         self.assert_equal(self.repository.get(bytes(32)), b'data2')

+ 2 - 1
docs/quickstart.rst

@@ -17,7 +17,8 @@ a good amount of free space on the filesystem that has your backup repository
 
 
 If you run out of disk space, it can be hard or impossible to free space,
 If you run out of disk space, it can be hard or impossible to free space,
 because |project_name| needs free space to operate - even to delete backup
 because |project_name| needs free space to operate - even to delete backup
-archives.
+archives. There is a ``--save-space`` option for some commands, but even with
+that |project_name| will need free space to operate.
 
 
 You can use some monitoring process or just include the free space information
 You can use some monitoring process or just include the free space information
 in your backup log files (you check them regularly anyway, right?).
 in your backup log files (you check them regularly anyway, right?).