Răsfoiți Sursa

Merge pull request #1008 from ThomasWaldmann/cleanup-checkpoints

prune: kill all checkpoints except the latest, fixes #998
TW 9 ani în urmă
părinte
comite
37de94e532
2 fișiere modificate, cu 39 adăugiri și 7 ștergeri
  1. 23 7
      borg/archiver.py
  2. 16 0
      borg/testsuite/archiver.py

+ 23 - 7
borg/archiver.py

@@ -783,13 +783,21 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
             return self.exit_code
-        archives = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
+        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.prefix:
         if args.prefix:
-            archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
+            archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
+        is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
+        checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
+        # keep the latest checkpoint, if there is no later non-checkpoint archive
+        latest_checkpoint = checkpoints[0] if checkpoints else None
+        if archives_checkpoints[0] is latest_checkpoint:
+            keep_checkpoints = [latest_checkpoint, ]
+        else:
+            keep_checkpoints = []
+        checkpoints = set(checkpoints)
         # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
         # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
         # that is newer than a successfully completed backup - and killing the successful backup.
         # that is newer than a successfully completed backup - and killing the successful backup.
-        is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
-        archives = [archive for archive in archives if not is_checkpoint(archive.name)]
+        archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         keep = []
         keep = []
         if args.within:
         if args.within:
             keep += prune_within(archives, args.within)
             keep += prune_within(archives, args.within)
@@ -807,11 +815,10 @@ class Archiver:
             keep += prune_split(archives, '%Y-%m', args.monthly, keep)
             keep += prune_split(archives, '%Y-%m', args.monthly, keep)
         if args.yearly:
         if args.yearly:
             keep += prune_split(archives, '%Y', args.yearly, keep)
             keep += prune_split(archives, '%Y', args.yearly, keep)
-
-        to_delete = set(archives) - set(keep)
+        to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         stats = Statistics()
         with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
         with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
-            for archive in archives:
+            for archive in archives_checkpoints:
                 if archive in to_delete:
                 if archive in to_delete:
                     if args.dry_run:
                     if args.dry_run:
                         if args.output_list:
                         if args.output_list:
@@ -1628,11 +1635,20 @@ class Archiver:
         any of the specified retention options. This command is normally used by
         any of the specified retention options. This command is normally used by
         automated backup scripts wanting to keep a certain number of historic backups.
         automated backup scripts wanting to keep a certain number of historic backups.
 
 
+        Also, prune automatically removes checkpoint archives (incomplete archives left
+        behind by interrupted backup runs) except if the checkpoint is the latest
+        archive (and thus still needed). Checkpoint archives are not considered when
+        comparing archive counts against the retention limits (--keep-*).
+
         If a prefix is set with -P, then only archives that start with the prefix are
         If a prefix is set with -P, then only archives that start with the prefix are
         considered for deletion and only those archives count towards the totals
         considered for deletion and only those archives count towards the totals
         specified by the rules.
         specified by the rules.
         Otherwise, *all* archives in the repository are candidates for deletion!
         Otherwise, *all* archives in the repository are candidates for deletion!
 
 
+        If you have multiple sequences of archives with different data sets (e.g.
+        from different machines) in one shared repository, use one prune call per
+        data set that matches only the respective archives using the -P option.
+
         The "--keep-within" option takes an argument of the form "<int><char>",
         The "--keep-within" option takes an argument of the form "<int><char>",
         where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
         where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
         to keep all archives that were created within the past 48 hours.
         to keep all archives that were created within the past 48 hours.

+ 16 - 0
borg/testsuite/archiver.py

@@ -991,20 +991,36 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         # these are not really checkpoints, but they look like them:
         # these are not really checkpoints, but they look like them:
         self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
         self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
         self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
         self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
+        self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
         output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
         self.assert_in('Would prune:     test1', output)
         self.assert_in('Would prune:     test1', output)
         # must keep the latest non-checkpoint archive:
         # must keep the latest non-checkpoint archive:
         self.assert_in('Keeping archive: test2', output)
         self.assert_in('Keeping archive: test2', output)
+        # must keep the latest checkpoint archive:
+        self.assert_in('Keeping archive: test4.checkpoint', output)
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test1', output)
         self.assert_in('test2', output)
         self.assert_in('test2', output)
         self.assert_in('test3.checkpoint', output)
         self.assert_in('test3.checkpoint', output)
         self.assert_in('test3.checkpoint.1', output)
         self.assert_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
         self.cmd('prune', self.repository_location, '--keep-daily=2')
         self.cmd('prune', self.repository_location, '--keep-daily=2')
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_not_in('test1', output)
         self.assert_not_in('test1', output)
         # the latest non-checkpoint archive must be still there:
         # the latest non-checkpoint archive must be still there:
         self.assert_in('test2', output)
         self.assert_in('test2', output)
+        # only the latest checkpoint archive must still be there:
+        self.assert_not_in('test3.checkpoint', output)
+        self.assert_not_in('test3.checkpoint.1', output)
+        self.assert_in('test4.checkpoint', output)
+        # now we supersede the latest checkpoint by a successful backup:
+        self.cmd('create', self.repository_location + '::test5', src_dir)
+        self.cmd('prune', self.repository_location, '--keep-daily=2')
+        output = self.cmd('list', self.repository_location)
+        # all checkpoints should be gone now:
+        self.assert_not_in('checkpoint', output)
+        # the latest archive must be still there
+        self.assert_in('test5', output)
 
 
     def test_prune_repository_save_space(self):
     def test_prune_repository_save_space(self):
         self.cmd('init', self.repository_location)
         self.cmd('init', self.repository_location)