add --consider-checkpoint-files option, update FAQ

Thomas Waldmann · 9 years ago · commit e5bd6cef20
3 changed files with 31 additions and 24 deletions:

  1. docs/faq.rst (+11 -17)
  2. src/borg/archive.py (+7 -2)
  3. src/borg/archiver.py (+13 -5)

docs/faq.rst (+11 -17)

@@ -246,27 +246,21 @@ Once your backup has finished successfully, you can delete all
 ``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
 also take care of deleting unneeded checkpoints.
 
+Note: the checkpointing mechanism creates hidden, partial files in an archive,
+so that checkpoints work even while a big file is being processed.
+They are named ``<filename>.checkpoint_<N>`` and all operations usually ignore
+these files, but you can include them by giving the option
+``--consider-checkpoint-files``. You usually only need that option if you are
+really desperate (e.g. if you have no completed backup of that file and you'd
+rather get a partial file extracted than nothing). You do **not** want to give
+that option under normal circumstances.
+
How can I back up huge file(s) over an unstable connection?
-----------------------------------------------------------
 
-You can use this "split trick" as a workaround for the in-between-files-only
-checkpoints (see above), huge files and a instable connection to the repository:
-
-Split the huge file(s) into parts of manageable size (e.g. 100MB) and create
-a temporary archive of them. Borg will create checkpoints now more frequently
-than if you try to backup the files in their original form (e.g. 100GB).
-
-After that, you can remove the parts again and backup the huge file(s) in
-their original form. This will now work a lot faster as a lot of content chunks
-are already in the repository.
-
-After you have successfully backed up the huge original file(s), you can remove
-the temporary archive you made from the parts.
-
-We realize that this is just a better-than-nothing workaround, see :issue:`1198`
-for a potential solution.
+This is no longer a problem; see the previous FAQ item.
 
-Please note that this workaround only helps you for backup, not for restore.
+But please note that this only helps with backup, not with restore.
 
 If it crashes with a UnicodeError, what can I do?
 -------------------------------------------------
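
As a concrete illustration of the option discussed above: since
``--consider-checkpoint-files`` is a common option, it should combine with the
usual commands (repository and archive names below are hypothetical), e.g.
``borg list --consider-checkpoint-files repo::mybackup.checkpoint``
to show the hidden partial files, or
``borg extract --consider-checkpoint-files repo::mybackup.checkpoint``
to get them extracted.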

src/borg/archive.py (+7 -2)

@@ -231,7 +231,8 @@ class Archive:
 
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None):
+                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None,
+                 consider_checkpoint_files=False):
         self.cwd = os.getcwd()
         self.key = key
         self.repository = repository
@@ -250,6 +251,7 @@ class Archive:
         if end is None:
             end = datetime.utcnow()
         self.end = end
+        self.consider_checkpoint_files = consider_checkpoint_files
         self.pipeline = DownloadPipeline(self.repository, self.key)
         if create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
@@ -328,7 +330,10 @@ Number of files: {0.stats.nfiles}'''.format(
         return 'Archive(%r)' % self.name
 
     def item_filter(self, item, filter=None):
-        return 'checkpoint' not in item and (filter(item) if filter else True)
+        if not self.consider_checkpoint_files and 'checkpoint' in item:
+            # this is a checkpoint (partial) file; we usually don't want to consider it.
+            return False
+        return filter(item) if filter else True
 
     def iter_items(self, filter=None, preload=False):
         for item in self.pipeline.unpack_many(self.metadata[b'items'], preload=preload,
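
To make the new filter semantics concrete, here is a minimal standalone
sketch of the same logic (plain dicts stand in for borg's real item
objects; the sample paths are made up):

    def item_filter(item, consider_checkpoint_files=False, filter=None):
        # Hidden partial files written at checkpoint time carry a
        # 'checkpoint' key; skip them unless explicitly requested.
        if not consider_checkpoint_files and 'checkpoint' in item:
            return False
        return filter(item) if filter else True

    items = [{'path': 'big.iso', 'checkpoint': 1}, {'path': 'notes.txt'}]
    print([i['path'] for i in items if item_filter(i)])
    # -> ['notes.txt']
    print([i['path'] for i in items if item_filter(i, consider_checkpoint_files=True)])
    # -> ['big.iso', 'notes.txt']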

src/borg/archiver.py (+13 -5)

@@ -100,7 +100,8 @@ def with_archive(method):
     @functools.wraps(method)
     def wrapper(self, args, repository, key, manifest, **kwargs):
         archive = Archive(repository, key, manifest, args.location.archive,
-                          numeric_owner=getattr(args, 'numeric_owner', False), cache=kwargs.get('cache'))
+                          numeric_owner=getattr(args, 'numeric_owner', False), cache=kwargs.get('cache'),
+                          consider_checkpoint_files=args.consider_checkpoint_files)
         return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
     return wrapper
 
@@ -668,7 +669,8 @@ class Archiver:
                 print_output(line)
 
         archive1 = archive
-        archive2 = Archive(repository, key, manifest, args.archive2)
+        archive2 = Archive(repository, key, manifest, args.archive2,
+                           consider_checkpoint_files=args.consider_checkpoint_files)
 
         can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get(
             b'chunker_params', True) or args.same_chunker_params
@@ -753,7 +755,8 @@ class Archiver:
 
         with cache_if_remote(repository) as cached_repo:
             if args.location.archive:
-                archive = Archive(repository, key, manifest, args.location.archive)
+                archive = Archive(repository, key, manifest, args.location.archive,
+                                  consider_checkpoint_files=args.consider_checkpoint_files)
             else:
                 archive = None
             operations = FuseOperations(key, repository, manifest, archive, cached_repo)
@@ -779,7 +782,8 @@ class Archiver:
         if args.location.archive:
             matcher, _ = self.build_matcher(args.excludes, args.paths)
             with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
-                archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
+                archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
+                                  consider_checkpoint_files=args.consider_checkpoint_files)
 
                 if args.format:
                     format = args.format
@@ -981,7 +985,8 @@ class Archiver:
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
-        archive = Archive(repository, key, manifest, args.location.archive)
+        archive = Archive(repository, key, manifest, args.location.archive,
+                          consider_checkpoint_files=args.consider_checkpoint_files)
         for i, item_id in enumerate(archive.metadata[b'items']):
             _, data = key.decrypt(item_id, repository.get(item_id))
             filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
@@ -1232,6 +1237,9 @@ class Archiver:
                                   help='set umask to M (local and remote, default: %(default)04o)')
         common_group.add_argument('--remote-path', dest='remote_path', metavar='PATH',
                                   help='set remote path to executable (default: "borg")')
+        common_group.add_argument('--consider-checkpoint-files', dest='consider_checkpoint_files',
+                                  action='store_true', default=False,
+                                  help='treat checkpoint files like normal files (e.g. to list/extract them)')
 
         parser = argparse.ArgumentParser(prog=prog, description='Borg - Deduplicated Backups')
         parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__,
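
The new flag itself is plain argparse; a minimal sketch of its behavior in
isolation (``borg-sketch`` is a made-up program name, not borg's actual
parser wiring):

    import argparse

    parser = argparse.ArgumentParser(prog='borg-sketch')
    parser.add_argument('--consider-checkpoint-files', dest='consider_checkpoint_files',
                        action='store_true', default=False,
                        help='treat checkpoint files like normal files (e.g. to list/extract them)')

    print(parser.parse_args([]).consider_checkpoint_files)
    # -> False
    print(parser.parse_args(['--consider-checkpoint-files']).consider_checkpoint_files)
    # -> True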