
Merge pull request #4324 from ThomasWaldmann/backports-15

1.1-maint backports (15)
TW, 6 years ago
parent
commit
2f99fba20b
6 changed files with 74 additions and 24 deletions
  1. docs/faq.rst (+9 -0)
  2. src/borg/archive.py (+17 -14)
  3. src/borg/archiver.py (+6 -2)
  4. src/borg/remote.py (+14 -6)
  5. src/borg/repository.py (+13 -2)
  6. src/borg/testsuite/archiver.py (+15 -0)

+ 9 - 0
docs/faq.rst

@@ -289,6 +289,15 @@ the :ref:`borg_recreate` command to rewrite all archives with a
 different ``--exclude`` pattern. See the examples in the
 :ref:`borg_recreate` manpage for more information.
 
+Can I safely change the compression level or algorithm?
+--------------------------------------------------------
+
+The compression level and algorithm don't affect deduplication. Chunk ID hashes
+are calculated *before* compression. New compression settings
+will only be applied to new chunks, not existing chunks. So it's safe
+to change them.
+
+
 Security
 ########
 

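To illustrate the FAQ answer above: chunk IDs are derived from the *uncompressed* data, so the compressor choice never affects deduplication. A minimal sketch of the idea, where plain SHA-256 stands in for borg's keyed chunk ID MAC and zlib/lzma stand in for the old and new compression settings:

    import hashlib
    import lzma
    import zlib

    def chunk_id(data):
        # plain SHA-256 stands in for borg's keyed chunk ID MAC
        return hashlib.sha256(data).digest()

    data = b'contents of one file chunk' * 100
    id_before = chunk_id(data)
    old_payload = zlib.compress(data)   # old compression setting
    new_payload = lzma.compress(data)   # new compression setting
    assert old_payload != new_payload   # the stored bytes differ ...
    assert chunk_id(data) == id_before  # ... but the chunk ID does not
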
+ 17 - 14
src/borg/archive.py

@@ -1391,7 +1391,7 @@ class ArchiveChecker:
                 if self.repair:
                     self.repository.put(id_, cdata)
 
-        def verify_file_chunks(item):
+        def verify_file_chunks(archive_name, item):
             """Verifies that all file chunks are present.
 
             Missing file chunks will be replaced with new chunks of the same length containing all zeros.
@@ -1412,7 +1412,7 @@ class ArchiveChecker:
             chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
             if has_chunks_healthy and len(chunks_current) != len(chunks_healthy):
                 # should never happen, but there was issue #3218.
-                logger.warning('{}: Invalid chunks_healthy metadata removed!'.format(item.path))
+                logger.warning('{}: {}: Invalid chunks_healthy metadata removed!'.format(archive_name, item.path))
                 del item.chunks_healthy
                 has_chunks_healthy = False
                 chunks_healthy = chunks_current
@@ -1421,20 +1421,23 @@ class ArchiveChecker:
                 if chunk_id not in self.chunks:
                     # a chunk of the healthy list is missing
                     if chunk_current == chunk_healthy:
-                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
-                                     'Replacing with all-zero chunk.'.format(item.path, offset, offset + size))
+                        logger.error('{}: {}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(
+                                     archive_name, item.path, offset, offset + size))
                         self.error_found = chunks_replaced = True
                         chunk_id, size, csize, cdata = replacement_chunk(size)
                         add_reference(chunk_id, size, csize, cdata)
                     else:
-                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a '
-                                    'all-zero replacement chunk already.'.format(item.path, offset, offset + size))
+                        logger.info('{}: {}: Previously missing file chunk is still missing (Byte {}-{}). It has a '
+                                    'all-zero replacement chunk already.'.format(
+                                    archive_name, item.path, offset, offset + size))
                         chunk_id, size, csize = chunk_current
                         if chunk_id in self.chunks:
                             add_reference(chunk_id, size, csize)
                         else:
-                            logger.warning('{}: Missing all-zero replacement chunk detected (Byte {}-{}). '
-                                           'Generating new replacement chunk.'.format(item.path, offset, offset + size))
+                            logger.warning('{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}). '
+                                           'Generating new replacement chunk.'.format(
+                                           archive_name, item.path, offset, offset + size))
                             self.error_found = chunks_replaced = True
                             chunk_id, size, csize, cdata = replacement_chunk(size)
                             add_reference(chunk_id, size, csize, cdata)
@@ -1443,8 +1446,8 @@ class ArchiveChecker:
                         # normal case, all fine.
                         add_reference(chunk_id, size, csize)
                     else:
-                        logger.info('{}: Healed previously missing file chunk! '
-                                    '(Byte {}-{}).'.format(item.path, offset, offset + size))
+                        logger.info('{}: {}: Healed previously missing file chunk! '
+                                    '(Byte {}-{}).'.format(archive_name, item.path, offset, offset + size))
                         add_reference(chunk_id, size, csize)
                         mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
                 chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
@@ -1453,7 +1456,7 @@ class ArchiveChecker:
                 # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
                 item.chunks_healthy = item.chunks
             if has_chunks_healthy and chunk_list == chunks_healthy:
-                logger.info('{}: Completely healed previously damaged file!'.format(item.path))
+                logger.info('{}: {}: Completely healed previously damaged file!'.format(archive_name, item.path))
                 del item.chunks_healthy
             item.chunks = chunk_list
             if 'size' in item:
@@ -1461,8 +1464,8 @@ class ArchiveChecker:
                 item_chunks_size = item.get_size(compressed=False, from_chunks=True)
                 if item_size != item_chunks_size:
                     # just warn, but keep the inconsistency, so that borg extract can warn about it.
-                    logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
-                                   item.path, item_size, item_chunks_size))
+                    logger.warning('{}: {}: size inconsistency detected: size {}, chunks size {}'.format(
+                                   archive_name, item.path, item_size, item_chunks_size))
 
         def robust_iterator(archive):
             """Iterates through all archive items
@@ -1573,7 +1576,7 @@ class ArchiveChecker:
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
                     if 'chunks' in item:
-                        verify_file_chunks(item)
+                        verify_file_chunks(info.name, item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
                 for previous_item_id in archive.items:

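All the archive.py hunks make the same change: the check/repair log lines now carry the archive name in front of the item path, so a check over many archives tells you *which* archive contained the damaged item. Using the format string from the last hunk with made-up values:

    # illustration only, with hypothetical values:
    archive_name, path = 'host-2019-01-05', 'home/user/file.txt'
    size, chunks_size = 1048576, 1044480
    print('{}: {}: size inconsistency detected: size {}, chunks size {}'.format(
          archive_name, path, size, chunks_size))
    # host-2019-01-05: home/user/file.txt: size inconsistency detected:
    #   size 1048576, chunks size 1044480
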
+ 6 - 2
src/borg/archiver.py

@@ -125,15 +125,17 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True,
             location = args.location  # note: 'location' must be always present in args
             append_only = getattr(args, 'append_only', False)
             storage_quota = getattr(args, 'storage_quota', None)
+            make_parent_dirs = getattr(args, 'make_parent_dirs', False)
             if argument(args, fake) ^ invert_fake:
                 return method(self, args, repository=None, **kwargs)
             elif location.proto == 'ssh':
                 repository = RemoteRepository(location, create=create, exclusive=argument(args, exclusive),
-                                              lock_wait=self.lock_wait, lock=lock, append_only=append_only, args=args)
+                                              lock_wait=self.lock_wait, lock=lock, append_only=append_only,
+                                              make_parent_dirs=make_parent_dirs, args=args)
             else:
                 repository = Repository(location.path, create=create, exclusive=argument(args, exclusive),
                                         lock_wait=self.lock_wait, lock=lock, append_only=append_only,
-                                        storage_quota=storage_quota)
+                                        storage_quota=storage_quota, make_parent_dirs=make_parent_dirs)
             with repository:
                 if manifest or cache:
                     kwargs['manifest'], kwargs['key'] = Manifest.load(repository, compatibility)
@@ -2957,6 +2959,8 @@ class Archiver:
         subparser.add_argument('--storage-quota', metavar='QUOTA', dest='storage_quota', default=None,
                                type=parse_storage_quota,
                                help='Set storage quota of the new repository (e.g. 5G, 1.5T). Default: no quota.')
+        subparser.add_argument('--make-parent-dirs', dest='make_parent_dirs', action='store_true',
+                               help='create the parent directories of the repository directory, if they are missing.')
 
         check_epilog = process_epilog("""
         The check command verifies the consistency of a repository and the corresponding archives.

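Note the getattr(args, 'make_parent_dirs', False) pattern in with_repository: the decorator wraps every subcommand, but only borg init defines the new option, so it must fall back to False rather than raise AttributeError. A small self-contained demonstration:

    from argparse import Namespace

    init_args = Namespace(make_parent_dirs=True)   # 'borg init --make-parent-dirs'
    list_args = Namespace()                        # a subcommand without the option

    assert getattr(init_args, 'make_parent_dirs', False) is True
    assert getattr(list_args, 'make_parent_dirs', False) is False
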
+ 14 - 6
src/borg/remote.py

@@ -341,7 +341,8 @@ class RepositoryServer:  # pragma: no cover
             path = path[3:]
         return os.path.realpath(path)
 
-    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, append_only=False):
+    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, append_only=False,
+             make_parent_dirs=False):
         logging.debug('Resolving repository path %r', path)
         path = self._resolve_path(path)
         logging.debug('Resolved repository path to %r', path)
@@ -370,7 +371,8 @@ class RepositoryServer:  # pragma: no cover
         self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock,
                                      append_only=append_only,
                                      storage_quota=self.storage_quota,
-                                     exclusive=exclusive)
+                                     exclusive=exclusive,
+                                     make_parent_dirs=make_parent_dirs)
         self.repository.__enter__()  # clean exit handled by serve() method
         return self.repository.id
 
@@ -529,7 +531,8 @@ class RemoteRepository:
     # If compatibility with 1.0.x is not longer needed, replace all checks of this with True and simplify the code
     dictFormat = False  # outside of __init__ for testing of legacy free protocol
 
-    def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False, args=None):
+    def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False,
+                 make_parent_dirs=False, args=None):
         self.location = self._location = location
         self.preload_ids = []
         self.msgid = 0
@@ -582,7 +585,8 @@ class RemoteRepository:
 
             def do_open():
                 self.id = self.open(path=self.location.path, create=create, lock_wait=lock_wait,
-                                    lock=lock, exclusive=exclusive, append_only=append_only)
+                                    lock=lock, exclusive=exclusive, append_only=append_only,
+                                    make_parent_dirs=make_parent_dirs)
 
             if self.dictFormat:
                 do_open()
@@ -745,6 +749,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     raise PathNotAllowed('(unknown)')
                 else:
                     raise PathNotAllowed(args[0].decode())
+            elif error == 'ParentPathDoesNotExist':
+                raise Repository.ParentPathDoesNotExist(args[0].decode())
             elif error == 'ObjectNotFound':
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.orig)
@@ -890,8 +896,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
         self.ignore_responses |= set(waiting_for)  # we lose order here
 
     @api(since=parse_version('1.0.0'),
-         append_only={'since': parse_version('1.0.7'), 'previously': False})
-    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, append_only=False):
+         append_only={'since': parse_version('1.0.7'), 'previously': False},
+         make_parent_dirs={'since': parse_version('1.1.9'), 'previously': False})
+    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, append_only=False,
+             make_parent_dirs=False):
         """actual remoting is done via self.call in the @api decorator"""
 
     @api(since=parse_version('1.0.0'))

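The @api entry make_parent_dirs={'since': parse_version('1.1.9'), 'previously': False} drives client/server compatibility: the argument is only sent to servers new enough to accept it, and against an older server it may only be omitted if it still holds the legacy value. A hedged sketch of that negotiation (version tuples replace borg's parse_version; this is not the real decorator's internals):

    def prepare_call_args(server_version, kwargs, versioned_params):
        sent = dict(kwargs)
        for name, spec in versioned_params.items():
            if server_version < spec['since']:
                # older server: drop the argument, but only if it carries the
                # documented legacy value ('previously'); otherwise fail loudly
                if sent.pop(name, spec['previously']) != spec['previously']:
                    raise RuntimeError('server too old for argument: ' + name)
        return sent

    versioned = {'make_parent_dirs': {'since': (1, 1, 9), 'previously': False}}
    assert prepare_call_args((1, 1, 8), {'make_parent_dirs': False}, versioned) == {}
    assert prepare_call_args((1, 1, 9), {'make_parent_dirs': True},
                             versioned) == {'make_parent_dirs': True}
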
+ 13 - 2
src/borg/repository.py

@@ -119,6 +119,9 @@ class Repository:
     class PathAlreadyExists(Error):
         """There is already something at {}."""
 
+    class ParentPathDoesNotExist(Error):
+        """The parent path of the repo directory [{}] does not exist."""
+
     class InvalidRepository(Error):
         """{} is not a valid repository. Check repo config."""
 
@@ -146,7 +149,8 @@ class Repository:
         """The storage quota ({}) has been exceeded ({}). Try deleting some archives."""
 
     def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True,
-                 append_only=False, storage_quota=None, check_segment_magic=True):
+                 append_only=False, storage_quota=None, check_segment_magic=True,
+                 make_parent_dirs=False):
         self.path = os.path.abspath(path)
         self._location = Location('file://%s' % self.path)
         self.io = None  # type: LoggedIO
@@ -167,6 +171,7 @@ class Repository:
         self.storage_quota_use = 0
         self.transaction_doomed = None
         self.check_segment_magic = check_segment_magic
+        self.make_parent_dirs = make_parent_dirs
 
     def __del__(self):
         if self.lock:
@@ -249,8 +254,14 @@ class Repository:
         """Create a new empty repository at `path`
         """
         self.check_can_create_repository(path)
+        if self.make_parent_dirs:
+            parent_path = os.path.join(path, os.pardir)
+            os.makedirs(parent_path, exist_ok=True)
         if not os.path.exists(path):
-            os.mkdir(path)
+            try:
+                os.mkdir(path)
+            except FileNotFoundError as err:
+                raise self.ParentPathDoesNotExist(path) from err
         with open(os.path.join(path, 'README'), 'w') as fd:
             fd.write(REPOSITORY_README)
         os.mkdir(os.path.join(path, 'data'))

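Condensed, the new create path behaves like the sketch below: with make_parent_dirs, missing ancestors are created first; without it, a missing parent now surfaces as the dedicated ParentPathDoesNotExist error rather than a raw FileNotFoundError (error class simplified here):

    import os

    class ParentPathDoesNotExist(Exception):
        """The parent path of the repo directory does not exist."""

    def create_repo_dir(path, make_parent_dirs=False):
        if make_parent_dirs:
            parent_path = os.path.join(path, os.pardir)  # e.g. 'repo/..'
            os.makedirs(parent_path, exist_ok=True)      # build missing ancestors
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except FileNotFoundError as err:
                raise ParentPathDoesNotExist(path) from err
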
+ 15 - 0
src/borg/testsuite/archiver.py

@@ -438,6 +438,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         # the interesting parts of info_output2 and info_output should be same
         self.assert_equal(filter(info_output), filter(info_output2))
 
+    def test_init_parent_dirs(self):
+        parent_path = os.path.join(self.tmpdir, 'parent1', 'parent2')
+        repository_path = os.path.join(parent_path, 'repository')
+        repository_location = self.prefix + repository_path
+        with pytest.raises(Repository.ParentPathDoesNotExist):
+            # normal borg init does NOT create missing parent dirs
+            self.cmd('init', '--encryption=none', repository_location)
+        # but if told so, it does:
+        self.cmd('init', '--encryption=none', '--make-parent-dirs', repository_location)
+        assert os.path.exists(parent_path)
+
     def test_unix_socket(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         try:
@@ -2904,6 +2915,10 @@ class ArchiverTestCaseBinary(ArchiverTestCase):
     EXE = 'borg.exe'
     FORK_DEFAULT = True
 
+    @unittest.skip('does not raise Exception, but sets rc==2')
+    def test_init_parent_dirs(self):
+        pass
+
     @unittest.skip('patches objects')
     def test_init_interrupt(self):
         pass