Browse Source

do not upgrade repositories in place by default

instead, we perform the equivalent of `cp -al` on the repository to
keep a backup, and then rewrite the files, breaking the hardlinks as
necessary.

it has to be confirmed that the rest of Borg will also break hardlinks
when operating on files in the repository. if Borg operates in place
on any files of the repository, it could jeoperdize the backup, so
this needs to be verified. I believe that most files are written to a
temporary file and moved into place, however, so the backup should be
safe.

the rationale behind the backup copy is that we want to be extra
careful with user's data by default. the old behavior is retained
through the `--inplace`/`-i` commandline flag. plus, this way we don't
need to tell users to go through extra steps (`cp -a`, in particular)
before running the command.

also, it can take a long time to do the copy of the attic repository
we wish to work on. since `cp -a` doesn't provide progress
information, the new default behavior provides a nicer user experience
of giving an overall impression of the upgrade progress, while
retaining compatibility with Attic by default (in a separate
repository, of course).

this makes the upgrade command much less scary to use and hopefully
will convert drones to the borg collective.

the only place where the default inplace behavior is retained is in
the header_replace() function, to avoid breaking the cache conversion
code and to keep API stability and semantic coherence ("replace" by
defaults means in place).
Antoine Beaupré 9 years ago
parent
commit
6d457aed57
3 changed files with 84 additions and 26 deletions
  1. 19 14
      borg/archiver.py
  2. 39 4
      borg/testsuite/upgrader.py
  3. 26 8
      borg/upgrader.py

+ 19 - 14
borg/archiver.py

@@ -472,7 +472,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         # XXX: should auto-detect if it is an attic repository here
         repo = AtticRepositoryUpgrader(args.repository.path, create=False)
         try:
-            repo.upgrade(args.dry_run)
+            repo.upgrade(args.dry_run, inplace=args.inplace)
         except NotImplementedError as e:
             print("warning: %s" % e)
         return self.exit_code
@@ -894,7 +894,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                help='repository to prune')
 
         upgrade_epilog = textwrap.dedent("""
-        upgrade an existing Borg repository in place. this currently
+        upgrade an existing Borg repository. this currently
         only support converting an Attic repository, but may
         eventually be extended to cover major Borg upgrades as well.
 
@@ -909,13 +909,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         the first backup after the conversion takes longer than expected
         due to the cache resync.
 
-        it is recommended you run this on a copy of the Attic
-        repository, in case something goes wrong, for example:
-
-            cp -a attic borg
-            borg upgrade -n borg
-            borg upgrade borg
-
         upgrade should be able to resume if interrupted, although it
         will still iterate over all segments. if you want to start
         from scratch, use `borg delete` over the copied repository to
@@ -923,11 +916,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
 
             borg delete borg
 
-        the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic
-        will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as
-        the magic strings will have changed.
-
-        you have been warned.""")
+        unless ``--inplace`` is specified, the upgrade process first
+        creates a backup copy of the repository, in
+        REPOSITORY.upgrade-DATETIME, using hardlinks. this takes
+        longer than in place upgrades, but is much safer and gives
+        progress information (as opposed to ``cp -al``). once you are
+        satisfied with the conversion, you can safely destroy the
+        backup copy.
+
+        WARNING: running the upgrade in place will make the current
+        copy unuseable with older version, with no way of going back
+        to previous versions. this can PERMANENTLY DAMAGE YOUR
+        REPOSITORY!  Attic CAN NOT READ BORG REPOSITORIES, as the
+        magic strings have changed. you have been warned.""")
         subparser = subparsers.add_parser('upgrade', parents=[common_parser],
                                           description=self.do_upgrade.__doc__,
                                           epilog=upgrade_epilog,
@@ -936,6 +937,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                default=False, action='store_true',
                                help='do not change repository')
+        subparser.add_argument('-i', '--inplace', dest='inplace',
+                               default=False, action='store_true',
+                               help="""rewrite repository in-place, with no chance of going back to older
+                               versions of the repository.""")
         subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                                type=location_validator(archive=False),
                                help='path to the repository to be upgraded')

+ 39 - 4
borg/testsuite/upgrader.py

@@ -63,8 +63,11 @@ def attic_repo(tmpdir):
     attic_repo.close()
     return attic_repo
 
+@pytest.fixture(params=[True, False])
+def inplace(request):
+    return request.param
 
-def test_convert_segments(tmpdir, attic_repo):
+def test_convert_segments(tmpdir, attic_repo, inplace):
     """test segment conversion
 
     this will load the given attic repository, list all the segments
@@ -80,7 +83,7 @@ def test_convert_segments(tmpdir, attic_repo):
     repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
     segments = [filename for i, filename in repo.io.segment_iterator()]
     repo.close()
-    repo.convert_segments(segments, dryrun=False)
+    repo.convert_segments(segments, dryrun=False, inplace=inplace)
     repo.convert_cache(dryrun=False)
     assert repo_valid(tmpdir)
 
@@ -141,7 +144,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file):
     assert key_valid(attic_key_file.path)
 
 
-def test_convert_all(tmpdir, attic_repo, attic_key_file):
+def test_convert_all(tmpdir, attic_repo, attic_key_file, inplace):
     """test all conversion steps
 
     this runs everything. mostly redundant test, since everything is
@@ -155,7 +158,39 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file):
     """
     # check should fail because of magic number
     assert not repo_valid(tmpdir)
+    def first_inode(path):
+        return os.stat(os.path.join(path, 'data', '0', '0')).st_ino
+    orig_inode = first_inode(attic_repo.path)
     repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
-    repo.upgrade(dryrun=False)
+    backup = repo.upgrade(dryrun=False, inplace=inplace)
+    if inplace:
+        assert backup is None
+        assert first_inode(repo.path) == orig_inode
+    else:
+        assert backup
+        assert first_inode(repo.path) != first_inode(backup)
+
     assert key_valid(attic_key_file.path)
     assert repo_valid(tmpdir)
+
+def test_hardlink(tmpdir, inplace):
+    """test that we handle hard links properly
+
+    that is, if we are in "inplace" mode, hardlinks should *not*
+    change (ie. we write the file directly, so not the whole file, and
+    not re-create the file).
+
+    if we are *not* in inplace mode, then the inode should change, as
+    we are supposed to leave the original inode alone."""
+    a = str(tmpdir.join('a'))
+    with open(a, 'wb') as tmp:
+        tmp.write(b'aXXX')
+    b = str(tmpdir.join('b'))
+    os.link(a, b)
+    AtticRepositoryUpgrader.header_replace(b, b'a', b'b', inplace=inplace)
+    if not inplace:
+        assert os.stat(a).st_ino != os.stat(b).st_ino
+    else:
+        assert os.stat(a).st_ino == os.stat(b).st_ino
+    with open(b, 'rb') as tmp:
+        assert tmp.read() == b'bXXX'

+ 26 - 8
borg/upgrader.py

@@ -1,4 +1,5 @@
 from binascii import hexlify
+import datetime
 import logging
 logger = logging.getLogger(__name__)
 import os
@@ -15,7 +16,7 @@ ATTIC_MAGIC = b'ATTICSEG'
 
 
 class AtticRepositoryUpgrader(Repository):
-    def upgrade(self, dryrun=True):
+    def upgrade(self, dryrun=True, inplace=False):
         """convert an attic repository to a borg repository
 
         those are the files that need to be upgraded here, from most
@@ -26,6 +27,12 @@ class AtticRepositoryUpgrader(Repository):
         we nevertheless do the order in reverse, as we prefer to do
         the fast stuff first, to improve interactivity.
         """
+        backup = None
+        if not inplace:
+            backup = '{}.upgrade-{:%Y-%m-%d-%H:%M:%S}'.format(self.path, datetime.datetime.now())
+            logger.info('making a hardlink copy in %s', backup)
+            if not dryrun:
+                shutil.copytree(self.path, backup, copy_function=lambda src, dst: os.link(src, dst))
         logger.info("opening attic repository with borg and converting")
         # we need to open the repo to load configuration, keyfiles and segments
         self.open(self.path, exclusive=False)
@@ -42,13 +49,14 @@ class AtticRepositoryUpgrader(Repository):
                                    exclusive=True).acquire()
         try:
             self.convert_cache(dryrun)
-            self.convert_segments(segments, dryrun)
+            self.convert_segments(segments, dryrun=dryrun, inplace=inplace)
         finally:
             self.lock.release()
             self.lock = None
+        return backup
 
     @staticmethod
-    def convert_segments(segments, dryrun):
+    def convert_segments(segments, dryrun=True, inplace=False):
         """convert repository segments from attic to borg
 
         replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in
@@ -60,23 +68,33 @@ class AtticRepositoryUpgrader(Repository):
         i = 0
         for filename in segments:
             i += 1
-            print("\rconverting segment %d/%d in place, %.2f%% done (%s)"
+            print("\rconverting segment %d/%d, %.2f%% done (%s)"
                   % (i, len(segments), 100*float(i)/len(segments), filename),
                   end='', file=sys.stderr)
             if dryrun:
                 time.sleep(0.001)
             else:
-                AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC)
+                AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC, inplace=inplace)
         print(file=sys.stderr)
 
     @staticmethod
-    def header_replace(filename, old_magic, new_magic):
+    def header_replace(filename, old_magic, new_magic, inplace=True):
         with open(filename, 'r+b') as segment:
             segment.seek(0)
             # only write if necessary
             if segment.read(len(old_magic)) == old_magic:
-                segment.seek(0)
-                segment.write(new_magic)
+                if inplace:
+                    segment.seek(0)
+                    segment.write(new_magic)
+                else:
+                    # remove the hardlink and rewrite the file. this
+                    # works because our old file handle is still open
+                    # so even though the file is removed, we can still
+                    # read it until the file is closed.
+                    os.unlink(filename)
+                    with open(filename, 'wb') as new_segment:
+                        new_segment.write(new_magic)
+                        new_segment.write(segment.read())
 
     def find_attic_keyfile(self):
         """find the attic keyfiles