
Merge pull request #231 from anarcat/attic-converter

attic to borg one time converter
TW 9 years ago
parent
commit
1207e1a4fa
4 changed files with 466 additions and 2 deletions
  1. borg/archiver.py (+66 -0)
  2. borg/testsuite/upgrader.py (+163 -0)
  3. borg/upgrader.py (+233 -0)
  4. tox.ini (+4 -2)

+ 66 - 0
borg/archiver.py

@@ -17,6 +17,7 @@ import traceback
 from . import __version__
 from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
 from .compress import Compressor, COMPR_BUFFER
+from .upgrader import AtticRepositoryUpgrader
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator
@@ -462,6 +463,24 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
             stats.print_('Deleted data:', cache)
         return self.exit_code
 
+    def do_upgrade(self, args):
+        """upgrade a repository from a previous version"""
+        # XXX: currently only upgrades from Attic repositories, but may
+        # eventually be extended to deal with major upgrades for borg
+        # itself.
+        #
+        # in this case, it should auto-detect the current repository
+        # format and fire up necessary upgrade mechanism. this remains
+        # to be implemented.
+
+        # XXX: should auto-detect if it is an attic repository here
+        repo = AtticRepositoryUpgrader(args.repository.path, create=False)
+        try:
+            repo.upgrade(args.dry_run)
+        except NotImplementedError as e:
+            print("warning: %s" % e)
+        return self.exit_code
+
     helptext = {}
     helptext['patterns'] = '''
         Exclude patterns use a variant of shell pattern syntax, with '*' matching any
@@ -896,6 +915,53 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                type=location_validator(archive=False),
                                help='repository to prune')
 
+        upgrade_epilog = textwrap.dedent("""
+        upgrade an existing Borg repository in place. this currently
+        only supports converting an Attic repository, but may
+        eventually be extended to cover major Borg upgrades as well.
+
+        it will change the magic strings in the repository's segments
+        to match the new Borg magic strings. the keyfiles found in
+        $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and
+        copied to $BORG_KEYS_DIR or ~/.borg/keys.
+
+        the cache files are converted from $ATTIC_CACHE_DIR or
+        ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the
+        cache layout differs between Borg and Attic, so the first
+        backup after the conversion may take longer than expected
+        due to the cache resync.
+
+        it is recommended you run this on a copy of the Attic
+        repository, in case something goes wrong, for example:
+
+            cp -a attic borg
+            borg upgrade -n borg
+            borg upgrade borg
+
+        upgrade should be able to resume if interrupted, although it
+        will still iterate over all segments. if you want to start
+        from scratch, use `borg delete` over the copied repository to
+        make sure the cache files are also removed:
+
+            borg delete borg
+
+        the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic
+        will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as
+        the magic strings will have changed.
+
+        you have been warned.""")
+        subparser = subparsers.add_parser('upgrade', parents=[common_parser],
+                                          description=self.do_upgrade.__doc__,
+                                          epilog=upgrade_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_upgrade)
+        subparser.add_argument('-n', '--dry-run', dest='dry_run',
+                               default=False, action='store_true',
+                               help='do not change repository')
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
+                               type=location_validator(archive=False),
+                               help='path to the repository to be upgraded')
+
         subparser = subparsers.add_parser('help', parents=[common_parser],
                                           description='Extra help')
         subparser.add_argument('--epilog-only', dest='epilog_only',
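
The XXX comments in do_upgrade() above leave attic-repository auto-detection
unimplemented. A minimal sketch of what such a check could look like, assuming
a hypothetical looks_like_attic_repo() helper that only peeks at the segment
magic defined in borg/upgrader.py further down:

    import os

    from borg.upgrader import ATTIC_MAGIC   # b'ATTICSEG', added by this PR

    def looks_like_attic_repo(path):
        """hypothetical helper: peek at the first segment file found under
        <repo>/data/** and compare its first 8 bytes to the attic magic"""
        data_dir = os.path.join(path, 'data')
        for root, dirs, files in os.walk(data_dir):
            for name in sorted(files):
                with open(os.path.join(root, name), 'rb') as segment:
                    return segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC
        return False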

+ 163 - 0
borg/testsuite/upgrader.py

@@ -0,0 +1,163 @@
+import os
+import shutil
+import tempfile
+
+import pytest
+
+try:
+    import attic.repository
+    import attic.key
+    import attic.helpers
+except ImportError:
+    attic = None
+
+from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey
+from ..helpers import get_keys_dir
+from ..key import KeyfileKey
+from ..repository import Repository, MAGIC
+
+pytestmark = pytest.mark.skipif(attic is None,
+                                reason='cannot find an attic install')
+
+
+def repo_valid(path):
+    """
+    utility function to check if borg can open a repository
+
+    :param path: the path to the repository
+    :returns: whether borg can successfully check the repository
+    """
+    repository = Repository(str(path), create=False)
+    # can't check raises() because check() handles the error
+    state = repository.check()
+    repository.close()
+    return state
+
+
+def key_valid(path):
+    """
+    check that the new keyfile is alright
+
+    :param path: the path to the key file
+    :returns: whether the file starts with the borg magic string
+    """
+    keyfile = os.path.join(get_keys_dir(),
+                           os.path.basename(path))
+    with open(keyfile, 'r') as f:
+        return f.read().startswith(KeyfileKey.FILE_ID)
+
+
+@pytest.fixture()
+def attic_repo(tmpdir):
+    """
+    create an attic repo with some stuff in it
+
+    :param tmpdir: path to the repository to be created
+    :returns: an attic.repository.Repository object
+    """
+    attic_repo = attic.repository.Repository(str(tmpdir), create=True)
+    # throw some stuff in that repo, copied from `RepositoryTestCase.test1`
+    for x in range(100):
+        attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA')
+    attic_repo.commit()
+    attic_repo.close()
+    return attic_repo
+
+
+def test_convert_segments(tmpdir, attic_repo):
+    """test segment conversion
+
+    this will load the given attic repository, list all the segments,
+    then convert them one at a time. we need to close the repo before
+    conversion, otherwise we get errors from borg
+
+    :param tmpdir: a temporary directory to run the test in (builtin
+    fixture)
+    :param attic_repo: a populated attic repository (fixture)
+    """
+    # check should fail because of magic number
+    assert not repo_valid(tmpdir)
+    print("opening attic repository with borg and converting")
+    repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    segments = [filename for i, filename in repo.io.segment_iterator()]
+    repo.close()
+    repo.convert_segments(segments, dryrun=False)
+    repo.convert_cache(dryrun=False)
+    assert repo_valid(tmpdir)
+
+
+class MockArgs:
+    """
+    mock attic location
+
+    this is used to simulate a key location with a properly loaded
+    repository object to create a key file
+    """
+    def __init__(self, path):
+        self.repository = attic.helpers.Location(path)
+
+
+@pytest.fixture()
+def attic_key_file(attic_repo, tmpdir):
+    """
+    create an attic key file from the given repo, in the keys
+    subdirectory of the given tmpdir
+
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :returns: the KeyfileKey object as returned by
+    attic.key.KeyfileKey.create()
+    """
+    keys_dir = str(tmpdir.mkdir('keys'))
+
+    # we use a keys directory under tmpdir for the created keyfile,
+    # because we do not want to clutter the user's existing keyfiles
+    os.environ['ATTIC_KEYS_DIR'] = keys_dir
+
+    # we use the same directory for the converted files; this clutters
+    # the previously created one, but we don't care about that here.
+    # in real runs, the original key will be retained.
+    os.environ['BORG_KEYS_DIR'] = keys_dir
+    os.environ['ATTIC_PASSPHRASE'] = 'test'
+    return attic.key.KeyfileKey.create(attic_repo,
+                                       MockArgs(keys_dir))
+
+
+def test_keys(tmpdir, attic_repo, attic_key_file):
+    """test key conversion
+
+    test that we can convert the given key to a properly formatted
+    borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have
+    been properly populated by the attic_key_file fixture.
+
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param attic_key_file: an attic.key.KeyfileKey (fixture created above)
+    """
+    repository = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    keyfile = AtticKeyfileKey.find_key_file(repository)
+    AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False)
+    assert key_valid(attic_key_file.path)
+
+
+def test_convert_all(tmpdir, attic_repo, attic_key_file):
+    """test all conversion steps
+
+    this runs everything. mostly a redundant test, since everything is
+    also covered by the more specific tests above, but it exercises the
+    full upgrade() entry point end to end.
+
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param attic_key_file: an attic.key.KeyfileKey (fixture created above)
+    """
+    # check should fail because of magic number
+    assert not repo_valid(tmpdir)
+    print("opening attic repository with borg and converting")
+    repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    repo.upgrade(dryrun=False)
+    assert key_valid(attic_key_file.path)
+    assert repo_valid(tmpdir)
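
The module-level pytestmark above makes every test in this file skip when
attic cannot be imported, which is why tox.ini (below) gains attic as a test
dependency. A small sketch, assuming borg and attic are both installed in the
current environment, for running only these tests from Python:

    import pytest

    # run just the upgrader tests; they skip themselves if attic is missing
    raise SystemExit(pytest.main(['--pyargs', 'borg.testsuite.upgrader', '-v']))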

+ 233 - 0
borg/upgrader.py

@@ -0,0 +1,233 @@
+from binascii import hexlify
+import os
+import shutil
+import time
+
+from .helpers import get_keys_dir, get_cache_dir
+from .locking import UpgradableLock
+from .repository import Repository, MAGIC
+from .key import KeyfileKey, KeyfileNotFoundError
+
+ATTIC_MAGIC = b'ATTICSEG'
+
+
+class AtticRepositoryUpgrader(Repository):
+    def upgrade(self, dryrun=True):
+        """convert an attic repository to a borg repository
+
+        those are the files that need to be upgraded here, from most
+        important to least important: segments, key files, and various
+        caches, the latter being optional, as they will be rebuilt if
+        missing.
+
+        we nevertheless process them in roughly reverse order, as we
+        prefer to do the fast stuff first, to improve interactivity.
+        """
+        print("reading segments from attic repository using borg")
+        # we need to open it to load the configuration and other fields
+        self.open(self.path, exclusive=False)
+        segments = [filename for i, filename in self.io.segment_iterator()]
+        try:
+            keyfile = self.find_attic_keyfile()
+        except KeyfileNotFoundError:
+            print("no key file found for repository")
+        else:
+            self.convert_keyfiles(keyfile, dryrun)
+        self.close()
+        # partial open: just hold on to the lock
+        self.lock = UpgradableLock(os.path.join(self.path, 'lock'),
+                                   exclusive=True).acquire()
+        try:
+            self.convert_cache(dryrun)
+            self.convert_segments(segments, dryrun)
+        finally:
+            self.lock.release()
+            self.lock = None
+
+    @staticmethod
+    def convert_segments(segments, dryrun):
+        """convert repository segments from attic to borg
+
+        replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in
+        `$ATTIC_REPO/data/**`.
+
+        luckily the magic string length didn't change so we can just
+        replace the first 8 bytes of all regular files in there."""
+        print("converting %d segments..." % len(segments))
+        i = 0
+        for filename in segments:
+            i += 1
+            print("\rconverting segment %d/%d in place, %.2f%% done (%s)"
+                  % (i, len(segments), 100*float(i)/len(segments), filename), end='')
+            if dryrun:
+                time.sleep(0.001)
+            else:
+                AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC)
+        print()
+
+    @staticmethod
+    def header_replace(filename, old_magic, new_magic):
+        with open(filename, 'r+b') as segment:
+            segment.seek(0)
+            # only write if necessary
+            if segment.read(len(old_magic)) == old_magic:
+                segment.seek(0)
+                segment.write(new_magic)
+
+    def find_attic_keyfile(self):
+        """find the attic keyfiles
+
+        the keyfiles are loaded by `KeyfileKey.find_key_file()`. that
+        finds the keys with the right identifier for the repo.
+
+        this is expected to look into $HOME/.attic/keys or
+        $ATTIC_KEYS_DIR for key files matching the given Borg
+        repository.
+
+        it is expected to raise an exception (KeyfileNotFoundError) if
+        no key is found. whether that exception is from Borg or Attic
+        is unclear.
+
+        this is split in a separate function in case we want to use
+        the attic code here directly, instead of our local
+        implementation."""
+        return AtticKeyfileKey.find_key_file(self)
+
+    @staticmethod
+    def convert_keyfiles(keyfile, dryrun):
+
+        """convert key files from attic to borg
+
+        replacement pattern is `s/ATTIC KEY/BORG_KEY/` in
+        `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or
+        `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or
+        `$HOME/.borg/keys`.
+
+        no need to decrypt to convert. we need to rewrite the whole
+        key file because magic string length changed, but that's not a
+        problem because the keyfiles are small (compared to, say,
+        all the segments)."""
+        print("converting keyfile %s" % keyfile)
+        with open(keyfile, 'r') as f:
+            data = f.read()
+        data = data.replace(AtticKeyfileKey.FILE_ID, KeyfileKey.FILE_ID, 1)
+        keyfile = os.path.join(get_keys_dir(), os.path.basename(keyfile))
+        print("writing borg keyfile to %s" % keyfile)
+        if not dryrun:
+            with open(keyfile, 'w') as f:
+                f.write(data)
+
+    def convert_cache(self, dryrun):
+        """convert caches from attic to borg
+
+        those are all hash indexes, so we need to
+        `s/ATTICIDX/BORG_IDX/` in a few locations:
+
+        * the repository index (in `$ATTIC_REPO/index.%d`, where `%d`
+          is the `Repository.get_index_transaction_id()`), which we
+          should probably update, with a lock, see
+          `Repository.open()`, which i'm not sure we should use
+          because it may write data on `Repository.close()`...
+
+        * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or
+          `$HOME/.cache/attic/<repoid>/`), which we could just drop,
+          but if we'd want to convert, we could open it with the
+          `Cache.open()`, edit in place and then `Cache.close()` to
+          make sure we have locking right
+        """
+        caches = []
+        transaction_id = self.get_index_transaction_id()
+        if transaction_id is None:
+            print('no index file found for repository %s' % self.path)
+        else:
+            caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')]
+
+        # copy of attic's get_cache_dir()
+        attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR',
+                                         os.path.join(os.path.expanduser('~'),
+                                                      '.cache', 'attic'))
+        attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'))
+        borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'))
+
+        def copy_cache_file(path):
+            """copy the given attic cache path into the borg directory
+
+            does nothing if dryrun is True. also expects
+            attic_cache_dir and borg_cache_dir to be set in the parent
+            scope, to the directories path including the repository
+            identifier.
+
+            :param path: the basename of the cache file to copy
+            (example: "files" or "chunks") as a string
+
+            :returns: the borg file that was created or None if none
+            was created.
+
+            """
+            attic_file = os.path.join(attic_cache_dir, path)
+            if os.path.exists(attic_file):
+                borg_file = os.path.join(borg_cache_dir, path)
+                if os.path.exists(borg_file):
+                    print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file))
+                else:
+                    print("copying attic cache file from %s to %s" % (attic_file, borg_file))
+                    if not dryrun:
+                        shutil.copyfile(attic_file, borg_file)
+                    return borg_file
+            else:
+                print("no %s cache file found in %s" % (path, attic_file))
+            return None
+
+        # XXX: untested, because generating cache files is a PITA, see
+        # Archiver.do_create() for proof
+        if os.path.exists(attic_cache_dir):
+            if not os.path.exists(borg_cache_dir):
+                os.makedirs(borg_cache_dir)
+
+            # files that have no header to convert, just copy them
+            for cache in ['config', 'files']:
+                copy_cache_file(cache)
+
+            # we need to convert the headers of those files, copy first
+            for cache in ['chunks']:
+                copied = copy_cache_file(cache)
+                if copied:
+                    print("converting cache %s" % cache)
+                    if not dryrun:
+                        AtticRepositoryUpgrader.header_replace(copied, b'ATTICIDX', b'BORG_IDX')
+
+
+class AtticKeyfileKey(KeyfileKey):
+    """backwards compatible Attic key file parser"""
+    FILE_ID = 'ATTIC KEY'
+
+    # verbatim copy from attic
+    @staticmethod
+    def get_keys_dir():
+        """Determine where to repository keys and cache"""
+        return os.environ.get('ATTIC_KEYS_DIR',
+                              os.path.join(os.path.expanduser('~'), '.attic', 'keys'))
+
+    @classmethod
+    def find_key_file(cls, repository):
+        """copy of attic's `find_key_file`_
+
+        this has two small modifications:
+
+        1. it uses the above `get_keys_dir`_ instead of the global one,
+           assumed to be borg's
+
+        2. it uses `repository.path`_ instead of
+           `repository._location.canonical_path`_ because we can't
+           assume the repository has been opened by the archiver yet
+        """
+        get_keys_dir = cls.get_keys_dir
+        id = hexlify(repository.id).decode('ascii')
+        keys_dir = get_keys_dir()
+        for name in os.listdir(keys_dir):
+            filename = os.path.join(keys_dir, name)
+            with open(filename, 'r') as fd:
+                line = fd.readline().strip()
+                if line and line.startswith(cls.FILE_ID) and line[10:] == id:
+                    return filename
+        raise KeyfileNotFoundError(repository.path, get_keys_dir())
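
The conversion that `borg upgrade` performs can also be driven directly from
Python, which is essentially what test_convert_all() above does. A minimal
sketch, assuming /tmp/borg-copy is a copy of the attic repository made as the
epilog recommends (cp -a attic /tmp/borg-copy):

    from borg.upgrader import AtticRepositoryUpgrader

    # dry-run first, then convert in place; a fresh object is used for each
    # pass to mirror two separate `borg upgrade` invocations
    AtticRepositoryUpgrader('/tmp/borg-copy', create=False).upgrade(dryrun=True)
    AtticRepositoryUpgrader('/tmp/borg-copy', create=False).upgrade(dryrun=False)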

+ 4 - 2
tox.ini

@@ -2,13 +2,15 @@
 # fakeroot -u tox --recreate
 
 [tox]
-envlist = py32, py33, py34, py35
+envlist = py{32,33,34,35}
 
 [testenv]
 # Change dir to avoid import problem for cython code. The directory does
 # not really matter, should be just different from the toplevel dir.
 changedir = {toxworkdir}
-deps = -rrequirements.d/development.txt
+deps =
+     -rrequirements.d/development.txt
+     attic
 commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
 # fakeroot -u needs some env vars:
 passenv = *